changeset 13728:e67e8c083d6d

New module 'regex-quote'. * lib/regex-quote.h: New file. * lib/regex-quote.c: New file. * modules/regex-quote: New file. Suggested by Reuben Thomas <rrt@sc3d.org>.
author Bruno Haible <bruno@clisp.org>
date Sat, 25 Sep 2010 12:38:20 +0200
parents 006342ca9f6f
children 3f67238189f8
files ChangeLog lib/regex-quote.c lib/regex-quote.h modules/regex-quote
diffstat 4 files changed, 159 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2010-09-25  Bruno Haible  <bruno@clisp.org>
+
+	New module 'regex-quote'.
+	* lib/regex-quote.h: New file.
+	* lib/regex-quote.c: New file.
+	* modules/regex-quote: New file.
+	Suggested by Reuben Thomas <rrt@sc3d.org>.
+
 2010-09-24  Bruno Haible  <bruno@clisp.org>
 
 	unistr/u8-strchr: Fix a test failure on i586 glibc systems.
new file mode 100644
--- /dev/null
+++ b/lib/regex-quote.c
@@ -0,0 +1,86 @@
+/* Construct a regular expression from a literal string.
+   Copyright (C) 1995, 2010 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2010.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "regex-quote.h"
+
+#include <string.h>
+
+#include "mbuiter.h"
+#include "xalloc.h"
+
+/* Characters that get a backslash prefix when quoting for a BRE.  */
+static const char bre_special[] = "$^.*[]\\";
+
+/* Likewise for an ERE, where + ? ( ) { and | are operators as well.  */
+static const char ere_special[] = "$^.*[]\\+?(){|";
+
+size_t
+regex_quote_length (const char *string, int cflags)
+{
+  /* Byte count of the quoted form of string, terminating NUL excluded.  */
+  const char *to_escape = (cflags == 0 ? bre_special : ere_special);
+  size_t total = 0;
+  mbui_iterator_t it;
+
+  for (mbui_init (it, string); mbui_avail (it); mbui_advance (it))
+    {
+      size_t nbytes = mb_len (mbui_cur (it));
+      /* to_escape holds only ASCII, so only 1-byte characters can match.  */
+      if (nbytes == 1 && strchr (to_escape, * mbui_cur_ptr (it)) != NULL)
+        total++;                /* room for the backslash */
+      total += nbytes;
+    }
+  return total;
+}
+
+/* Writes the quoted form of string at p, without a terminating NUL, and
+   returns the position just past the last byte written.  There must be
+   room for regex_quote_length (string, cflags) + 1 bytes at p.  */
+char *
+regex_quote_copy (char *p, const char *string, int cflags)
+{
+  const char *to_escape = (cflags == 0 ? bre_special : ere_special);
+  mbui_iterator_t it;
+
+  for (mbui_init (it, string); mbui_avail (it); mbui_advance (it))
+    {
+      const char *src = mbui_cur_ptr (it);
+      size_t nbytes = mb_len (mbui_cur (it));
+      /* to_escape holds only ASCII, so only 1-byte characters can match.  */
+      if (nbytes == 1 && strchr (to_escape, *src) != NULL)
+        *p++ = '\\';
+      p = (char *) memcpy (p, src, nbytes) + nbytes;
+    }
+  return p;
+}
+
+/* Returns the quoted form of string as a NUL-terminated string in storage
+   freshly obtained through XNMALLOC.  */
+char *
+regex_quote (const char *string, int cflags)
+{
+  /* One byte more than the quoted length, for the terminating NUL.  */
+  size_t needed = regex_quote_length (string, cflags) + 1;
+  char *quoted = XNMALLOC (needed, char);
+  char *end = regex_quote_copy (quoted, string, cflags);
+
+  *end = '\0';
+  return quoted;
+}
new file mode 100644
--- /dev/null
+++ b/lib/regex-quote.h
@@ -0,0 +1,41 @@
+/* Construct a regular expression from a literal string.
+   Copyright (C) 1995, 2010 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2010.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+/* regex_quote converts a literal string to a regular expression that will
+   look for this literal string.
+   cflags can be 0 or REG_EXTENDED.
+   If it is 0, the result is a Basic Regular Expression (BRE)
+   <http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03>.
+   If it is REG_EXTENDED, the result is an Extended Regular Expression (ERE)
+   <http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04>.
+   The result is not anchored; if you want it to match only complete lines,
+   you need to add "^" at the beginning of the result and "$" at the end of the
+   result.
+ */
+
+/* Returns the byte count of the quoted string, excluding a trailing NUL.  */
+extern size_t regex_quote_length (const char *string, int cflags);
+
+/* Copies the quoted string to p, without a terminating NUL, and returns the
+   incremented p.  There must be room for regex_quote_length (string, cflags)
+   + 1 bytes at p.  */
+extern char * regex_quote_copy (char *p, const char *string, int cflags);
+
+/* Returns the freshly allocated, NUL-terminated quoted string.  */
+extern char * regex_quote (const char *string, int cflags);
new file mode 100644
--- /dev/null
+++ b/modules/regex-quote
@@ -0,0 +1,24 @@
+Description:
+Construct a regular expression from a literal string.
+
+Files:
+lib/regex-quote.h
+lib/regex-quote.c
+
+Depends-on:
+xalloc
+mbuiter
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += regex-quote.c
+
+Include:
+"regex-quote.h"
+
+License:
+GPL
+
+Maintainer:
+Bruno Haible