changeset 5872:fab6701e5cb2

New fts module. * MODULES.html.sh (File system functions): Add fts, fts-lgpl. * lib/fts.c: Don't include "cycle-check.h" or "hash.h". (setup_dir, free_dir): New functions. (enter_dir, leave_dir): Define trivial alternatives of _LGPL_PACKAGE. Move to fts-cycle.c if !_LGPL_PACKAGE. (HT_INITIAL_SIZE, ENTER_DIR): Remove. All uses removed. (LEAVE_DIR): Fix typo: pass Fts and Ent to leave_dir. (struct Active_dir, AD_compare, AD_hash, enter_dir, leave_dir): Move to fts-cycle.c. (fts_open): Use setup_dir. (fts_close): Use free_dir. (fts_read): Have just one copy of the ENTER_DIR code rather than three. This adds a label and some gotos, but the alternatives were messier. Check for memory allocation failure when entering a dir. (fts_stat) [_LGPL_PACKAGE]: Bring back glibc cycle detection code. * lib/fts_.h (_LGPL_PACKAGE) [defined _LIBC]: New macro. (FTS): New member fts_cycle, that is a union that contains the old active_dir_ht and cycle_state. All uses changed to mention fts_cycle.ht and fts_cycle.state. * lib/fts-cycle.c: New file, containing GPL'ed code migrated out of fts.c, with the following changes: (setup_dir, free_dir): New functions. (enter_dir): Now returns bool. Return true if successful, false if memory exhausted. All callers changed. Do not bother partly cleaning up on memory allocation failure; that is free_dir's job. However, free ad if hash_insert fails, to avoid memory leak. (enter_dir, leave_dir): Accommodate change to FTS by inspecting fts->fts_options to see which union member to use. * m4/fts.m4 (gl_FUNC_FTS_CORE): Renamed from gl_FUNC_FTS. (gl_FUNC_FTS, gl_FUNC_FTS_LGPL): New macros. * lib/unlinkdir.h (cannot_unlink_dir) [UNLINK_CANNOT_UNLINK_DIR]: Now a macro, to pacify GCC.
author Paul Eggert <eggert@cs.ucla.edu>
date Fri, 20 May 2005 23:07:53 +0000
parents 8b95519c71b9
children 8885a222fb8a
files ChangeLog MODULES.html.sh lib/ChangeLog lib/fts-cycle.c lib/fts.c lib/fts_.h lib/unlinkdir.h m4/ChangeLog m4/fts.m4 modules/fts modules/fts-lgpl
diffstat 11 files changed, 349 insertions(+), 156 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2005-05-20  Paul Eggert  <eggert@cs.ucla.edu>
+
+	* MODULES.html.sh (File system functions): Add fts, fts-lgpl.
+
 2005-05-18  Jim Meyering  <jim@meyering.net>
 
 	* modules/dirfd (License): Change to LGPL.  Most of the code
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -1815,6 +1815,8 @@
   func_module file-type
   func_module fileblocks
   func_module filemode
+  func_module fts
+  func_module fts-lgpl
   func_module isdir
   func_module lchown
   func_module makepath
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,40 @@
+2005-05-20  Paul Eggert  <eggert@cs.ucla.edu>
+
+	New fts module.
+	* fts.c: Don't include "cycle-check.h" or "hash.h".
+	(setup_dir, free_dir): New functions.
+	(enter_dir, leave_dir): Define trivial
+	alternatives of _LGPL_PACKAGE.  Move to fts-cycle.c if !_LGPL_PACKAGE.
+	(HT_INITIAL_SIZE, ENTER_DIR): Remove.  All uses removed.
+	(LEAVE_DIR): Fix typo: pass Fts and Ent to leave_dir.
+	(struct Active_dir, AD_compare, AD_hash, enter_dir, leave_dir):
+	Move to fts-cycle.c.
+	(fts_open): Use setup_dir.
+	(fts_close): Use free_dir.
+	(fts_read): Have just one copy of the ENTER_DIR code rather than three.
+	This adds a label and some gotos, but the alternatives were messier.
+	Check for memory allocation failure when entering a dir.
+	(fts_stat) [_LGPL_PACKAGE]: Bring back glibc cycle detection code.
+	* fts_.h (_LGPL_PACKAGE) [defined _LIBC]: New macro.
+	(FTS): New member fts_cycle, that is a union that contains the
+	old active_dir_ht and cycle_state.  All uses changed to mention
+	fts_cycle.ht and fts_cycle.state.
+	* fts-cycle.c: New file, containing GPL'ed code migrated out of
+	fts.c, with the following changes:
+	(setup_dir, free_dir): New functions.
+	(enter_dir): Now returns bool.  Return true if successful, false
+	if memory exhausted.  All callers changed.
+	Do not bother partly cleaning up on
+	memory allocation failure; that is free_dir's job.
+	However, free ad if hash_insert fails, to avoid memory leak.
+	(enter_dir, leave_dir): Accommodate change to FTS by inspecting
+	fts->fts_options to see which union member to use.
+
+2005-05-20  Jim Meyering  <jim@meyering.net>
+
+	* unlinkdir.h (cannot_unlink_dir) [UNLINK_CANNOT_UNLINK_DIR]:
+	Now a macro, to pacify GCC.
+
 2005-05-20  Eric Blake  <ebb9@byu.net>  (tiny change)
 
 	* chown.c (rpl_chown): Return -1 on failure.
new file mode 100644
--- /dev/null
+++ b/lib/fts-cycle.c
@@ -0,0 +1,154 @@
+/* Detect cycles in file tree walks.
+
+   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+
+   Written by Jim Meyering.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include "cycle-check.h"
+#include "hash.h"
+
+/* Use these each of these to map a device/inode pair to an FTSENT.  */
+struct Active_dir
+{
+  dev_t dev;
+  ino_t ino;
+  FTSENT *fts_ent;
+};
+
+static bool
+AD_compare (void const *x, void const *y)
+{
+  struct Active_dir const *ax = x;
+  struct Active_dir const *ay = y;
+  return ax->ino == ay->ino
+      && ax->dev == ay->dev;
+}
+
+static size_t
+AD_hash (void const *x, size_t table_size)
+{
+  struct Active_dir const *ax = x;
+  return (uintmax_t) ax->ino % table_size;
+}
+
+/* Set up the cycle-detection machinery.  */
+
+static bool
+setup_dir (FTS *fts)
+{
+  if (fts->fts_options & FTS_TIGHT_CYCLE_CHECK)
+    {
+      enum { HT_INITIAL_SIZE = 31 };
+      fts->fts_cycle.ht = hash_initialize (HT_INITIAL_SIZE, NULL, AD_hash,
+					   AD_compare, free);
+      if (! fts->fts_cycle.ht)
+	return false;
+    }
+  else
+    {
+      fts->fts_cycle.state = malloc (sizeof *fts->fts_cycle.state);
+      if (! fts->fts_cycle.state)
+	return false;
+      cycle_check_init (fts->fts_cycle.state);
+    }
+
+  return true;
+}
+
+/* Enter a directory during a file tree walk.  */
+
+static bool
+enter_dir (FTS *fts, FTSENT *ent)
+{
+  if (fts->fts_options & FTS_TIGHT_CYCLE_CHECK)
+    {
+      struct stat const *st = ent->fts_statp;
+      struct Active_dir *ad = malloc (sizeof *ad);
+      struct Active_dir *ad_from_table;
+
+      if (!ad)
+	return false;
+
+      ad->dev = st->st_dev;
+      ad->ino = st->st_ino;
+      ad->fts_ent = ent;
+
+      /* See if we've already encountered this directory.
+	 This can happen when following symlinks as well as
+	 with a corrupted directory hierarchy. */
+      ad_from_table = hash_insert (fts->fts_cycle.ht, ad);
+
+      if (ad_from_table != ad)
+	{
+	  free (ad);
+	  if (!ad_from_table)
+	    return false;
+
+	  /* There was an entry with matching dev/inode already in the table.
+	     Record the fact that we've found a cycle.  */
+	  ent->fts_cycle = ad_from_table->fts_ent;
+	  ent->fts_info = FTS_DC;
+	}
+    }
+  else
+    {
+      if (cycle_check (fts->fts_cycle.state, ent->fts_statp))
+	{
+	  /* FIXME: setting fts_cycle like this isn't proper.
+	     To do what the documentation requires, we'd have to
+	     go around the cycle again and find the right entry.
+	     But no callers in coreutils use the fts_cycle member. */
+	  ent->fts_cycle = ent;
+	  ent->fts_info = FTS_DC;
+	}
+    }
+
+  return true;
+}
+
+/* Leave a directory during a file tree walk.  */
+
+static void
+leave_dir (FTS *fts, FTSENT *ent)
+{
+  if (fts->fts_options & FTS_TIGHT_CYCLE_CHECK)
+    {
+      struct stat const *st = ent->fts_statp;
+      struct Active_dir obj;
+      void *found;
+      obj.dev = st->st_dev;
+      obj.ino = st->st_ino;
+      found = hash_delete (fts->fts_cycle.ht, &obj);
+      if (!found)
+	abort ();
+      free (found);
+    }
+}
+
+/* Free any memory used for cycle detection.  */
+
+static void
+free_dir (FTS *sp)
+{
+  if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK)
+    {
+      if (sp->fts_cycle.ht)
+	hash_free (sp->fts_cycle.ht);
+    }
+  else
+    free (sp->fts_cycle.state);
+}
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -66,8 +66,6 @@
 #include <fcntl.h>
 #include <errno.h>
 #include "dirfd.h"
-#include "cycle-check.h"
-#include "hash.h"
 #include "unistd-safer.h"
 #include <stdbool.h>
 #include <stdlib.h>
@@ -167,6 +165,15 @@
 static int      fts_safe_changedir (FTS *, FTSENT *, int, const char *)
      internal_function;
 
+#if _LGPL_PACKAGE
+static bool enter_dir (FTS *fts, FTSENT *ent) { return true; }
+static void leave_dir (FTS *fts, FTSENT *ent) {}
+static bool setup_dir (FTS *fts) { return true; }
+static void free_dir (FTS *fts) {}
+#else
+# include "fts-cycle.c"
+#endif
+
 #ifndef MAX
 # define MAX(a,b) ((a) > (b) ? (a) : (b))
 #endif
@@ -192,8 +199,6 @@
 #define BNAMES		2		/* fts_children, names only */
 #define BREAD		3		/* fts_read */
 
-#define HT_INITIAL_SIZE 31
-
 #if FTS_DEBUG
 bool fts_debug = false;
 # include <stdio.h>
@@ -202,114 +207,13 @@
 # define Dprintf(x)
 #endif
 
-#define ENTER_DIR(Fts, Ent, Tag)				\
-  do {								\
-      Dprintf (("  %s-entering: %s\n", Tag, (Ent)->fts_path));	\
-      enter_dir (sp, p);					\
-    } while (0)
-
 #define LEAVE_DIR(Fts, Ent, Tag)				\
-  do {								\
+  do								\
+    {								\
       Dprintf (("  %s-leaving: %s\n", Tag, (Ent)->fts_path));	\
-      leave_dir (sp, p);					\
-    } while (0)
-
-/* Use these each of these to map a device/inode pair to an FTSENT.  */
-struct Active_dir
-{
-  dev_t dev;
-  ino_t ino;
-  FTSENT *fts_ent;
-};
-
-static bool
-AD_compare (void const *x, void const *y)
-{
-  struct Active_dir const *ax = x;
-  struct Active_dir const *ay = y;
-  return ax->ino == ay->ino
-      && ax->dev == ay->dev;
-}
-
-static size_t
-AD_hash (void const *x, size_t table_size)
-{
-  struct Active_dir const *ax = x;
-  return (uintmax_t) ax->ino % table_size;
-}
-
-static void
-enter_dir (FTS *fts, FTSENT *ent)
-{
-  if (fts->active_dir_ht)
-    {
-      struct stat const *st = ent->fts_statp;
-      struct Active_dir *ad = malloc (sizeof (struct Active_dir));
-      struct Active_dir *ad_from_table;
-
-      if (ad == NULL)
-	goto give_up;
-
-      ad->dev = st->st_dev;
-      ad->ino = st->st_ino;
-      ad->fts_ent = ent;
-
-      /* See if we've already encountered this directory.
-	 This can happen when following symlinks as well as
-	 with a corrupted directory hierarchy. */
-      ad_from_table = hash_insert (fts->active_dir_ht, ad);
-
-      if (ad_from_table == NULL)
-	{
-	give_up:
-	  /* Insertion failed due to lack of memory.  Free the hash
-	     table and turn off this sort of cycle detection.  */
-	  hash_free (fts->active_dir_ht);
-	  fts->active_dir_ht = NULL;
-	  return;
-	}
-
-      if (ad_from_table != ad)
-	{
-	  /* There was an entry with matching dev/inode already in the table.
-	     Record the fact that we've found a cycle.  */
-	  ent->fts_cycle = ad_from_table->fts_ent;
-	  ent->fts_info = FTS_DC;
-
-	  /* ad was not inserted, so free it.  */
-	  free (ad);
-	}
-    }
-  else if (fts->cycle_state)
-    {
-      if (cycle_check (fts->cycle_state, ent->fts_statp))
-	{
-	  /* FIXME: setting fts_cycle like this isn't proper.
-	     To do what the documentation requires, we'd have to
-	     go around the cycle again and find the right entry.
-	     But no callers in coreutils use the fts_cycle member. */
-	  ent->fts_cycle = ent;
-	  ent->fts_info = FTS_DC;
-	}
-    }
-}
-
-static void
-leave_dir (FTS *fts, FTSENT *ent)
-{
-  if (fts->active_dir_ht)
-    {
-      struct stat const *st = ent->fts_statp;
-      struct Active_dir obj;
-      void *found;
-      obj.dev = st->st_dev;
-      obj.ino = st->st_ino;
-      found = hash_delete (fts->active_dir_ht, &obj);
-      if (!found)
-	abort ();
-      free (found);
-    }
-}
+      leave_dir (Fts, Ent);					\
+    }								\
+  while (false)
 
 /* Open the directory DIR if possible, and return a file
    descriptor.  Return -1 and set errno on failure.  It doesn't matter
@@ -416,24 +320,8 @@
 		goto mem3;
 	sp->fts_cur->fts_link = root;
 	sp->fts_cur->fts_info = FTS_INIT;
-
-	if (ISSET (FTS_TIGHT_CYCLE_CHECK))
-	  {
-	    sp->active_dir_ht = hash_initialize (HT_INITIAL_SIZE,
-						 NULL, AD_hash,
-						 AD_compare, free);
-	    if (sp->active_dir_ht == NULL)
-	      goto mem3;
-	    sp->cycle_state = malloc (sizeof *sp->cycle_state);
-	  }
-	else
-	  {
-	    sp->cycle_state = malloc (sizeof *sp->cycle_state);
-	    if (sp->cycle_state == NULL)
-	      goto mem3;
-	    cycle_check_init (sp->cycle_state);
-	    sp->active_dir_ht = NULL;
-	  }
+	if (! setup_dir (sp))
+	  goto mem3;
 
 	/*
 	 * If using chdir(2), grab a file descriptor pointing to dot to ensure
@@ -514,11 +402,7 @@
 		(void)close(sp->fts_rfd);
 	}
 
-	/* Free any memory used for cycle detection.  */
-	if (sp->active_dir_ht)
-	  hash_free (sp->active_dir_ht);
-	if (sp->cycle_state)
-	  free (sp->cycle_state);
+	free_dir (sp);
 
 	/* Free up the stream pointer. */
 	free(sp);
@@ -583,9 +467,7 @@
 			} else
 				p->fts_flags |= FTS_SYMFOLLOW;
 		}
-		if (p->fts_info == FTS_D)
-			ENTER_DIR (sp, p, "7");
-		return (p);
+		goto check_for_dir;
 	}
 
 	/* Directory in pre-order. */
@@ -664,9 +546,7 @@
 				return (NULL);
 			}
 			fts_load(sp, p);
-			if (p->fts_info == FTS_D)
-				ENTER_DIR (sp, p, "8");
-			return (sp->fts_cur = p);
+			goto check_for_dir;
 		}
 
 		/*
@@ -691,9 +571,18 @@
 name:		t = sp->fts_path + NAPPEND(p->fts_parent);
 		*t++ = '/';
 		memmove(t, p->fts_name, p->fts_namelen + 1);
+check_for_dir:
+		sp->fts_cur = p;
 		if (p->fts_info == FTS_D)
-			ENTER_DIR (sp, p, "9");
-		return (sp->fts_cur = p);
+		  {
+		    Dprintf (("  %s-entering: %s\n", sp, p->fts_path));
+		    if (! enter_dir (sp, p))
+		      {
+			__set_errno (ENOMEM);
+			return NULL;
+		      }
+		  }
+		return p;
 	}
 
 	/* Move up to the parent node. */
@@ -1213,6 +1102,28 @@
 	if (S_ISDIR(sbp->st_mode)) {
 		if (ISDOT(p->fts_name))
 			return (FTS_DOT);
+
+#if _LGPL_PACKAGE
+		{
+		  /*
+		   * Cycle detection is done by brute force when the directory
+		   * is first encountered.  If the tree gets deep enough or the
+		   * number of symbolic links to directories is high enough,
+		   * something faster might be worthwhile.
+		   */
+		  FTSENT *t;
+
+		  for (t = p->fts_parent;
+		       t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
+		    if (sbp->st_ino == t->fts_statp->st_ino
+			&& sbp->st_dev == t->fts_statp->st_dev)
+		      {
+			p->fts_cycle = t;
+			return (FTS_DC);
+		      }
+		}
+#endif
+
 		return (FTS_D);
 	}
 	if (S_ISLNK(sbp->st_mode))
--- a/lib/fts_.h
+++ b/lib/fts_.h
@@ -52,6 +52,7 @@
 
 # ifdef _LIBC
 #  include <features.h>
+#  define _LGPL_PACKAGE 1
 # else
 #  undef __THROW
 #  define __THROW
@@ -104,19 +105,28 @@
 # define FTS_STOP	0x2000		/* (private) unrecoverable error */
 	int fts_options;		/* fts_open options, global flags */
 
-	/* This data structure records the directories between a starting
-	   point and the current directory.  I.e., a directory is recorded
-	   here IFF we have visited it once, but we have not yet completed
-	   processing of all its entries.  Every time we visit a new directory,
-	   we add that directory to this set.  When we finish with a directory
-	   (usually by visiting it a second time), we remove it from this
-	   set.  Each entry in this data structure is a device/inode pair.
-	   This data structure is used to detect directory cycles efficiently
-	   and promptly even when the depth of a hierarchy is in the tens
-	   of thousands.  Lazy checking, as done by GNU rm via cycle-check.c,
-	   wouldn't be appropriate for du.  */
-	struct hash_table *active_dir_ht;
-	struct cycle_check_state *cycle_state;
+# if !_LGPL_PACKAGE
+	union {
+		/* This data structure is used if FTS_TIGHT_CYCLE_CHECK is
+		   specified.  It records the directories between a starting
+		   point and the current directory.  I.e., a directory is
+		   recorded here IFF we have visited it once, but we have not
+		   yet completed processing of all its entries.  Every time we
+		   visit a new directory, we add that directory to this set.
+		   When we finish with a directory (usually by visiting it a
+		   second time), we remove it from this set.  Each entry in
+		   this data structure is a device/inode pair.  This data
+		   structure is used to detect directory cycles efficiently and
+		   promptly even when the depth of a hierarchy is in the tens
+		   of thousands.  */
+		struct hash_table *ht;
+
+		/* This data structure uses lazy checking, as done by rm via
+	           cycle-check.c.  It's the default, but it's not appropriate
+	           for programs like du.  */
+		struct cycle_check_state *state;
+	} fts_cycle;
+# endif
 } FTS;
 
 typedef struct _ftsent {
--- a/lib/unlinkdir.h
+++ b/lib/unlinkdir.h
@@ -21,7 +21,7 @@
 #include <stdbool.h>
 
 #if UNLINK_CANNOT_UNLINK_DIR
-static bool cannot_unlink_dir (void) { return true; }
+# define cannot_unlink_dir() true
 #else
 bool cannot_unlink_dir (void);
 #endif
--- a/m4/ChangeLog
+++ b/m4/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-20  Paul Eggert  <eggert@cs.ucla.edu>
+
+	* fts.m4 (gl_FUNC_FTS_CORE): Renamed from gl_FUNC_FTS.
+	(gl_FUNC_FTS, gl_FUNC_FTS_LGPL): New macros.
+
 2005-05-20  Eric Blake  <ebb9@byu.net>  (tiny change)
 
 	* chown.m4 (gl_FUNC_CHOWN): Correct sense of test for honoring IDs
--- a/m4/fts.m4
+++ b/m4/fts.m4
@@ -1,4 +1,4 @@
-#serial 3
+#serial 4
 dnl Copyright (C) 2005 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -6,6 +6,20 @@
 
 AC_DEFUN([gl_FUNC_FTS],
 [
+  AC_REQUIRE([gl_FUNC_FTS_CORE])
+  AC_LIBSOURCES([fts-cycle.c])
+])
+
+AC_DEFUN([gl_FUNC_FTS_LGPL],
+[
+  AC_REQUIRE([gl_FUNC_FTS_CORE])
+  AC_DEFINE([_LGPL_PACKAGE], 1,
+    [Define to 1 if compiling for a package to be distributed under the
+     GNU Lesser Public License.])
+])
+
+AC_DEFUN([gl_FUNC_FTS_CORE],
+[
   AC_LIBSOURCES([fts.c, fts_.h])
 
   dnl Use this version of fts unconditionally, since the GNU libc and
new file mode 100644
--- /dev/null
+++ b/modules/fts
@@ -0,0 +1,30 @@
+Description:
+Traverse a file hierarchy.
+
+Files:
+lib/fts_.h
+lib/fts.c
+lib/fts-cycle.c
+m4/fts.m4
+
+Depends-on:
+cycle-check
+hash
+dirfd
+unistd-safer
+stdbool
+gettext
+
+configure.ac:
+gl_FUNC_FTS
+
+Makefile.am:
+
+Include:
+"fts_.h"
+
+License:
+GPL
+
+Maintainer:
+Jim Meyering
new file mode 100644
--- /dev/null
+++ b/modules/fts-lgpl
@@ -0,0 +1,26 @@
+Description:
+Traverse a file hierarchy (LPGL'ed version).
+
+Files:
+lib/fts_.h
+lib/fts.c
+m4/fts.m4
+
+Depends-on:
+dirfd
+stdbool
+gettext
+
+configure.ac:
+gl_FUNC_FTS_LGPL
+
+Makefile.am:
+
+Include:
+"fts_.h"
+
+License:
+LGPL
+
+Maintainer:
+Jim Meyering