diff kpathsea/db.c @ 2999:faa5d0421460

[project @ 1997-05-23 03:02:09 by jwe]
author jwe
date Fri, 23 May 1997 03:02:36 +0000
parents
children 1f0b06020e36
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/kpathsea/db.c
@@ -0,0 +1,519 @@
+/* db.c: an external database to avoid filesystem lookups.
+
+Copyright (C) 1994, 95, 96, 97 Karl Berry.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#include <kpathsea/config.h>
+#include <kpathsea/absolute.h>
+#include <kpathsea/c-fopen.h>
+#include <kpathsea/c-pathch.h>
+#include <kpathsea/db.h>
+#include <kpathsea/hash.h>
+#include <kpathsea/line.h>
+#include <kpathsea/pathsearch.h>
+#include <kpathsea/readable.h>
+#include <kpathsea/str-list.h>
+#include <kpathsea/tex-file.h>
+#include <kpathsea/variable.h>
+
+static hash_table_type db; /* The hash table for all the ls-R's.  Keys
+                              are file names, values the directory
+                              each name was listed under (see db_build).  */
+#ifndef DB_HASH_SIZE
+#define DB_HASH_SIZE 7603  /* A minimal ls-R has about 3500 entries.  */
+#endif
+#ifndef DB_NAME
+#define DB_NAME "ls-R" /* Basename searched for along the db path.  */
+#endif
+
+static hash_table_type alias_db; /* Filled by alias_build: keys are alias
+                                    names, values the real names.  */
+#ifndef ALIAS_NAME
+#define ALIAS_NAME "aliases" /* Basename of the alias files.  */
+#endif
+#ifndef ALIAS_HASH_SIZE
+#define ALIAS_HASH_SIZE 1009
+#endif
+
+static str_list_type db_dir_list; /* Directory (with trailing separator)
+                                     of every ls-R that yielded entries;
+                                     kpse_db_search uses it to decide
+                                     whether a path element is covered.  */
+
+/* Return true if some component of DIRNAME is ``hidden'', i.e., starts
+   with a `.' that is followed by at least one more character which is
+   not a directory separator.  A bare `.' component (as in `./') does
+   not count.  Hidden directories are ones that don't get searched.
+
+   The scan deliberately starts at the second character, so a `.' in
+   the very first position is never examined and looking at the
+   character before each dot is always within the string.  */
+
+static boolean
+ignore_dir_p P1C(const_string, dirname)
+{
+  const_string dot;
+
+  for (dot = strchr (dirname + 1, '.'); dot != NULL;
+       dot = strchr (dot + 1, '.')) {
+    /* Only a dot right after a separator starts a component.  */
+    if (!IS_DIR_SEP (dot[-1]))
+      continue;
+
+    /* `.' at the very end, or `./', is not a hidden directory.  */
+    if (dot[1] == 0 || IS_DIR_SEP (dot[1]))
+      continue;
+
+    return true;
+  }
+
+  return false;
+}
+
+/* If no DB_FILENAME, return false (maybe they aren't using this feature).
+   Otherwise, add entries from DB_FILENAME to TABLE, and return true.
+
+   DB_FILENAME is expected to end in DB_NAME; everything before that
+   (including the trailing directory separator) is taken as the top
+   directory and is prepended to `./'-relative directory lines.
+
+   False is also returned, after a warning, when the file could be read
+   but contained no usable file entries.  On success the top directory
+   is appended to `db_dir_list'.  */
+
+static boolean
+db_build P2C(hash_table_type *, table,  const_string, db_filename)
+{
+  string line;
+  unsigned dir_count = 0, file_count = 0, ignore_dir_count = 0;
+  unsigned len = strlen (db_filename) - sizeof (DB_NAME) + 1; /* Keep the /. */
+  string top_dir = xmalloc (len + 1);
+  string cur_dir = NULL; /* First thing in ls-R might be a filename.  */
+  FILE *db_file = fopen (db_filename, FOPEN_R_MODE);
+
+  strncpy (top_dir, db_filename, len);
+  top_dir[len] = 0;
+
+  if (db_file) {
+    while ((line = read_line (db_file)) != NULL) {
+      len = strlen (line);
+
+      /* A line like `/foo:' = new dir foo.  Allow both absolute (/...)
+         and explicitly relative (./...) names here.  It's a kludge to
+         pass in the directory name with the trailing : still attached,
+         but it doesn't actually hurt.  */
+      if (len > 0 && line[len - 1] == ':' && kpse_absolute_p (line, true)) {
+        /* New directory line.  */
+        if (!ignore_dir_p (line)) {
+          /* Turn the trailing `:' into a directory separator, so the
+             stored directory can be concatenated with a file name.  */
+          line[len - 1] = DIR_SEP;
+          /* If they gave a relative name, prepend full directory name
+             now.  Skip over leading `./', it confuses `match' and is
+             just a waste of space, anyway.  This will lose on `../',
+             but `match' won't work there, either, so it doesn't matter.  */
+          cur_dir = *line == '.' ? concat (top_dir, line + 2) : xstrdup (line);
+          dir_count++;
+        } else {
+          /* Hidden directory: drop its files until the next dir line.  */
+          cur_dir = NULL;
+          ignore_dir_count++;
+        }
+
+      /* Ignore blank, `.' and `..' lines.  The tests are against the
+         string terminator (0), not the digit character.  */
+      } else if (*line != 0 && cur_dir   /* a file line? */
+                 && !(*line == '.'
+                      && (line[1] == 0 || (line[1] == '.' && line[2] == 0))))
+       {/* Make a new hash table entry with a key of `line' and a data
+           of `cur_dir'.  An already-existing identical key is ok, since
+           a file named `foo' can be in more than one directory.  Share
+           `cur_dir' among all its files (and hence never free it). */
+        hash_insert (table, xstrdup (line), cur_dir);
+        file_count++;
+
+      } /* else ignore blank lines or top-level files
+           or files in ignored directories*/
+
+      free (line);
+    }
+
+    xfclose (db_file, db_filename);
+
+    if (file_count == 0) {
+      WARNING1 ("kpathsea: No usable entries in %s", db_filename);
+      WARNING ("kpathsea: See the manual for how to generate ls-R");
+      /* The stream is already closed; the pointer is only reused as
+         the success flag for the return value below.  */
+      db_file = NULL;
+    } else {
+      str_list_add (&db_dir_list, xstrdup (top_dir));
+    }
+
+#ifdef KPSE_DEBUG
+    if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) {
+      /* Don't make this a debugging bit, since the output is so
+         voluminous, and being able to specify -1 is too useful.
+         Instead, let people who want it run the program under
+         a debugger and change the variable that way.  */
+      boolean hash_summary_only = true;
+
+      /* All three counters are unsigned, hence %u throughout.  */
+      DEBUGF4 ("%s: %u entries in %u directories (%u hidden).\n",
+               db_filename, file_count, dir_count, ignore_dir_count);
+      DEBUGF ("ls-R hash table:");
+      hash_print (*table, hash_summary_only);
+      fflush (stderr);
+    }
+#endif /* KPSE_DEBUG */
+  }
+
+  free (top_dir);
+
+  return db_file != NULL;
+}
+
+
+/* Record PASSED_FNAME in the ls-R hash table `db'.  This is for files
+   that get built during a run: rereading all of ls-R just to pick up
+   one new file is not wanted, even if ls-R itself got rebuilt.
+
+   The name is split at its basename; the file part becomes the key and
+   the directory part (trailing separator included) the value.  Both
+   strings are fresh copies that are handed to the table and not freed
+   here.  */
+
+void
+kpse_db_insert P1C(const_string, passed_fname)
+{
+  string path_copy;
+  string leaf;
+  const_string leaf_copy;
+
+  /* We might not have found ls-R, or even had occasion to look for it
+     yet, so there is nothing to do without a hash table.  */
+  if (db.buckets == NULL)
+    return;
+
+  path_copy = xstrdup (passed_fname);
+  leaf = (string) basename (path_copy);
+  leaf_copy = xstrdup (leaf);
+
+  /* Truncate the copy at the file name; what remains is the directory
+     with its trailing /.  */
+  *leaf = '\0';
+
+  hash_insert (&db, leaf_copy, path_copy);
+}
+
+/* Return true if FILENAME could be in PATH_ELT, i.e., if the directory
+   part of FILENAME matches PATH_ELT.  Have to consider // wildcards, but
+   $ and ~ expansion have already been done.
+
+   A trailing // in PATH_ELT matches any remaining subdirectories; an
+   intermediate // matches any number of directory levels, after which
+   the rest of PATH_ELT has to match (checked recursively).
+
+   The scanning loop is always left with `break', never by way of the
+   loop increment: after a mismatch PATH_ELT must keep pointing at the
+   offending character (so the end-of-PATH_ELT test below cannot fire
+   by accident), and FILENAME must never be advanced beyond its
+   terminating null.  */
+
+static boolean
+match P2C(const_string, filename,  const_string, path_elt)
+{
+  const_string original_filename = filename;
+  boolean matched = false;
+
+  for (; *filename && *path_elt; filename++, path_elt++) {
+    if (FILECHARCASEEQ (*filename, *path_elt)) /* normal character match */
+      ;
+
+    else if (IS_DIR_SEP (*path_elt)  /* at // */
+             && original_filename < filename && IS_DIR_SEP (path_elt[-1])) {
+      while (IS_DIR_SEP (*path_elt))
+        path_elt++; /* get past second and any subsequent /'s */
+      if (*path_elt == 0) {
+        /* Trailing //, matches anything. We could make this part of the
+           other case, but it seems pointless to do the extra work.  */
+        matched = true;
+      } else {
+        /* Intermediate //, have to match rest of PATH_ELT.  */
+        for (; !matched && *filename; filename++) {
+          /* Try matching at each possible character that starts a
+             component.  `filename[-1]' is safe: we are past the
+             start of the original string here.  */
+          if (IS_DIR_SEP (filename[-1])
+              && FILECHARCASEEQ (*filename, *path_elt))
+            matched = match (filename, path_elt);
+        }
+      }
+      /* The // case is fully decided at this point.  If nothing
+         matched, FILENAME sits on its terminator and PATH_ELT on a
+         non-null character, so the check below is skipped.  */
+      break;
+    }
+
+    else /* normal character nonmatch, quit */
+      break;
+  }
+
+  /* If we've reached the end of PATH_ELT, check that we're at the last
+     component of FILENAME, we've matched.  */
+  if (!matched && *path_elt == 0) {
+    /* Probably PATH_ELT ended with `vf' or some such, and FILENAME ends
+       with `vf/ptmr.vf'.  In that case, we'll be at a directory
+       separator.  On the other hand, if PATH_ELT ended with a / (as in
+       `vf/'), FILENAME being the same `vf/ptmr.vf', we'll be at the
+       `p'.  Upshot: if we're at a dir separator in FILENAME, skip it.
+       But if not, that's ok, as long as there are no more dir separators.  */
+    if (IS_DIR_SEP (*filename))
+      filename++;
+
+    while (*filename && !IS_DIR_SEP (*filename))
+      filename++;
+    matched = *filename == 0;
+  }
+
+  return matched;
+}
+
+
+/* If DB_DIR is a prefix of PATH_ELT, return true; otherwise false.
+   That is, the question is whether to try the db for a file looked up
+   in PATH_ELT.  If PATH_ELT == ".", for example, the answer is no. If
+   PATH_ELT == "/usr/local/lib/texmf/fonts//tfm", the answer is yes.
+
+   Characters are compared with FILECHARCASEEQ.  An empty DB_DIR never
+   matches anything.
+
+   In practice, ls-R is only needed for lengthy subdirectory
+   comparisons, but there's no gain to checking PATH_ELT to see if it is
+   a subdir match, since the only way to do that is to do a string
+   search in it, which is all we do anyway.  */
+
+static boolean
+elt_in_db P2C(const_string, db_dir,  const_string, path_elt)
+{
+  /* Nothing to compare; also keeps the loop below from stepping past
+     the terminator when both strings are empty.  */
+  if (*db_dir == 0)
+    return false;
+
+  /* The pointers are advanced in separate statements rather than in the
+     macro arguments, so the result does not depend on how many times
+     FILECHARCASEEQ evaluates each argument.  */
+  while (FILECHARCASEEQ (*db_dir, *path_elt)) {
+    db_dir++;
+    path_elt++;
+
+    /* If we've matched the entire db directory, it's good.  */
+    if (*db_dir == 0)
+      return true;
+
+    /* If we've reached the end of PATH_ELT, but not the end of the db
+       directory, it's no good.  */
+    if (*path_elt == 0)
+      return false;
+  }
+
+  /* The strings differ before DB_DIR was used up.  */
+  return false;
+}
+
+/* If ALIAS_FILENAME exists, read it into TABLE and return true;
+   return false if it cannot be opened.
+
+   Lines that are empty or start with `%' or `#' are skipped.  Every
+   other line should hold two whitespace-separated fields, the real
+   name followed by the alias; the entry is stored with the alias as
+   key and the real name as value.  Lines lacking either field are
+   silently ignored.  */
+
+static boolean
+alias_build P2C(hash_table_type *, table,  const_string, alias_filename)
+{
+  string line, real, alias;
+  unsigned count = 0;
+  FILE *alias_file = fopen (alias_filename, FOPEN_R_MODE);
+
+  if (alias_file) {
+    while ((line = read_line (alias_file)) != NULL) {
+      /* comments or empty */
+      if (*line == 0 || *line == '%' || *line == '#') {
+        ;
+      } else {
+        /* Each line should have two fields: realname aliasname.  */
+        real = line;
+        while (*real && ISSPACE (*real))
+          real++;
+        alias = real;
+        while (*alias && !ISSPACE (*alias))
+          alias++;
+        /* Terminate the first field.  Only step over the separator if
+           there is one: when the line ends right after the real name,
+           ALIAS already sits on the terminator and has to stay there,
+           otherwise the scan below would run off the end of the line.  */
+        if (*alias)
+          *alias++ = 0;
+        while (*alias && ISSPACE (*alias))
+          alias++;
+        /* Is the check for errors strong enough?  Should we warn the user
+           for potential errors?  */
+        if (strlen (real) != 0 && strlen (alias) != 0) {
+          hash_insert (table, xstrdup (alias), xstrdup (real));
+          count++;
+        }
+      }
+      free (line);
+    }
+
+#ifdef KPSE_DEBUG
+    if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) {
+      /* As with ls-R above ... */
+      boolean hash_summary_only = true;
+      DEBUGF2 ("%s: %u aliases.\n", alias_filename, count);
+      DEBUGF ("alias hash table:");
+      hash_print (*table, hash_summary_only);
+      fflush (stderr);
+    }
+#endif /* KPSE_DEBUG */
+
+    xfclose (alias_file, alias_filename);
+  }
+
+  return alias_file != NULL;
+}
+
+/* Set up the search path for the database files, then load every ls-R
+   found along it into `db' and every alias file into `alias_db'.
+   When no file of a kind could be loaded, the `buckets' member of the
+   corresponding table is set to NULL.  */
+
+void
+kpse_init_db P1H(void)
+{
+  const_string search_path = kpse_init_format (kpse_db_format);
+  string *found;
+  string *f;
+  boolean any_loaded;
+
+  /* --- ls-R files --- */
+  found = kpse_all_path_search (search_path, DB_NAME);
+
+  /* The table must be created only after the path search above (which
+     ends up calling kpse_db_search recursively), so that db.buckets is
+     still NULL while that search runs.  */
+  db = hash_create (DB_HASH_SIZE);
+
+  any_loaded = false;
+  for (f = found; f && *f; f++) {
+    if (db_build (&db, *f))
+      any_loaded = true;
+    free (*f);
+  }
+
+  if (!any_loaded) {
+    /* If db can't be built, leave `size' nonzero (so we don't
+       rebuild it), but clear `buckets' (so we don't look in it).  */
+    free (db.buckets);
+    db.buckets = NULL;
+  }
+
+  free (found);
+
+  /* --- alias files ---
+     Add the content of any alias databases.  There may exist more than
+     one alias file along DB_NAME files.  This duplicates the above code
+     -- should be a function.  */
+  found = kpse_all_path_search (search_path, ALIAS_NAME);
+
+  alias_db = hash_create (ALIAS_HASH_SIZE);
+
+  any_loaded = false;
+  for (f = found; f && *f; f++) {
+    if (alias_build (&alias_db, *f))
+      any_loaded = true;
+    free (*f);
+  }
+
+  if (!any_loaded) {
+    free (alias_db.buckets);
+    alias_db.buckets = NULL;
+  }
+
+  free (found);
+}
+
+/* Look NAME up in the ls-R database, restricted to directories that
+   match ORIG_PATH_ELT.  Avoid doing anything if this PATH_ELT is
+   irrelevant to the databases.
+
+   Return NULL if there is no database, or if the path element is not
+   below any directory that supplied an ls-R; the caller can then fall
+   back to a disk search.  Otherwise return a newly allocated list
+   (possibly empty) of the readable files found.  NAME itself is tried
+   first, then each of its aliases; all hits are collected in the one
+   list.  If ALL is false, the search stops at the first hit.  */
+
+str_list_type *
+kpse_db_search P3C(const_string, name,  const_string, orig_path_elt,
+                   boolean, all)
+{
+  string *db_dirs, *orig_dirs, *r;
+  const_string last_slash;
+  string path_elt;
+  boolean done;
+  str_list_type *ret;
+  unsigned e;
+  string *aliases = NULL;
+  boolean relevant = false;
+
+  /* If we failed to build the database (or if this is the recursive
+     call to build the db path), quit.  */
+  if (db.buckets == NULL)
+    return NULL;
+
+  /* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we
+     won't find it unless we change NAME to just `cmr10.pk' and append
+     `/dpi600' to PATH_ELT.  We are justified in using a literal `/'
+     here, since that's what tex-glyph.c unconditionally uses in
+     DPI_BITMAP_SPEC.  But don't do anything if the / begins NAME; that
+     should never happen.  */
+  last_slash = strrchr (name, '/');
+  if (last_slash && last_slash != name) {
+    unsigned len = last_slash - name + 1;
+    string dir_part = xmalloc (len);
+    strncpy (dir_part, name, len - 1);
+    dir_part[len - 1] = 0;
+    path_elt = concat3 (orig_path_elt, "/", dir_part);
+    name = last_slash + 1;
+    /* Only the concatenation is needed from here on.  */
+    free (dir_part);
+  } else
+    path_elt = (string) orig_path_elt;
+
+  /* Don't bother doing any lookups if this `path_elt' isn't covered by
+     any of database directories.  We do this not so much because the
+     extra couple of hash lookups matter -- they don't -- but rather
+     because we want to return NULL in this case, so path_search can
+     know to do a disk search.  */
+  for (e = 0; !relevant && e < STR_LIST_LENGTH (db_dir_list); e++) {
+    relevant = elt_in_db (STR_LIST_ELT (db_dir_list, e), path_elt);
+  }
+  if (!relevant) {
+    /* Release the temporary PATH_ELT on this exit, too.  */
+    if (path_elt != orig_path_elt)
+      free (path_elt);
+    return NULL;
+  }
+
+  /* If we have aliases for this name, use them.  */
+  if (alias_db.buckets)
+    aliases = hash_lookup (alias_db, name);
+
+  if (!aliases) {
+    aliases = XTALLOC1 (string);
+    aliases[0] = NULL;
+  }
+  {  /* Push aliases up by one and insert the original name at the front.  */
+    unsigned i;
+    unsigned len = 1; /* Have NULL element already allocated.  */
+    for (r = aliases; *r; r++)
+      len++;
+    XRETALLOC (aliases, len + 1, string);
+    for (i = len; i > 0; i--) {
+      aliases[i] = aliases[i - 1];
+    }
+    aliases[0] = (string) name;
+  }
+
+  /* One result list for the whole search.  It is created once, before
+     the loop, so hits found under different names accumulate and no
+     list is allocated only to be dropped.  */
+  ret = XTALLOC1 (str_list_type);
+  *ret = str_list_init ();
+
+  done = false;
+  for (r = aliases; !done && *r; r++) {
+    string try = *r;
+
+    /* We have an ls-R db.  Look up `try'.  */
+    orig_dirs = db_dirs = hash_lookup (db, try);
+
+    /* For each filename found, see if it matches the path element.  For
+       example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk,
+       and the path looks like .../cx, we don't want the ricoh file.  */
+    while (!done && db_dirs && *db_dirs) {
+      string db_file = concat (*db_dirs, try);
+      boolean matched = match (db_file, path_elt);
+
+#ifdef KPSE_DEBUG
+      if (KPSE_DEBUG_P (KPSE_DEBUG_SEARCH))
+        DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path_elt, matched);
+#endif
+
+      /* We got a hit in the database.  Now see if the file actually
+         exists, possibly under an alias.  */
+      if (matched) {
+        string found = NULL;
+        if (kpse_readable_file (db_file)) {
+          found = db_file;
+
+        } else {
+          string *a;
+
+          free (db_file); /* `db_file' wasn't on disk.  */
+
+          /* The hit in the DB doesn't exist in disk.  Now try all its
+             aliases.  For example, suppose we have a hierarchy on CD,
+             thus `mf.bas', but ls-R contains `mf.base'.  Find it anyway.
+             Could probably work around this with aliases, but
+             this is pretty easy and shouldn't hurt.  The upshot is that
+             if one of the aliases actually exists, we use that.  */
+          for (a = aliases + 1; *a && !found; a++) {
+            string atry = concat (*db_dirs, *a);
+            if (kpse_readable_file (atry))
+              found = atry;
+            else
+              free (atry);
+          }
+        }
+
+        /* If we have a real file, add it to the list, maybe done.
+           The list takes over the string.  */
+        if (found) {
+          str_list_add (ret, found);
+          if (!all)
+            done = true;
+        }
+      } else { /* no match in the db */
+        free (db_file);
+      }
+
+      /* On to the next directory, if any.  */
+      db_dirs++;
+    }
+
+    /* This is just the space for the pointers, not the strings.  */
+    if (orig_dirs && *orig_dirs)
+      free (orig_dirs);
+  }
+
+  free (aliases);
+
+  /* If we had to break up NAME, free the temporary PATH_ELT.  */
+  if (path_elt != orig_path_elt)
+    free (path_elt);
+
+  return ret;
+}