view lib/exclude.c @ 12766:2c14f1e449e5

warn-on-use: use instead of link-warning Each *.in.h file serves two purposes - provide enough self-contained content to serve as a replacement for the system header regardless of the user's compiler, and offer the developer the ability to detect any gnulib modules that might have been inadvertantly excluded. The former requires independence from config.h, and routes everything through a Makefile.am snippet that uses @@ substitution specific to the modules that were in use, with details learned at configure time. The latter works by poisoning anything that gnulib did not replace, but which the developer had natively available, explaining why their tests passed during development. Poisoning relies on gcc features, and requires manual triggering by adding -DGNULIB_POSIXCHECK to CFLAGS; it assumes that <config.h> is properly included. In fact, we do not want to use @@ substitution for HAVE_DECL_* during poisoning, because the warning is only relevant for the gnulib modules which were not included, and thus where the m4 macros to set proper @@ values have not been run. Furthermore, we only need to poison interfaces that already have a declaration; if something is not declared, then the developer wouldn't have been able to link, so their code won't be using the problematic interface in the first place. * modules/stdio (Depends-on, Makefile.am): Drop link-warning. * modules/unistd (Depends-on, Makefile.am): Likewise. * modules/arpa_inet (Depends-on): Replace link-warning with warn-on-use. (Makefile.am): Update rules accordingly. * modules/ctype (Depends-on, Makefile.am): Likewise. * modules/dirent (Depends-on, Makefile.am): Likewise. * modules/fcntl-h (Depends-on, Makefile.am): Likewise. * modules/inttypes (Depends-on, Makefile.am): Likewise. * modules/langinfo (Depends-on, Makefile.am): Likewise. * modules/locale (Depends-on, Makefile.am): Likewise. * modules/math (Depends-on, Makefile.am): Likewise. * modules/search (Depends-on, Makefile.am): Likewise. * modules/signal (Depends-on, Makefile.am): Likewise. * modules/spawn (Depends-on, Makefile.am): Likewise. * modules/stdlib (Depends-on, Makefile.am): Likewise. * modules/string (Depends-on, Makefile.am): Likewise. * modules/strings (Depends-on, Makefile.am): Likewise. * modules/sys_file (Depends-on, Makefile.am): Likewise. * modules/sys_ioctl (Depends-on, Makefile.am): Likewise. * modules/sys_select (Depends-on, Makefile.am): Likewise. * modules/sys_socket (Depends-on, Makefile.am): Likewise. * modules/sys_stat (Depends-on, Makefile.am): Likewise. * modules/sys_times (Depends-on, Makefile.am): Likewise. * modules/sys_utsname (Depends-on, Makefile.am): Likewise. * modules/wchar (Depends-on, Makefile.am): Likewise. * m4/arpa_inet_h.m4 (gl_HEADER_ARPA_INET): Check which functions should be poisoned. * m4/ctype.m4 (gl_CTYPE_H): Likewise. * m4/dirent_h.m4 (gl_DIRENT_H): Likewise. * m4/fcntl_h.m4 (gl_FCNTL_H): Likewise. * m4/inttypes.m4 (gl_INTTYPES_H): Likewise. * m4/langinfo_h.m4 (gl_LANGINFO_H): Likewise. * m4/locale_h.m4 (gl_LOCALE_H): Likewise. * m4/math_h.m4 (gl_MATH_H): Likewise. * m4/search_h.m4 (gl_SEARCH_H): Likewise. * m4/signal_h.m4 (gl_SIGNAL_H): Likewise. * m4/spawn_h.m4 (gl_SPAWN_H): Likewise. * m4/stdio_h.m4 (gl_STDIO_H): Likewise. * m4/stdlib_h.m4 (gl_STDLIB_H): Likewise. * m4/string_h.m4 (gl_HEADER_STRING_H_BODY): Likewise. * m4/strings_h.m4 (gl_HEADER_STRINGS_H_BODY): Likewise. * m4/sys_file_h.m4 (gl_HEADER_SYS_FILE_H_DEFAULTS): Likewise. * m4/sys_ioctl_h.m4 (gl_SYS_IOCTL_H): Likewise. * m4/sys_select_h.m4 (gl_HEADER_SYS_SELECT): Likewise. * m4/sys_socket_h.m4 (gl_HEADER_SYS_SOCKET): Likewise. * m4/sys_stat_h.m4 (gl_HEADER_SYS_STAT_H): Likewise. * m4/sys_times_h.m4 (gl_SYS_TIMES_H): Likewise. * m4/sys_utsname_h.m4 (gl_SYS_UTSNAME_H): Likewise. * m4/unistd_h.m4 (gl_UNISTD_H): Likewise. * m4/wchar.m4 (gl_WCHAR_H): Likewise. * lib/arpa_inet.in.h: Use _GL_WARN_ON_USE instead of GL_LINK_WARNING. * lib/ctype.in.h: Likewise. * lib/dirent.in.h: Likewise. * lib/fcntl.in.h: Likewise. * lib/inttypes.in.h: Likewise. * lib/langinfo.in.h: Likewise. * lib/locale.in.h: Likewise. * lib/math.in.h: Likewise. * lib/search.in.h: Likewise. * lib/signal.in.h: Likewise. * lib/spawn.in.h: Likewise. * lib/stdio.in.h: Likewise. * lib/stdlib.in.h: Likewise. * lib/string.in.h: Likewise. * lib/strings.in.h: Likewise. * lib/sys_file.in.h: Likewise. * lib/sys_ioctl.in.h: Likewise. * lib/sys_select.in.h: Likewise. * lib/sys_socket.in.h: Likewise. * lib/sys_stat.in.h: Likewise. * lib/sys_times.in.h: Likewise. * lib/sys_utsname.in.h: Likewise. * lib/unistd.in.h: Likewise. * lib/wchar.in.h: Likewise. Signed-off-by: Eric Blake <ebb9@byu.net>
author Eric Blake <ebb9@byu.net>
date Wed, 16 Dec 2009 16:20:28 -0700 (2009-12-16)
parents c2cbabec01dd
children 538b96cce54a
line wrap: on
line source
/* exclude.c -- exclude file names

   Copyright (C) 1992, 1993, 1994, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
   2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Paul Eggert <eggert@twinsun.com>
   and Sergey Poznyakoff <gray@gnu.org>.
   Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
   for improvement suggestions. */

#include <config.h>

#include <stdbool.h>

#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>

#include "exclude.h"
#include "hash.h"
#include "mbuiter.h"
#include "fnmatch.h"
#include "xalloc.h"
#include "verify.h"

#if USE_UNLOCKED_IO
# include "unlocked-io.h"
#endif

/* Non-GNU systems lack these options, so we don't need to check them.  */
#ifndef FNM_CASEFOLD
# define FNM_CASEFOLD 0
#endif
#ifndef FNM_EXTMATCH
# define FNM_EXTMATCH 0
#endif
#ifndef FNM_LEADING_DIR
# define FNM_LEADING_DIR 0
#endif

verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
         & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
            | FNM_CASEFOLD | FNM_EXTMATCH))
        == 0);


/* Exclusion patterns are grouped into a singly-linked list of
   "exclusion segments".  Each segment represents a set of patterns
   that can be matches using the same algorithm.  Non-wildcard
   patterns are kept in hash tables, to speed up searches.  Wildcard
   patterns are stored as arrays of patterns. */


/* An exclude pattern-options pair.  The options are fnmatch options
   ORed with EXCLUDE_* options.  */

struct patopts
  {
    char const *pattern;
    int options;
  };

/* An array of pattern-options pairs.  */

struct exclude_pattern
  {
    struct patopts *exclude;
    size_t exclude_alloc;
    size_t exclude_count;
  };

enum exclude_type
  {
    exclude_hash,                    /* a hash table of excluded names */
    exclude_pattern                  /* an array of exclude patterns */
  };

struct exclude_segment
  {
    struct exclude_segment *next;    /* next segment in list */
    enum exclude_type type;          /* type of this segment */
    int options;                     /* common options for this segment */
    union
    {
      Hash_table *table;             /* for type == exclude_hash */
      struct exclude_pattern pat;    /* for type == exclude_pattern */
    } v;
  };

/* The exclude structure keeps a singly-linked list of exclude segments */
struct exclude
  {
    struct exclude_segment *head, *tail;
  };

/* Return true if str has wildcard characters */
bool
fnmatch_pattern_has_wildcards (const char *str, int options)
{
  const char *cset = "\\?*[]";
  if (options & FNM_NOESCAPE)
    cset++;
  while (*str)
    {
      size_t n = strcspn (str, cset);
      if (str[n] == 0)
        break;
      else if (str[n] == '\\')
        {
          str += n + 1;
          if (*str)
            str++;
        }
      else
        return true;
    }
  return false;
}

/* Return a newly allocated and empty exclude list.  */

struct exclude *
new_exclude (void)
{
  return xzalloc (sizeof *new_exclude ());
}

/* Calculate the hash of string.  */
static size_t
string_hasher (void const *data, size_t n_buckets)
{
  char const *p = data;
  return hash_string (p, n_buckets);
}

/* Ditto, for case-insensitive hashes */
static size_t
string_hasher_ci (void const *data, size_t n_buckets)
{
  char const *p = data;
  mbui_iterator_t iter;
  size_t value = 0;

  for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
    {
      mbchar_t m = mbui_cur (iter);
      wchar_t wc;

      if (m.wc_valid)
        wc = towlower (m.wc);
      else
        wc = *m.ptr;

      value = (value * 31 + wc) % n_buckets;
    }

  return value;
}

/* compare two strings for equality */
static bool
string_compare (void const *data1, void const *data2)
{
  char const *p1 = data1;
  char const *p2 = data2;
  return strcmp (p1, p2) == 0;
}

/* compare two strings for equality, case-insensitive */
static bool
string_compare_ci (void const *data1, void const *data2)
{
  char const *p1 = data1;
  char const *p2 = data2;
  return mbscasecmp (p1, p2) == 0;
}

static void
string_free (void *data)
{
  free (data);
}

/* Create new exclude segment of given TYPE and OPTIONS, and attach it
   to the tail of list in EX */
static struct exclude_segment *
new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
{
  struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
  sp->type = type;
  sp->options = options;
  switch (type)
    {
    case exclude_pattern:
      break;

    case exclude_hash:
      sp->v.table = hash_initialize (0, NULL,
                                     (options & FNM_CASEFOLD) ?
                                       string_hasher_ci
                                       : string_hasher,
                                     (options & FNM_CASEFOLD) ?
                                       string_compare_ci
                                       : string_compare,
                                     string_free);
      break;
    }
  if (ex->tail)
    ex->tail->next = sp;
  else
    ex->head = sp;
  ex->tail = sp;
  return sp;
}

/* Free a single exclude segment */
static void
free_exclude_segment (struct exclude_segment *seg)
{
  switch (seg->type)
    {
    case exclude_pattern:
      free (seg->v.pat.exclude);
      break;

    case exclude_hash:
      hash_free (seg->v.table);
      break;
    }
  free (seg);
}

/* Free the storage associated with an exclude list.  */
void
free_exclude (struct exclude *ex)
{
  struct exclude_segment *seg;
  for (seg = ex->head; seg; )
    {
      struct exclude_segment *next = seg->next;
      free_exclude_segment (seg);
      seg = next;
    }
  free (ex);
}

/* Return zero if PATTERN matches F, obeying OPTIONS, except that
   (unlike fnmatch) wildcards are disabled in PATTERN.  */

static int
fnmatch_no_wildcards (char const *pattern, char const *f, int options)
{
  if (! (options & FNM_LEADING_DIR))
    return ((options & FNM_CASEFOLD)
            ? mbscasecmp (pattern, f)
            : strcmp (pattern, f));
  else if (! (options & FNM_CASEFOLD))
    {
      size_t patlen = strlen (pattern);
      int r = strncmp (pattern, f, patlen);
      if (! r)
        {
          r = f[patlen];
          if (r == '/')
            r = 0;
        }
      return r;
    }
  else
    {
      /* Walk through a copy of F, seeing whether P matches any prefix
         of F.

         FIXME: This is an O(N**2) algorithm; it should be O(N).
         Also, the copy should not be necessary.  However, fixing this
         will probably involve a change to the mbs* API.  */

      char *fcopy = xstrdup (f);
      char *p;
      int r;
      for (p = fcopy; ; *p++ = '/')
        {
          p = strchr (p, '/');
          if (p)
            *p = '\0';
          r = mbscasecmp (pattern, fcopy);
          if (!p || r <= 0)
            break;
        }
      free (fcopy);
      return r;
    }
}

bool
exclude_fnmatch (char const *pattern, char const *f, int options)
{
  int (*matcher) (char const *, char const *, int) =
    (options & EXCLUDE_WILDCARDS
     ? fnmatch
     : fnmatch_no_wildcards);
  bool matched = ((*matcher) (pattern, f, options) == 0);
  char const *p;

  if (! (options & EXCLUDE_ANCHORED))
    for (p = f; *p && ! matched; p++)
      if (*p == '/' && p[1] != '/')
        matched = ((*matcher) (pattern, p + 1, options) == 0);

  return matched;
}

/* Return true if the exclude_pattern segment SEG excludes F.  */

static bool
excluded_file_pattern_p (struct exclude_segment const *seg, char const *f)
{
  size_t exclude_count = seg->v.pat.exclude_count;
  struct patopts const *exclude = seg->v.pat.exclude;
  size_t i;
  bool excluded = !! (exclude[0].options & EXCLUDE_INCLUDE);

  /* Scan through the options, until they change excluded */
  for (i = 0; i < exclude_count; i++)
    {
      char const *pattern = exclude[i].pattern;
      int options = exclude[i].options;
      if (excluded != exclude_fnmatch (pattern, f, options))
        return !excluded;
    }
  return excluded;
}

/* Return true if the exclude_hash segment SEG excludes F.
   BUFFER is an auxiliary storage of the same length as F (with nul
   terminator included) */
static bool
excluded_file_name_p (struct exclude_segment const *seg, char const *f,
                      char *buffer)
{
  int options = seg->options;
  bool excluded = !! (options & EXCLUDE_INCLUDE);
  Hash_table *table = seg->v.table;

  do
    {
      /* initialize the pattern */
      strcpy (buffer, f);

      while (1)
        {
          if (hash_lookup (table, buffer))
            return !excluded;
          if (options & FNM_LEADING_DIR)
            {
              char *p = strrchr (buffer, '/');
              if (p)
                {
                  *p = 0;
                  continue;
                }
            }
          break;
        }

      if (!(options & EXCLUDE_ANCHORED))
        {
          f = strchr (f, '/');
          if (f)
            f++;
        }
      else
        break;
    }
  while (f);
  return excluded;
}

/* Return true if EX excludes F.  */

bool
excluded_file_name (struct exclude const *ex, char const *f)
{
  struct exclude_segment *seg;
  bool excluded;
  char *filename = NULL;

  /* If no patterns are given, the default is to include.  */
  if (!ex->head)
    return false;

  /* Otherwise, the default is the opposite of the first option.  */
  excluded = !! (ex->head->options & EXCLUDE_INCLUDE);
  /* Scan through the segments, seeing whether they change status from
     excluded to included or vice versa.  */
  for (seg = ex->head; seg; seg = seg->next)
    {
      bool rc;

      switch (seg->type)
        {
        case exclude_pattern:
          rc = excluded_file_pattern_p (seg, f);
          break;

        case exclude_hash:
          if (!filename)
            filename = xmalloc (strlen (f) + 1);
          rc = excluded_file_name_p (seg, f, filename);
          break;

        default:
          abort ();
        }
      if (rc != excluded)
        {
          excluded = rc;
          break;
        }
    }
  free (filename);
  return excluded;
}

/* Append to EX the exclusion PATTERN with OPTIONS.  */

void
add_exclude (struct exclude *ex, char const *pattern, int options)
{
  struct exclude_segment *seg;

  if ((options & EXCLUDE_WILDCARDS)
      && fnmatch_pattern_has_wildcards (pattern, options))
    {
      struct exclude_pattern *pat;
      struct patopts *patopts;

      if (ex->tail && ex->tail->type == exclude_pattern
          && ((ex->tail->options & EXCLUDE_INCLUDE) ==
              (options & EXCLUDE_INCLUDE)))
        seg = ex->tail;
      else
        seg = new_exclude_segment (ex, exclude_pattern, options);

      pat = &seg->v.pat;
      if (pat->exclude_count == pat->exclude_alloc)
        pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
                                   sizeof *pat->exclude);
      patopts = &pat->exclude[pat->exclude_count++];
      patopts->pattern = pattern;
      patopts->options = options;
    }
  else
    {
      char *str, *p;
#define EXCLUDE_HASH_FLAGS (EXCLUDE_INCLUDE|EXCLUDE_ANCHORED|\
                            FNM_LEADING_DIR|FNM_CASEFOLD)
      if (ex->tail && ex->tail->type == exclude_hash
          && ((ex->tail->options & EXCLUDE_HASH_FLAGS) ==
              (options & EXCLUDE_HASH_FLAGS)))
        seg = ex->tail;
      else
        seg = new_exclude_segment (ex, exclude_hash, options);

      str = xstrdup (pattern);
      p = hash_insert (seg->v.table, str);
      if (p != str)
        free (str);
    }
}

/* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
   OPTIONS.  LINE_END terminates each pattern in the file.  If
   LINE_END is a space character, ignore trailing spaces and empty
   lines in FILE.  Return -1 on failure, 0 on success.  */

int
add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
                  struct exclude *ex, char const *file_name, int options,
                  char line_end)
{
  bool use_stdin = file_name[0] == '-' && !file_name[1];
  FILE *in;
  char *buf = NULL;
  char *p;
  char const *pattern;
  char const *lim;
  size_t buf_alloc = 0;
  size_t buf_count = 0;
  int c;
  int e = 0;

  if (use_stdin)
    in = stdin;
  else if (! (in = fopen (file_name, "r")))
    return -1;

  while ((c = getc (in)) != EOF)
    {
      if (buf_count == buf_alloc)
        buf = x2realloc (buf, &buf_alloc);
      buf[buf_count++] = c;
    }

  if (ferror (in))
    e = errno;

  if (!use_stdin && fclose (in) != 0)
    e = errno;

  buf = xrealloc (buf, buf_count + 1);
  buf[buf_count] = line_end;
  lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
  pattern = buf;

  for (p = buf; p < lim; p++)
    if (*p == line_end)
      {
        char *pattern_end = p;

        if (isspace ((unsigned char) line_end))
          {
            for (; ; pattern_end--)
              if (pattern_end == pattern)
                goto next_pattern;
              else if (! isspace ((unsigned char) pattern_end[-1]))
                break;
          }

        *pattern_end = '\0';
        (*add_func) (ex, pattern, options);

      next_pattern:
        pattern = p + 1;
      }

  errno = e;
  return e ? -1 : 0;
}