diff lib/regex_internal.h @ 16361:ab59b5080051

regex: merge glibc changes * lib/regcomp.c (init_dfa): Tighten overflow checks to test for IDX_MAX too, since IDX_MAX can be much less than SIZE_MAX. (init_word_char): Work even if bitset words are not exactly 32 or 64 bits wide. Don't assume there are no padding bits. * lib/regex.c [_LIBC]: Do not include <config.h>. [!_LIBC]: Add pragmas to ignore -Wsuggest-attribute=pure and -Wtype-limits. * lib/regex.h (__USE_GNU): Renamed from __USE_GNU_REGEX, to avoid needless disagreement with glibc. All uses changed. Define it to 1 only if _GNU_SOURCE, to match glibc. (_REG_RM_NAME): Remove; no longer needed, since the names in question are now all protected by __USE_GNU. (_REG_RE_NAME): Remove; replaced by glibc's __REPB_PREFIX. (REG_TRANSLATE_TYPE): Remove; replaced by glibc's __RE_TRANSLATE_TYPE. * lib/regex_internal.h (MIN): New macro. 2012-01-03 Ulrich Drepper <drepper@gmail.com> * lib/regcomp.c (init_word_char): Optimize regex a bit. 2011-12-30 Jakub Jelinek <jakub@redhat.com> * lib/regex_internal.c (re_string_fetch_byte_case): Fix up regcomp/regexec. The problem is that parse_bracket_symbol is miscompiled, and it turns out it is because of an incorrect attribute on re_string_fetch_byte_case. Unlike re_string_peek_byte_case, this one is really not pure, it modifies memory (increments pstr->cur_idx), and with the pure attribute GCC assumed it doesn't and it cached the presumed value of regexp->cur_idx in a variable across the for (;; ++i) { if (i >= BRACKET_NAME_BUF_SIZE) return REG_EBRACK; if (token->type == OP_OPEN_CHAR_CLASS) ch = re_string_fetch_byte_case (regexp); else ch = re_string_fetch_byte (regexp); if (re_string_eoi(regexp)) return REG_EBRACK; if (ch == delim && re_string_peek_byte (regexp, 0) == ']') break; elem->opr.name[i] = ch; } 2011-11-29 Andreas Schwab <schwab@redhat.com> * lib/regcomp.c (build_equiv_class): Fix access after end of search string in regex matcher. 
2011-11-12 Ulrich Drepper <drepper@redhat.com> * lib/regex_internal.c, lib/regex_internal.h: Fix warnings in regex. 2011-10-12 Ulrich Drepper <drepper@redhat.com> * lib/regcomp.c (parse_branch): One more regex memory leak fixed. 2011-10-11 Ulrich Drepper <drepper@redhat.com> * lib/regcomp.c (parse_branch, parse_sub_exp): More regex memory leak fixes and tests. (parse_sub_exp, parse_bracket_exp): Fix memory leak for some invalid regular expressions. 2011-05-28 Ulrich Drepper <drepper@gmail.com> * lib/regex_internal.c, lib/regexec.c: Fix unnecessary overallocation due to incomplete character. When incomplete characters are found at the end of a string the code ran amok and allocated lots of memory. Stricter limits are now in place. 2011-05-20 Reuben Thomas <rrt@sc3d.org> * lib/regex.h: Update documentation. 2011-05-16 Aharon Robbins <arnold@skeeve.com> * lib/regex.h: Update RE_SYNTAX*_AWK constants. 2010-05-05 Andreas Schwab <schwab@redhat.com> * lib/regexec.c (find_collation_sequence_value): Fix lookup of collation sequence value during regexp matching. 2010-01-22 Ulrich Drepper <drepper@redhat.com> * lib/regex_internal.c (re_dfa_add_node): Extend overflow detection. 2008-01-16 Ulrich Drepper <drepper@redhat.com> * lib/regex.h: Cleanup namespace. 2007-11-26 Ulrich Drepper <drepper@redhat.com> * lib/regex.h (REG_ENOSYS): Define REG_ENOSYS also for __USE_XOPEN2K. 2007-08-26 Ulrich Drepper <drepper@redhat.com> * lib/regex_internal.h: Prevent some declarations and definitions to be seen when used in tests. 2005-05-06 Ulrich Drepper <drepper@redhat.com> * lib/regex_internal.h: Include bits/libc-lock.h or define dummy __libc_lock_* macros if not _LIBC. (struct re_dfa_t): Add lock.
author Paul Eggert <eggert@cs.ucla.edu>
date Tue, 07 Feb 2012 22:47:01 -0800 (2012-02-08)
parents 18a38c9615f0
children 5290dc20c28b
line wrap: on
line diff
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -22,7 +22,6 @@
 
 #include <assert.h>
 #include <ctype.h>
-#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -32,13 +31,14 @@
 # include "localcharset.h"
 #endif
 #include <locale.h>
-
 #include <wchar.h>
 #include <wctype.h>
+#include <stdbool.h>
 #include <stdint.h>
 #if defined _LIBC
 # include <bits/libc-lock.h>
 #else
+# define __libc_lock_define(CLASS,NAME)
 # define __libc_lock_init(NAME) do { } while (0)
 # define __libc_lock_lock(NAME) do { } while (0)
 # define __libc_lock_unlock(NAME) do { } while (0)
@@ -111,8 +111,8 @@
 # define __wctype wctype
 # define __iswctype iswctype
 # define __btowc btowc
+# define __mbrtowc mbrtowc
 # define __wcrtomb wcrtomb
-# define __mbrtowc mbrtowc
 # define __regfree regfree
 # define attribute_hidden
 #endif /* not _LIBC */
@@ -124,6 +124,11 @@
 #endif
 
 typedef __re_idx_t Idx;
+#ifdef _REGEX_LARGE_OFFSETS
+# define IDX_MAX (SIZE_MAX - 2)
+#else
+# define IDX_MAX INT_MAX
+#endif
 
 /* Special return value for failure to match.  */
 #define REG_MISSING ((Idx) -1)
@@ -418,19 +423,21 @@
 # define internal_function
 #endif
 
+#ifndef NOT_IN_libc
 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
 						Idx new_buf_len)
      internal_function;
-#ifdef RE_ENABLE_I18N
+# ifdef RE_ENABLE_I18N
 static void build_wcs_buffer (re_string_t *pstr) internal_function;
 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
-     internal_function;
-#endif /* RE_ENABLE_I18N */
+  internal_function;
+# endif /* RE_ENABLE_I18N */
 static void build_upper_buffer (re_string_t *pstr) internal_function;
 static void re_string_translate_buffer (re_string_t *pstr) internal_function;
 static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
 					  int eflags)
      internal_function __attribute ((pure));
+#endif
 #define re_string_peek_byte(pstr, offset) \
   ((pstr)->mbs[(pstr)->cur_idx + offset])
 #define re_string_fetch_byte(pstr) \
@@ -468,6 +475,9 @@
 #ifndef MAX
 # define MAX(a,b) ((a) < (b) ? (b) : (a))
 #endif
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b)) /* smaller of A and B; operands may be evaluated twice */
+#endif
 
 #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
 #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
@@ -692,9 +702,7 @@
 #ifdef DEBUG
   char* re_str;
 #endif
-#ifdef _LIBC
   __libc_lock_define (, lock)
-#endif
 };
 
 #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
@@ -818,15 +826,15 @@
   return (wint_t) pstr->wcs[idx];
 }
 
+# ifndef NOT_IN_libc
 static int
 internal_function __attribute ((pure))
 re_string_elem_size_at (const re_string_t *pstr, Idx idx)
 {
-# ifdef _LIBC
+#  ifdef _LIBC
   const unsigned char *p, *extra;
   const int32_t *table, *indirect;
-  int32_t tmp;
-#  include <locale/weight.h>
+#   include <locale/weight.h>
   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 
   if (nrules != 0)
@@ -837,13 +845,14 @@
       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
 						_NL_COLLATE_INDIRECTMB);
       p = pstr->mbs + idx;
-      tmp = findidx (&p);
+      findidx (&p, pstr->len - idx);
       return p - pstr->mbs - idx;
     }
   else
-# endif /* _LIBC */
+#  endif /* _LIBC */
     return 1;
 }
+# endif
 #endif /* RE_ENABLE_I18N */
 
 #ifndef __GNUC_PREREQ