diff lib/regex_internal.c @ 6184:f1728546eca4

On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the old glibc regex code mishandles strings longer than 2**31 bytes. This patch fixes this when the regex code is used in gnulib (i.e., outside glibc). * lib/regex.h (_REGEX_LARGE_OFFSETS): New feature-test macro, governing whether the rest of this patch is active. By default, the macro is disabled and the patch has no effect. (regoff_t) [defined _REGEX_LARGE_OFFSETS]: Define to off_t, not int. (__re_idx_t, __re_size_t, __re_long_size_t): New types. (struct re_pattern_buffer, re_search, re_search_2, re_match): (re_match_2, re_set_registers): Use the new types. * lib/regex_internal.h (Idx, re_hashval_t): New types. (REG_MISSING, REG_ERROR, REG_VALID_INDEX, REG_VALID_NONZERO_INDEX): New macros. (re_node_set, re_charset_t, re_token_t, re_string_realloc_buffers): (re_string_context_at, bin_tree_t, re_dfastate_t): (struct re_state_table_entry, state_array_t, re_sub_match_last_t): (re_sub_match_top_t, re_match_context_t, re_sift_context_t): (struct re_fail_stack_ent_t, struct re_fail_stack_t, struct re_dfa_t): (re_string_char_size_at, re_string_wchar_at): (re_string_elem_size_at): Use the new types and macros to port to 64-bit hosts. Use unsigned types for internal values, so that the code mostly works even for arrays larger than SSIZE_MAX. * lib/regcomp.c (re_compile_internal, init_dfa, duplicate_node): (search_duplicated_node, calc_eclosure_iter, fetch_number): (parse_reg_exp, parse_branch, parse_expression, parse_sub_exp): (build_equiv_class, build_charclass, re_compile_fastmap_iter): (free_dfa_content, create_initial_state, optimize_utf8, analyze): (optimize_subexps, calc_first, link_nfa_nodes, duplicate_node_closure): (calc_inveclosure, parse_dup_op, build_range_exp): (build_collating_symbol, parse_bracket_exp, build_charclass_op): (fetch_number, create_token_tree, mark_opt_subexp): Likewise. * lib/regex_internal.c (re_string_construct_common, create_ci_newstate): (create_cd_newstate, re_string_allocate, re_string_construct): (re_string_realloc_buffers, build_wcs_upper_buffer): (re_string_skip_chars, build_upper_buffer, re_string_translate_buffer): (re_string_reconstruct, re_string_peek_byte_case): (re_string_fetch_byte_case, re_string_context_at): (re_node_set_alloc, re_node_set_init_1, re_node_set_init_2): (re_node_set_init_copy, re_node_set_add_intersect): (re_node_set_init_union, re_node_set_merge, re_node_set_insert): (re_node_set_insert_last, re_node_set_compare, re_node_set_contains): (re_node_set_remove_at, re_dfa_add_node, calc_state_hash): (re_acquire_state, re_acquire_state_context, register_state): Likewise. * lib/regex.c (match_ctx_init, match_ctx_add_entry, search_cur_bkref_entry): (match_ctx_add_subtop, match_ctx_add_sublast, sift_ctx_init): (re_search_internal, re_search_2_stub, re_search_stub) (re_copy_regs, check_matching, check_halt_state_context, update_regs): (push_fail_stack, sift_states_iter_mb, build_sifted_states): (update_cur_sifted_state, check_dst_limits): (check_dst_limits_calc_pos_1, check_dst_limits_calc_pos): (check_subexp_limits, sift_states_bkref, merge_state_array): (check_subexp_matching_top, get_subexp, get_subexp_sub): (find_subexp_node, check_arrival, check_arrival_add_next_nodes): (check_arrival_expand_ecl, check_arrival_expand_ecl_sub): (expand_bkref_cache, check_node_accept_bytes): (group_nodes_into_DFAstates, check_node_accept, regexec, re_match): (re_search, re_match_2, re_search_2, prune_impossible_nodes): (acquire_init_state_context, check_halt_node_context): (proceed_next_node, pop_fail_stack, set_regs, free_fail_stack_return): (sift_states_backward, clean_state_log_if_needed): (sub_epsilon_src_nodes, add_epsilone_src_nodes, merge_state_with_log): (find_recover_state, transit_state_sb, transit_state_mb): (transit_state_bkref, build_trtable, match_ctx_clean): Likewise. * lib/regcomp.c (parse_dup_op): Add an extra test if Idx is unsigned, to work around an assumption that REG_MISSING is negative. * m4/regex.m4 (gl_REGEX): Require AC_SYS_LARGEFILE, Define _REGEX_LARGE_OFFSETS). Test for regoff_t/off_t bug in 64-bit and large-file glibc and in 32-bit large-file Solaris. * config/srclist.txt: Add glibc bug 1281.
author Paul Eggert <eggert@cs.ucla.edu>
date Wed, 31 Aug 2005 22:51:09 +0000
parents 2fce0cb93f59
children 6b09f7f6ba73
line wrap: on
line diff
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -17,17 +17,17 @@
    with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
 
-static void re_string_construct_common (const char *str, int len,
+static void re_string_construct_common (const char *str, Idx len,
 					re_string_t *pstr,
 					REG_TRANSLATE_TYPE trans, int icase,
 					const re_dfa_t *dfa) internal_function;
 static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
 					  const re_node_set *nodes,
-					  unsigned int hash) internal_function;
+					  re_hashval_t hash) internal_function;
 static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
 					  const re_node_set *nodes,
 					  unsigned int context,
-					  unsigned int hash) internal_function;
+					  re_hashval_t hash) internal_function;
 
 /* Functions for string operation.  */
 
@@ -36,11 +36,11 @@
 
 static reg_errcode_t
 internal_function
-re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
 		    REG_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
 {
   reg_errcode_t ret;
-  int init_buf_len;
+  Idx init_buf_len;
 
   /* Ensure at least one character fits into the buffers.  */
   if (init_len < dfa->mb_cur_max)
@@ -64,7 +64,7 @@
 
 static reg_errcode_t
 internal_function
-re_string_construct (re_string_t *pstr, const char *str, int len,
+re_string_construct (re_string_t *pstr, const char *str, Idx len,
 		     REG_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
 {
   reg_errcode_t ret;
@@ -127,7 +127,7 @@
 
 static reg_errcode_t
 internal_function
-re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
 {
 #ifdef RE_ENABLE_I18N
   if (pstr->mb_cur_max > 1)
@@ -138,7 +138,7 @@
       pstr->wcs = new_wcs;
       if (pstr->offsets != NULL)
 	{
-	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
 	  if (BE (new_offsets == NULL, 0))
 	    return REG_ESPACE;
 	  pstr->offsets = new_offsets;
@@ -160,7 +160,7 @@
 
 static void
 internal_function
-re_string_construct_common (const char *str, int len, re_string_t *pstr,
+re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
 			    REG_TRANSLATE_TYPE trans, int icase,
 			    const re_dfa_t *dfa)
 {
@@ -201,7 +201,7 @@
   unsigned char buf[64];
 #endif
   mbstate_t prev_st;
-  int byte_idx, end_idx, remain_len;
+  Idx byte_idx, end_idx, remain_len;
   size_t mbclen;
 
   /* Build the buffers from pstr->valid_len to either pstr->len or
@@ -263,7 +263,7 @@
 build_wcs_upper_buffer (re_string_t *pstr)
 {
   mbstate_t prev_st;
-  int src_idx, byte_idx, end_idx, remain_len;
+  Idx src_idx, byte_idx, end_idx, remain_len;
   size_t mbclen;
 #ifdef _LIBC
   char buf[MB_LEN_MAX];
@@ -392,7 +392,7 @@
 
 		    if (pstr->offsets == NULL)
 		      {
-			pstr->offsets = re_malloc (int, pstr->bufs_len);
+			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
 
 			if (pstr->offsets == NULL)
 			  return REG_ESPACE;
@@ -474,12 +474,12 @@
 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
    Return the index.  */
 
-static int
+static Idx
 internal_function
-re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
 {
   mbstate_t prev_st;
-  int rawbuf_idx;
+  Idx rawbuf_idx;
   size_t mbclen;
   wchar_t wc = 0;
 
@@ -487,7 +487,7 @@
   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
        rawbuf_idx < new_raw_idx;)
     {
-      int remain_len;
+      Idx remain_len;
       remain_len = pstr->len - rawbuf_idx;
       prev_st = pstr->cur_state;
       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
@@ -513,7 +513,7 @@
 internal_function
 build_upper_buffer (re_string_t *pstr)
 {
-  int char_idx, end_idx;
+  Idx char_idx, end_idx;
   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 
   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
@@ -536,7 +536,7 @@
 internal_function
 re_string_translate_buffer (re_string_t *pstr)
 {
-  int buf_idx, end_idx;
+  Idx buf_idx, end_idx;
   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 
   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
@@ -555,9 +555,9 @@
 
 static reg_errcode_t
 internal_function
-re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
 {
-  int offset = idx - pstr->raw_mbs_idx;
+  regoff_t offset = (regoff_t) idx - (regoff_t) pstr->raw_mbs_idx;
   if (BE (offset < 0, 0))
     {
       /* Reset buffer.  */
@@ -621,7 +621,7 @@
 #ifdef RE_ENABLE_I18N
 	  if (pstr->mb_cur_max > 1)
 	    {
-	      int wcs_idx;
+	      Idx wcs_idx;
 	      wint_t wc = WEOF;
 
 	      if (pstr->is_utf8)
@@ -637,7 +637,7 @@
 		      {
 			mbstate_t cur_state;
 			wchar_t wc2;
-			int mlen = raw + pstr->len - p;
+			Idx mlen = raw + pstr->len - p;
 			unsigned char buf[6];
 
 			q = p;
@@ -732,9 +732,10 @@
 
 static unsigned char
 internal_function __attribute ((pure))
-re_string_peek_byte_case (const re_string_t *pstr, int idx)
+re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
 {
-  int ch, off;
+  int ch;
+  Idx off;
 
   /* Handle the common (easiest) cases first.  */
   if (BE (!pstr->mbs_allocated, 1))
@@ -776,7 +777,8 @@
 #ifdef RE_ENABLE_I18N
   if (pstr->offsets_needed)
     {
-      int off, ch;
+      Idx off;
+      int ch;
 
       /* For tr_TR.UTF-8 [[:islower:]] there is
 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
@@ -819,10 +821,10 @@
 
 static unsigned int
 internal_function
-re_string_context_at (const re_string_t *input, int idx, int eflags)
+re_string_context_at (const re_string_t *input, Idx idx, int eflags)
 {
   int c;
-  if (BE (idx < 0, 0))
+  if (BE (! REG_VALID_INDEX (idx), 0))
     /* In this case, we use the value stored in input->tip_context,
        since we can't know the character in input->mbs[-1] here.  */
     return input->tip_context;
@@ -833,7 +835,7 @@
   if (input->mb_cur_max > 1)
     {
       wint_t wc;
-      int wc_idx = idx;
+      Idx wc_idx = idx;
       while(input->wcs[wc_idx] == WEOF)
 	{
 #ifdef DEBUG
@@ -864,11 +866,11 @@
 
 static reg_errcode_t
 internal_function
-re_node_set_alloc (re_node_set *set, int size)
+re_node_set_alloc (re_node_set *set, Idx size)
 {
   set->alloc = size;
   set->nelem = 0;
-  set->elems = re_malloc (int, size);
+  set->elems = re_malloc (Idx, size);
   if (BE (set->elems == NULL, 0))
     return REG_ESPACE;
   return REG_NOERROR;
@@ -876,11 +878,11 @@
 
 static reg_errcode_t
 internal_function
-re_node_set_init_1 (re_node_set *set, int elem)
+re_node_set_init_1 (re_node_set *set, Idx elem)
 {
   set->alloc = 1;
   set->nelem = 1;
-  set->elems = re_malloc (int, 1);
+  set->elems = re_malloc (Idx, 1);
   if (BE (set->elems == NULL, 0))
     {
       set->alloc = set->nelem = 0;
@@ -892,10 +894,10 @@
 
 static reg_errcode_t
 internal_function
-re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
 {
   set->alloc = 2;
-  set->elems = re_malloc (int, 2);
+  set->elems = re_malloc (Idx, 2);
   if (BE (set->elems == NULL, 0))
     return REG_ESPACE;
   if (elem1 == elem2)
@@ -928,13 +930,13 @@
   if (src->nelem > 0)
     {
       dest->alloc = dest->nelem;
-      dest->elems = re_malloc (int, dest->alloc);
+      dest->elems = re_malloc (Idx, dest->alloc);
       if (BE (dest->elems == NULL, 0))
 	{
 	  dest->alloc = dest->nelem = 0;
 	  return REG_ESPACE;
 	}
-      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
     }
   else
     re_node_set_init_empty (dest);
@@ -950,7 +952,7 @@
 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
 			   const re_node_set *src2)
 {
-  int i1, i2, is, id, delta, sbase;
+  Idx i1, i2, is, id, delta, sbase;
   if (src1->nelem == 0 || src2->nelem == 0)
     return REG_NOERROR;
 
@@ -958,8 +960,8 @@
      conservative estimate.  */
   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
     {
-      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
-      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
       if (BE (new_elems == NULL, 0))
         return REG_ESPACE;
       dest->elems = new_elems;
@@ -977,25 +979,25 @@
       if (src1->elems[i1] == src2->elems[i2])
 	{
 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
-	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
 	    --id;
 
-          if (id < 0 || dest->elems[id] != src1->elems[i1])
+          if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
             dest->elems[--sbase] = src1->elems[i1];
 
-	  if (--i1 < 0 || --i2 < 0)
+	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
 	    break;
 	}
 
       /* Lower the highest of the two items.  */
       else if (src1->elems[i1] < src2->elems[i2])
 	{
-	  if (--i2 < 0)
+	  if (! REG_VALID_INDEX (--i2))
 	    break;
 	}
       else
 	{
-	  if (--i1 < 0)
+	  if (! REG_VALID_INDEX (--i1))
 	    break;
 	}
     }
@@ -1008,7 +1010,7 @@
      DEST elements are already in place; this is more or
      less the same loop that is in re_node_set_merge.  */
   dest->nelem += delta;
-  if (delta > 0 && id >= 0)
+  if (delta > 0 && REG_VALID_INDEX (id))
     for (;;)
       {
         if (dest->elems[is] > dest->elems[id])
@@ -1022,13 +1024,13 @@
           {
             /* Slide from the bottom.  */
             dest->elems[id + delta] = dest->elems[id];
-            if (--id < 0)
+            if (! REG_VALID_INDEX (--id))
               break;
           }
       }
 
   /* Copy remaining SRC elements.  */
-  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof dest->elems[0]);
 
   return REG_NOERROR;
 }
@@ -1041,11 +1043,11 @@
 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
 			const re_node_set *src2)
 {
-  int i1, i2, id;
+  Idx i1, i2, id;
   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
     {
       dest->alloc = src1->nelem + src2->nelem;
-      dest->elems = re_malloc (int, dest->alloc);
+      dest->elems = re_malloc (Idx, dest->alloc);
       if (BE (dest->elems == NULL, 0))
 	return REG_ESPACE;
     }
@@ -1073,13 +1075,13 @@
   if (i1 < src1->nelem)
     {
       memcpy (dest->elems + id, src1->elems + i1,
-	     (src1->nelem - i1) * sizeof (int));
+	     (src1->nelem - i1) * sizeof dest->elems[0]);
       id += src1->nelem - i1;
     }
   else if (i2 < src2->nelem)
     {
       memcpy (dest->elems + id, src2->elems + i2,
-	     (src2->nelem - i2) * sizeof (int));
+	     (src2->nelem - i2) * sizeof dest->elems[0]);
       id += src2->nelem - i2;
     }
   dest->nelem = id;
@@ -1093,13 +1095,13 @@
 internal_function
 re_node_set_merge (re_node_set *dest, const re_node_set *src)
 {
-  int is, id, sbase, delta;
+  Idx is, id, sbase, delta;
   if (src == NULL || src->nelem == 0)
     return REG_NOERROR;
   if (dest->alloc < 2 * src->nelem + dest->nelem)
     {
-      int new_alloc = 2 * (src->nelem + dest->alloc);
-      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      Idx new_alloc = 2 * (src->nelem + dest->alloc);
+      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
       if (BE (new_buffer == NULL, 0))
 	return REG_ESPACE;
       dest->elems = new_buffer;
@@ -1109,14 +1111,15 @@
   if (BE (dest->nelem == 0, 0))
     {
       dest->nelem = src->nelem;
-      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
       return REG_NOERROR;
     }
 
   /* Copy into the top of DEST the items of SRC that are not
      found in DEST.  Maybe we could binary search in DEST?  */
   for (sbase = dest->nelem + 2 * src->nelem,
-       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+       is = src->nelem - 1, id = dest->nelem - 1;
+       REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
     {
       if (dest->elems[id] == src->elems[is])
         is--, id--;
@@ -1126,11 +1129,12 @@
         --id;
     }
 
-  if (is >= 0)
+  if (REG_VALID_INDEX (is))
     {
       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
       sbase -= is + 1;
-      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+      memcpy (dest->elems + sbase, src->elems,
+	      (is + 1) * sizeof dest->elems[0]);
     }
 
   id = dest->nelem - 1;
@@ -1155,11 +1159,11 @@
         {
           /* Slide from the bottom.  */
           dest->elems[id + delta] = dest->elems[id];
-	  if (--id < 0)
+	  if (! REG_VALID_INDEX (--id))
 	    {
 	      /* Copy remaining SRC elements.  */
 	      memcpy (dest->elems, dest->elems + sbase,
-	              delta * sizeof (int));
+	              delta * sizeof dest->elems[0]);
 	      break;
 	    }
 	}
@@ -1174,9 +1178,9 @@
 
 static int
 internal_function
-re_node_set_insert (re_node_set *set, int elem)
+re_node_set_insert (re_node_set *set, Idx elem)
 {
-  int idx;
+  Idx idx;
   /* In case the set is empty.  */
   if (set->alloc == 0)
     {
@@ -1197,9 +1201,9 @@
   /* Realloc if we need.  */
   if (set->alloc == set->nelem)
     {
-      int *new_elems;
+      Idx *new_elems;
       set->alloc = set->alloc * 2;
-      new_elems = re_realloc (set->elems, int, set->alloc);
+      new_elems = re_realloc (set->elems, Idx, set->alloc);
       if (BE (new_elems == NULL, 0))
 	return -1;
       set->elems = new_elems;
@@ -1231,14 +1235,14 @@
 
 static int
 internal_function
-re_node_set_insert_last (re_node_set *set, int elem)
+re_node_set_insert_last (re_node_set *set, Idx elem)
 {
   /* Realloc if we need.  */
   if (set->alloc == set->nelem)
     {
-      int *new_elems;
+      Idx *new_elems;
       set->alloc = (set->alloc + 1) * 2;
-      new_elems = re_realloc (set->elems, int, set->alloc);
+      new_elems = re_realloc (set->elems, Idx, set->alloc);
       if (BE (new_elems == NULL, 0))
 	return -1;
       set->elems = new_elems;
@@ -1256,10 +1260,10 @@
 internal_function __attribute ((pure))
 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
 {
-  int i;
+  Idx i;
   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
     return 0;
-  for (i = set1->nelem ; --i >= 0 ; )
+  for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
     if (set1->elems[i] != set2->elems[i])
       return 0;
   return 1;
@@ -1267,12 +1271,12 @@
 
 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
 
-static int
+static Idx
 internal_function __attribute ((pure))
-re_node_set_contains (const re_node_set *set, int elem)
+re_node_set_contains (const re_node_set *set, Idx elem)
 {
-  unsigned int idx, right, mid;
-  if (set->nelem <= 0)
+  __re_size_t idx, right, mid;
+  if (! REG_VALID_NONZERO_INDEX (set->nelem))
     return 0;
 
   /* Binary search the element.  */
@@ -1291,7 +1295,7 @@
 
 static void
 internal_function
-re_node_set_remove_at (re_node_set *set, int idx)
+re_node_set_remove_at (re_node_set *set, Idx idx)
 {
   if (idx < 0 || idx >= set->nelem)
     return;
@@ -1302,31 +1306,31 @@
 
 
 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
-   Or return -1, if an error will be occured.  */
+   Or return REG_MISSING if an error occurred.  */
 
-static int
+static Idx
 internal_function
 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
 {
   int type = token.type;
   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
     {
-      int new_nodes_alloc = dfa->nodes_alloc * 2;
-      int *new_nexts, *new_indices;
+      Idx new_nodes_alloc = dfa->nodes_alloc * 2;
+      Idx *new_nexts, *new_indices;
       re_node_set *new_edests, *new_eclosures;
 
       re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t,
 					  new_nodes_alloc);
       if (BE (new_nodes == NULL, 0))
-	return -1;
+	return REG_MISSING;
       dfa->nodes = new_nodes;
-      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
-      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
+      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
       if (BE (new_nexts == NULL || new_indices == NULL
 	      || new_edests == NULL || new_eclosures == NULL, 0))
-	return -1;
+	return REG_MISSING;
       dfa->nexts = new_nexts;
       dfa->org_indices = new_indices;
       dfa->edests = new_edests;
@@ -1339,18 +1343,18 @@
   dfa->nodes[dfa->nodes_len].accept_mb =
     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
 #endif
-  dfa->nexts[dfa->nodes_len] = -1;
+  dfa->nexts[dfa->nodes_len] = REG_MISSING;
   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
   return dfa->nodes_len++;
 }
 
-static inline unsigned int
+static inline re_hashval_t
 internal_function
 calc_state_hash (const re_node_set *nodes, unsigned int context)
 {
-  unsigned int hash = nodes->nelem + context;
-  int i;
+  re_hashval_t hash = nodes->nelem + context;
+  Idx i;
   for (i = 0 ; i < nodes->nelem ; i++)
     hash += nodes->elems[i];
   return hash;
@@ -1369,10 +1373,10 @@
 internal_function
 re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, const re_node_set *nodes)
 {
-  unsigned int hash;
+  re_hashval_t hash;
   re_dfastate_t *new_state;
   struct re_state_table_entry *spot;
-  int i;
+  Idx i;
 #ifdef lint
   /* Suppress bogus uninitialized-variable warnings.  */
   *err = REG_NOERROR;
@@ -1420,10 +1424,10 @@
 re_acquire_state_context (reg_errcode_t *err, re_dfa_t *dfa,
 			  const re_node_set *nodes, unsigned int context)
 {
-  unsigned int hash;
+  re_hashval_t hash;
   re_dfastate_t *new_state;
   struct re_state_table_entry *spot;
-  int i;
+  Idx i;
 #ifdef lint
   /* Suppress bogus uninitialized-variable warnings.  */
   *err = REG_NOERROR;
@@ -1461,11 +1465,11 @@
 
 static reg_errcode_t
 internal_function
-register_state (re_dfa_t *dfa, re_dfastate_t *newstate, unsigned int hash)
+register_state (re_dfa_t *dfa, re_dfastate_t *newstate, re_hashval_t hash)
 {
   struct re_state_table_entry *spot;
   reg_errcode_t err;
-  int i;
+  Idx i;
 
   newstate->hash = hash;
   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
@@ -1473,7 +1477,7 @@
     return REG_ESPACE;
   for (i = 0; i < newstate->nodes.nelem; i++)
     {
-      int elem = newstate->nodes.elems[i];
+      Idx elem = newstate->nodes.elems[i];
       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
         re_node_set_insert_last (&newstate->non_eps_nodes, elem);
     }
@@ -1481,7 +1485,7 @@
   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   if (BE (spot->alloc <= spot->num, 0))
     {
-      int new_alloc = 2 * spot->num + 2;
+      Idx new_alloc = 2 * spot->num + 2;
       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
 					      new_alloc);
       if (BE (new_array == NULL, 0))
@@ -1498,9 +1502,9 @@
 
 static re_dfastate_t *
 internal_function
-create_ci_newstate (re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash)
+create_ci_newstate (re_dfa_t *dfa, const re_node_set *nodes, re_hashval_t hash)
 {
-  int i;
+  Idx i;
   reg_errcode_t err;
   re_dfastate_t *newstate;
 
@@ -1548,9 +1552,9 @@
 static re_dfastate_t *
 internal_function
 create_cd_newstate (re_dfa_t *dfa, const re_node_set *nodes,
-		    unsigned int context, unsigned int hash)
+		    unsigned int context, re_hashval_t hash)
 {
-  int i, nctx_nodes = 0;
+  Idx i, nctx_nodes = 0;
   reg_errcode_t err;
   re_dfastate_t *newstate;