comparison regex.c @ 501:f29b13c2cefd

(TRANSLATE, PATFETCH): Cast the element fetched from `translate'. Don't define if already defined. (regex_compile): Use RE_TRANSLATE_TYPE for `translate'. (compile_range, re_search_2, re_match_2_internal, bcmp_translate) (regcomp): Use RE_TRANSLATE_TYPE for `translate'.
author Richard Stallman <rms@gnu.org>
date Thu, 19 Oct 1995 00:50:51 +0000
parents 985fe9826996
children 2ee098e3c26c
comparison
equal deleted inserted replaced
500:fc733d749dd0 501:f29b13c2cefd
1151 } \ 1151 } \
1152 \ 1152 \
1153 /* Push the info, starting with the registers. */ \ 1153 /* Push the info, starting with the registers. */ \
1154 DEBUG_PRINT1 ("\n"); \ 1154 DEBUG_PRINT1 ("\n"); \
1155 \ 1155 \
1156 if (!RE_NO_POSIX_BACKTRACKING & bufp->syntax) \ 1156 if (!(RE_NO_POSIX_BACKTRACKING & bufp->syntax)) \
1157 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ 1157 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1158 this_reg++) \ 1158 this_reg++) \
1159 { \ 1159 { \
1160 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ 1160 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
1161 DEBUG_STATEMENT (num_regs_pushed++); \ 1161 DEBUG_STATEMENT (num_regs_pushed++); \
1273 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ 1273 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
1274 \ 1274 \
1275 low_reg = (unsigned) POP_FAILURE_INT (); \ 1275 low_reg = (unsigned) POP_FAILURE_INT (); \
1276 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ 1276 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1277 \ 1277 \
1278 if (!RE_NO_POSIX_BACKTRACKING & bufp->syntax) \ 1278 if (!(RE_NO_POSIX_BACKTRACKING & bufp->syntax)) \
1279 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 1279 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1280 { \ 1280 { \
1281 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ 1281 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1282 \ 1282 \
1283 reg_info[this_reg].word = POP_FAILURE_ELT (); \ 1283 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1287 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ 1287 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1288 \ 1288 \
1289 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1289 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1290 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 1290 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1291 } \ 1291 } \
1292 else \
1293 { \
1294 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1295 { \
1296 reg_info[this_reg].word = 0; \
1297 regend[this_reg] = 0; \
1298 regstart[this_reg] = 0; \
1299 } \
1300 highest_active_reg = high_reg; \
1301 } \
1292 \ 1302 \
1293 set_regs_matched_done = 0; \ 1303 set_regs_matched_done = 0; \
1294 DEBUG_STATEMENT (nfailure_points_popped++); \ 1304 DEBUG_STATEMENT (nfailure_points_popped++); \
1295 } /* POP_FAILURE_POINT */ 1305 } /* POP_FAILURE_POINT */
1296 1306
1363 1373
1364 /* Fetch the next character in the uncompiled pattern---translating it 1374 /* Fetch the next character in the uncompiled pattern---translating it
1365 if necessary. Also cast from a signed character in the constant 1375 if necessary. Also cast from a signed character in the constant
1366 string passed to us by the user to an unsigned char that we can use 1376 string passed to us by the user to an unsigned char that we can use
1367 as an array index (in, e.g., `translate'). */ 1377 as an array index (in, e.g., `translate'). */
1378 #ifndef PATFETCH
1368 #define PATFETCH(c) \ 1379 #define PATFETCH(c) \
1369 do {if (p == pend) return REG_EEND; \ 1380 do {if (p == pend) return REG_EEND; \
1370 c = (unsigned char) *p++; \ 1381 c = (unsigned char) *p++; \
1371 if (translate) c = translate[c]; \ 1382 if (translate) c = (unsigned char) translate[c]; \
1372 } while (0) 1383 } while (0)
1384 #endif
1373 1385
1374 /* Fetch the next character in the uncompiled pattern, with no 1386 /* Fetch the next character in the uncompiled pattern, with no
1375 translation. */ 1387 translation. */
1376 #define PATFETCH_RAW(c) \ 1388 #define PATFETCH_RAW(c) \
1377 do {if (p == pend) return REG_EEND; \ 1389 do {if (p == pend) return REG_EEND; \
1384 1396
1385 /* If `translate' is non-null, return translate[D], else just D. We 1397 /* If `translate' is non-null, return translate[D], else just D. We
1386 cast the subscript to translate because some data is declared as 1398 cast the subscript to translate because some data is declared as
1387 `char *', to avoid warnings when a string constant is passed. But 1399 `char *', to avoid warnings when a string constant is passed. But
1388 when we use a character as a subscript we must make it unsigned. */ 1400 when we use a character as a subscript we must make it unsigned. */
1389 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) 1401 #ifndef TRANSLATE
1402 #define TRANSLATE(d) \
1403 (translate ? (char) translate[(unsigned char) (d)] : (d))
1404 #endif
1390 1405
1391 1406
1392 /* Macros for outputting the compiled pattern into `buffer'. */ 1407 /* Macros for outputting the compiled pattern into `buffer'. */
1393 1408
1394 /* If the buffer isn't allocated when it comes in, use this. */ 1409 /* If the buffer isn't allocated when it comes in, use this. */
1650 /* Points to the current (ending) position in the pattern. */ 1665 /* Points to the current (ending) position in the pattern. */
1651 const char *p = pattern; 1666 const char *p = pattern;
1652 const char *pend = pattern + size; 1667 const char *pend = pattern + size;
1653 1668
1654 /* How to translate the characters in the pattern. */ 1669 /* How to translate the characters in the pattern. */
1655 char *translate = bufp->translate; 1670 RE_TRANSLATE_TYPE translate = bufp->translate;
1656 1671
1657 /* Address of the count-byte of the most recently inserted `exactn' 1672 /* Address of the count-byte of the most recently inserted `exactn'
1658 command. This makes it possible to tell if a new exact-match 1673 command. This makes it possible to tell if a new exact-match
1659 character can be added to that command or if the character requires 1674 character can be added to that command or if the character requires
1660 a new `exactn' command. */ 1675 a new `exactn' command. */
2816 `regex_compile' itself. */ 2831 `regex_compile' itself. */
2817 2832
2818 static reg_errcode_t 2833 static reg_errcode_t
2819 compile_range (p_ptr, pend, translate, syntax, b) 2834 compile_range (p_ptr, pend, translate, syntax, b)
2820 const char **p_ptr, *pend; 2835 const char **p_ptr, *pend;
2821 char *translate; 2836 RE_TRANSLATE_TYPE translate;
2822 reg_syntax_t syntax; 2837 reg_syntax_t syntax;
2823 unsigned char *b; 2838 unsigned char *b;
2824 { 2839 {
2825 unsigned this_char; 2840 unsigned this_char;
2826 2841
3248 struct re_registers *regs; 3263 struct re_registers *regs;
3249 int stop; 3264 int stop;
3250 { 3265 {
3251 int val; 3266 int val;
3252 register char *fastmap = bufp->fastmap; 3267 register char *fastmap = bufp->fastmap;
3253 register char *translate = bufp->translate; 3268 register RE_TRANSLATE_TYPE translate = bufp->translate;
3254 int total_size = size1 + size2; 3269 int total_size = size1 + size2;
3255 int endpos = startpos + range; 3270 int endpos = startpos + range;
3256 3271
3257 /* Check for out-of-range STARTPOS. */ 3272 /* Check for out-of-range STARTPOS. */
3258 if (startpos < 0 || startpos > total_size) 3273 if (startpos < 0 || startpos > total_size)
3536 /* Mark the opcode just after a start_memory, so we can test for an 3551 /* Mark the opcode just after a start_memory, so we can test for an
3537 empty subpattern when we get to the stop_memory. */ 3552 empty subpattern when we get to the stop_memory. */
3538 unsigned char *just_past_start_mem = 0; 3553 unsigned char *just_past_start_mem = 0;
3539 3554
3540 /* We use this to map every character in the string. */ 3555 /* We use this to map every character in the string. */
3541 char *translate = bufp->translate; 3556 RE_TRANSLATE_TYPE translate = bufp->translate;
3542 3557
3543 /* Failure point stack. Each place that can handle a failure further 3558 /* Failure point stack. Each place that can handle a failure further
3544 down the line pushes a failure point on this stack. It consists of 3559 down the line pushes a failure point on this stack. It consists of
3545 restart, regend, and reg_info for all registers corresponding to 3560 restart, regend, and reg_info for all registers corresponding to
3546 the subexpressions we're currently inside, plus the number of such 3561 the subexpressions we're currently inside, plus the number of such
3935 if (translate) 3950 if (translate)
3936 { 3951 {
3937 do 3952 do
3938 { 3953 {
3939 PREFETCH (); 3954 PREFETCH ();
3940 if (translate[(unsigned char) *d++] != (char) *p++) 3955 if ((unsigned char) translate[(unsigned char) *d++]
3956 != (unsigned char) *p++)
3941 goto fail; 3957 goto fail;
3942 } 3958 }
3943 while (--mcnt); 3959 while (--mcnt);
3944 } 3960 }
3945 else 3961 else
4349 /* If this on_failure_jump comes right before a group (i.e., 4365 /* If this on_failure_jump comes right before a group (i.e.,
4350 the original * applied to a group), save the information 4366 the original * applied to a group), save the information
4351 for that group and all inner ones, so that if we fail back 4367 for that group and all inner ones, so that if we fail back
4352 to this point, the group's information will be correct. 4368 to this point, the group's information will be correct.
4353 For example, in \(a*\)*\1, we need the preceding group, 4369 For example, in \(a*\)*\1, we need the preceding group,
4354 and in \(\(a*\)b*\)\2, we need the inner group. */ 4370 and in \(zz\(a*\)b*\)\2, we need the inner group. */
4355 4371
4356 /* We can't use `p' to check ahead because we push 4372 /* We can't use `p' to check ahead because we push
4357 a failure point to `p + mcnt' after we do this. */ 4373 a failure point to `p + mcnt' after we do this. */
4358 p1 = p; 4374 p1 = p;
4359 4375
5057 5073
5058 static int 5074 static int
5059 bcmp_translate (s1, s2, len, translate) 5075 bcmp_translate (s1, s2, len, translate)
5060 unsigned char *s1, *s2; 5076 unsigned char *s1, *s2;
5061 register int len; 5077 register int len;
5062 char *translate; 5078 RE_TRANSLATE_TYPE translate;
5063 { 5079 {
5064 register unsigned char *p1 = s1, *p2 = s2; 5080 register unsigned char *p1 = s1, *p2 = s2;
5065 while (len) 5081 while (len)
5066 { 5082 {
5067 if (translate[*p1++] != translate[*p2++]) return 1; 5083 if (translate[*p1++] != translate[*p2++]) return 1;
5229 5245
5230 if (cflags & REG_ICASE) 5246 if (cflags & REG_ICASE)
5231 { 5247 {
5232 unsigned i; 5248 unsigned i;
5233 5249
5234 preg->translate = (char *) malloc (CHAR_SET_SIZE); 5250 preg->translate
5251 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5252 * sizeof (*(RE_TRANSLATE_TYPE)0));
5235 if (preg->translate == NULL) 5253 if (preg->translate == NULL)
5236 return (int) REG_ESPACE; 5254 return (int) REG_ESPACE;
5237 5255
5238 /* Map uppercase characters to corresponding lowercase ones. */ 5256 /* Map uppercase characters to corresponding lowercase ones. */
5239 for (i = 0; i < CHAR_SET_SIZE; i++) 5257 for (i = 0; i < CHAR_SET_SIZE; i++)