Mercurial > hg > octave-jordi
changeset 7728:13820b9f5fd9
more consistent handling of CR/CRLF/LF line endings in lexer and parser
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Wed, 23 Apr 2008 16:03:34 -0400 |
parents | c8da61051ea2 |
children | 6f2b2cc4b957 |
files | src/ChangeLog src/input.cc src/lex.l src/parse.y |
diffstat | 4 files changed, 73 insertions(+), 116 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,14 @@ +2008-04-23 John W. Eaton <jwe@octave.org> + + * lex.l (text_yyinput): New function. Use it in place of yyinput. + (next_token_is_sep_op, scan_for_comments, eat_whitespace, + have_continuation): No need to check for CR or CRLF. + * parse.y (text_getc): Also return NL for single CR. + +2008-04-22 Michael Goffioul <michael.goffioul@gmail.com> + + * input.cc (get_input_from_file): Open file in binary mode. + 2008-04-20 John W. Eaton <jwe@octave.org> * oct-stream.cc (octave_stream::read): Allow single data type
--- a/src/input.cc +++ b/src/input.cc @@ -398,7 +398,7 @@ FILE *instream = 0; if (name.length () > 0) - instream = fopen (name.c_str (), "r"); + instream = fopen (name.c_str (), "rb"); if (! instream && warn) warning ("%s: no such file or directory", name.c_str ());
--- a/src/lex.l +++ b/src/lex.l @@ -255,6 +255,7 @@ // Forward declarations for functions defined at the bottom of this // file. +static int text_yyinput (void); static void fixup_column_count (char *s); static void do_comma_insert_check (void); static int is_keyword_token (const std::string& s); @@ -806,7 +807,7 @@ yyunput (yytext[0], yytext); - int c = yyinput (); + int c = text_yyinput (); if (c != EOF) { @@ -834,7 +835,7 @@ { int spc_gobbled = eat_continuation (); - int c = yyinput (); + int c = text_yyinput (); yyunput (c, yytext); @@ -903,6 +904,27 @@ lexer_flags.init (); } +static int +text_yyinput (void) +{ + int c = yyinput (); + + // Convert CRLF into just LF and single CR into LF. + + if (c == '\r') + { + c = yyinput (); + + if (c != '\n') + { + yyunput (c, yytext); + c = '\n'; + } + } + + return c; +} + // If we read some newlines, we need figure out what column we're // really looking at. @@ -1431,7 +1453,7 @@ public: flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { } - int getc (void) { return ::yyinput (); } + int getc (void) { return ::text_yyinput (); } int ungetc (int c) { ::yyunput (c, buf); return 0; } private: @@ -1524,25 +1546,11 @@ { bool retval = false; - int c1 = yyinput (); - - if (c1 == '\r') - { - int c2 = yyinput (); - - if (c2 == '\n') - { - c1 = '\n'; - - retval = true; - } - else - yyunput (c2, yytext); - } - else - retval = match_any (c1, ",;\n]"); - - yyunput (c1, yytext); + int c = text_yyinput (); + + retval = match_any (c, ",;\n]"); + + yyunput (c, yytext); return retval; } @@ -1555,7 +1563,7 @@ { bool un_op = false; - int c0 = yyinput (); + int c0 = text_yyinput (); if (c0 == '\'' && ! 
spc_prev) { @@ -1563,19 +1571,19 @@ } else if (c0 == '.') { - int c1 = yyinput (); + int c1 = text_yyinput (); un_op = (c1 == '\''); yyunput (c1, yytext); } else if (c0 == '+') { - int c1 = yyinput (); + int c1 = text_yyinput (); un_op = (c1 == '+'); yyunput (c1, yytext); } else if (c0 == '-') { - int c1 = yyinput (); + int c1 = text_yyinput (); un_op = (c1 == '-'); yyunput (c1, yytext); } @@ -1602,14 +1610,14 @@ { bool bin_op = false; - int c0 = yyinput (); + int c0 = text_yyinput (); switch (c0) { case '+': case '-': { - int c1 = yyinput (); + int c1 = text_yyinput (); switch (c1) { @@ -1644,7 +1652,7 @@ // .+ .- ./ .\ .^ .* .** case '.': { - int c1 = yyinput (); + int c1 = text_yyinput (); if (match_any (c1, "+-/\\^*")) // Always a binary op (may also include .+=, .-=, ./=, ...). @@ -1677,7 +1685,7 @@ case '~': case '!': { - int c1 = yyinput (); + int c1 = text_yyinput (); // ~ and ! can be unary ops, so require following =. if (c1 == '=') @@ -1756,25 +1764,6 @@ } break; - case '\r': - if (in_comment) - comment_buf += static_cast<char> (c); - if (i < len) - { - c = text[i++]; - - if (c == '\n') - { - if (in_comment) - { - comment_buf += static_cast<char> (c); - octave_comment_buffer::append (comment_buf); - in_comment = false; - beginning_of_comment = false; - } - } - } - default: if (in_comment) { @@ -1811,7 +1800,7 @@ int c = 0; - while ((c = yyinput ()) != EOF) + while ((c = text_yyinput ()) != EOF) { current_input_column++; @@ -1885,28 +1874,6 @@ goto done; } - case '\r': - if (in_comment) - comment_buf += static_cast<char> (c); - c = yyinput (); - if (c == EOF) - break; - else if (c == '\n') - { - retval |= ATE_NEWLINE; - if (in_comment) - { - comment_buf += static_cast<char> (c); - octave_comment_buffer::append (comment_buf); - in_comment = false; - beginning_of_comment = false; - } - current_input_column = 0; - break; - } - - // Fall through... 
- default: if (in_comment) { @@ -2002,7 +1969,7 @@ int c = 0; - while ((c = yyinput ()) != EOF) + while ((c = text_yyinput ()) != EOF) { buf << static_cast<char> (c); @@ -2048,27 +2015,6 @@ gripe_matlab_incompatible_continuation (); return true; - case '\r': - if (in_comment) - comment_buf += static_cast<char> (c); - c = yyinput (); - if (c == EOF) - break; - else if (c == '\n') - { - if (in_comment) - { - comment_buf += static_cast<char> (c); - octave_comment_buffer::append (comment_buf); - } - current_input_column = 0; - promptflag--; - gripe_matlab_incompatible_continuation (); - return true; - } - - // Fall through... - default: if (in_comment) { @@ -2102,10 +2048,10 @@ static bool have_ellipsis_continuation (bool trailing_comments_ok) { - char c1 = yyinput (); + char c1 = text_yyinput (); if (c1 == '.') { - char c2 = yyinput (); + char c2 = text_yyinput (); if (c2 == '.' && have_continuation (trailing_comments_ok)) return true; else @@ -2130,7 +2076,7 @@ { int retval = ATE_NOTHING; - int c = yyinput (); + int c = text_yyinput (); if ((c == '.' 
&& have_ellipsis_continuation ()) || (c == '\\' && have_continuation ())) @@ -2152,7 +2098,7 @@ int c; int escape_pending = 0; - while ((c = yyinput ()) != EOF) + while ((c = text_yyinput ()) != EOF) { current_input_column++; @@ -2191,7 +2137,7 @@ buf << static_cast<char> (c); else { - c = yyinput (); + c = text_yyinput (); if (c == delim) { buf << static_cast<char> (c); @@ -2244,13 +2190,13 @@ { bool retval = false; - int c0 = yyinput (); + int c0 = text_yyinput (); switch (c0) { case '=': { - int c1 = yyinput (); + int c1 = text_yyinput (); yyunput (c1, yytext); if (c1 != '=') retval = true; @@ -2265,7 +2211,7 @@ case '&': case '|': { - int c1 = yyinput (); + int c1 = text_yyinput (); yyunput (c1, yytext); if (c1 == '=') retval = true; @@ -2274,10 +2220,10 @@ case '.': { - int c1 = yyinput (); + int c1 = text_yyinput (); if (match_any (c1, "+-*/\\")) { - int c2 = yyinput (); + int c2 = text_yyinput (); yyunput (c2, yytext); if (c2 == '=') retval = true; @@ -2288,10 +2234,10 @@ case '>': { - int c1 = yyinput (); + int c1 = text_yyinput (); if (c1 == '>') { - int c2 = yyinput (); + int c2 = text_yyinput (); yyunput (c2, yytext); if (c2 == '=') retval = true; @@ -2302,10 +2248,10 @@ case '<': { - int c1 = yyinput (); + int c1 = text_yyinput (); if (c1 == '<') { - int c2 = yyinput (); + int c2 = text_yyinput (); yyunput (c2, yytext); if (c2 == '=') retval = true; @@ -2326,7 +2272,7 @@ static bool next_token_is_index_op (void) { - int c = yyinput (); + int c = text_yyinput (); yyunput (c, yytext); return c == '(' || c == '{'; } @@ -2408,8 +2354,8 @@ int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); - int c1 = yyinput (); - int c2 = yyinput (); + int c1 = text_yyinput (); + int c2 = text_yyinput (); yyunput (c2, yytext); yyunput (c1, yytext); @@ -2517,14 +2463,14 @@ // See if we have a plot keyword (title, using, with, or clear). 
- int c1 = yyinput (); + int c1 = text_yyinput (); bool next_tok_is_paren = (c1 == '('); bool next_tok_is_eq = false; if (c1 == '=') { - int c2 = yyinput (); + int c2 = text_yyinput (); yyunput (c2, yytext); if (c2 != '=')