# HG changeset patch # User John W. Eaton # Date 1208584771 14400 # Node ID 74f5e0c7de9e02c0c637b7efb1a9664a1ddab5c2 # Parent c3bb0b7a4261bbecec337ab6704ae9753cb0662c first pass at handling block comments diff --git a/src/ChangeLog b/src/ChangeLog --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,20 @@ 2008-04-18 John W. Eaton + * lex.l, lex.h (process_comment): New arg, start_in_block. Call + grab_block_comment if start_in_block is true. Change all uses. + * lex.l (grab_block_comment): New function. + (grab_comment_block): New arg, at_bol. Change all uses. + Call grab_block_comment if we find the start of a block comment. + (block_comment_nesting_level): New static variable. + (^{S}*{CCHAR}\{{S}*{NL}): New rule. + (<<EOF>>): Warn about open block comments. + (reset_parser): Set block_comment_nesting_level to zero. + * parse.y (parse_fcn_file): Stash help text from + gobble_leading_white_space after calling reset_parser. + (text_getc): Keep track of input_line_number here. + (skip_white_space): Don't increment input_line_number here. + * lex.l (grab_comment_block): Or here. + * lex.l (Vdisplay_tokens): New static variable. (F__display_tokens__): New function. (display_token): New function. @@ -8,7 +23,9 @@ ([\"\'], "'", \"): Move handle_string outside of COUNT_TOK_AND_RETURN macro parameter list. (handle_identifier): Don't use macros to return token values here. - + ({S}*{COMMENT}{SNLCMT}*|{S}*{NL}{SNLCMT}*): + Recognize block comments here. + * pr-output.cc (Fdisp): If nargout > 0, produce an sq-string unless arg is a dq-string. diff --git a/src/lex.h b/src/lex.h --- a/src/lex.h +++ b/src/lex.h @@ -152,7 +152,8 @@ stream_reader& operator = (const stream_reader&); }; -extern std::string grab_comment_block (stream_reader& reader, bool& eof); +extern std::string +grab_comment_block (stream_reader& reader, bool at_bol, bool& eof); // TRUE means that we have encountered EOF on the input stream. 
extern bool parser_end_of_input; diff --git a/src/lex.l b/src/lex.l --- a/src/lex.l +++ b/src/lex.l @@ -248,6 +248,10 @@ static unsigned int Vtoken_count = 0; +// The current nesting depth of block comments. Zero means we are +// not inside a block comment. +static int block_comment_nesting_level = 0; + // Forward declarations for functions defined at the bottom of this // file. @@ -256,7 +260,7 @@ static int is_keyword_token (const std::string& s); static void prep_for_function (void); static void prep_for_nested_function (void); -static int process_comment (bool& eof); +static int process_comment (bool start_in_block, bool& eof); static bool match_any (char c, const char *s); static bool next_token_is_sep_op (void); static bool next_token_is_bin_op (bool spc_prev); @@ -367,6 +371,8 @@ // // It's also a pain in the ass to decide whether to insert a comma // after seeing a ']' character... + +// FIXME -- we need to handle block comments here. %} {SNLCMT}*\]{S}* { @@ -381,6 +387,10 @@ COUNT_TOK_AND_RETURN (tok_to_return); } +%{ +// FIXME -- we need to handle block comments here. +%} + {SNLCMT}*\}{S}* { scan_for_comments (yytext); fixup_column_count (yytext); @@ -455,6 +465,8 @@ // Semicolons are handled as row seprators in matrix constants. If we // don't eat whitespace here we can end up inserting too many // semicolons. + +// FIXME -- we need to handle block comments here. %} {SNLCMT}*;{SNLCMT}* { @@ -470,6 +482,8 @@ // In some cases, new lines can also become row separators. If we // don't eat whitespace here we can end up inserting too many // semicolons. + +// FIXME -- we need to handle block comments here. %} {S}*{COMMENT}{SNLCMT}* | @@ -565,6 +579,16 @@ %} <> { + if (block_comment_nesting_level != 0) + { + warning ("block comment open at end of input"); + + if ((reading_fcn_file || reading_script_file) + && ! 
curr_fcn_file_name.empty ()) + warning ("near line %d of file `%s.m'", + input_line_number, curr_fcn_file_name.c_str ()); + } + TOK_RETURN (END_OF_INPUT); } @@ -647,9 +671,11 @@ %} {CCHAR} { + yyunput (yytext[0], yytext); + bool eof = false; - yyunput (yytext[0], yytext); - int tok = process_comment (eof); + int tok = process_comment (false, eof); + if (eof) TOK_RETURN (END_OF_INPUT); else if (tok > 0) @@ -657,6 +683,18 @@ } %{ +// Block comments. +%} + +^{S}*{CCHAR}\{{S}*{NL} { + current_input_column = 1; + block_comment_nesting_level++; + promptflag--; + bool eof = false; + process_comment (true, eof); + } + +%{ // Other operators. %} @@ -827,6 +865,9 @@ // We do want a prompt by default. promptflag = 1; + // We are not in a block comment. + block_comment_nesting_level = 0; + // Error may have occurred inside some brackets, braces, or parentheses. nesting_level.clear (); @@ -1152,8 +1193,115 @@ val = Matrix (); } +static std::string +grab_block_comment (stream_reader& reader, bool& eof) +{ + std::string buf; + + bool at_bol = true; + bool look_for_marker = false; + + bool warned_incompatible = false; + + int c = 0; + + while ((c = reader.getc ()) != EOF) + { + current_input_column++; + + if (look_for_marker) + { + at_bol = false; + look_for_marker = false; + + if (c == '{' || c == '}') + { + std::string tmp_buf (1, static_cast<char> (c)); + + int type = c; + + bool done = false; + + while ((c = reader.getc ()) != EOF && ! 
done) + { + current_input_column++; + + switch (c) + { + case ' ': + case '\t': + tmp_buf += static_cast<char> (c); + break; + + case '\n': + { + current_input_column = 0; + at_bol = true; + done = true; + + if (type == '{') + { + block_comment_nesting_level++; + promptflag--; + } + else + { + block_comment_nesting_level--; + promptflag++; + + if (block_comment_nesting_level == 0) + { + buf += grab_comment_block (reader, true, eof); + + return buf; + } + } + } + break; + + default: + at_bol = false; + tmp_buf += static_cast<char> (c); + buf += tmp_buf; + done = true; + break; + } + } + } + } + + if (at_bol && (c == '%' || c == '#')) + { + if (c == '#' && ! warned_incompatible) + { + warned_incompatible = true; + maybe_gripe_matlab_incompatible_comment (c); + } + + at_bol = false; + look_for_marker = true; + } + else + { + buf += static_cast<char> (c); + + if (c == '\n') + { + current_input_column = 0; + at_bol = true; + } + } + } + + if (c == EOF) + eof = true; + + return buf; +} + std::string -grab_comment_block (stream_reader& reader, bool& eof) +grab_comment_block (stream_reader& reader, bool at_bol, + bool& eof) { std::string buf; @@ -1174,9 +1322,59 @@ if (begin_comment) { if (c == '%' || c == '#') - continue; + { + at_bol = false; + continue; + } + else if (at_bol && c == '{') + { + std::string tmp_buf (1, static_cast<char> (c)); + + bool done = false; + + while ((c = reader.getc ()) != EOF && ! 
done) + { + current_input_column++; + + switch (c) + { + case ' ': + case '\t': + tmp_buf += static_cast (c); + break; + + case '\n': + { + current_input_column = 0; + at_bol = true; + done = true; + + block_comment_nesting_level++; + promptflag--; + + buf += grab_block_comment (reader, eof); + + in_comment = false; + + if (eof) + goto done; + } + break; + + default: + at_bol = false; + tmp_buf += static_cast (c); + buf += tmp_buf; + done = true; + break; + } + } + } else - begin_comment = false; + { + at_bol = false; + begin_comment = false; + } } if (in_comment) @@ -1185,9 +1383,8 @@ if (c == '\n') { - input_line_number++; + at_bol = true; current_input_column = 0; - in_comment = false; } } @@ -1242,7 +1439,7 @@ }; static int -process_comment (bool& eof) +process_comment (bool start_in_block, bool& eof) { eof = false; @@ -1253,7 +1450,12 @@ flex_stream_reader flex_reader (yytext); - std::string txt = grab_comment_block (flex_reader, eof); + // process_comment is only supposed to be called when we are not + // initially looking at a block comment. + + std::string txt = start_in_block + ? grab_block_comment (flex_reader, eof) + : grab_comment_block (flex_reader, false, eof); if (help_txt.empty () && nesting_level.none ()) { @@ -1509,6 +1711,8 @@ return retval; } +// FIXME -- we need to handle block comments here. + static void scan_for_comments (const char *text) { @@ -1593,6 +1797,8 @@ // ATE_SPACE_OR_TAB : space or tab in input // ATE_NEWLINE : bare new line in input +// FIXME -- we need to handle block comments here. + static yum_yum eat_whitespace (void) { @@ -1782,6 +1988,8 @@ // If non-whitespace characters are found before comment // characters, return 0. Otherwise, return 1. +// FIXME -- we need to handle block comments here. 
+ static bool have_continuation (bool trailing_comments_ok) { diff --git a/src/parse.y b/src/parse.y --- a/src/parse.y +++ b/src/parse.y @@ -2845,12 +2845,16 @@ { c = getc (f); - if (c != '\n') + if (c == '\n') + input_line_number++; + else { ungetc (c, f); c = '\r'; } } + else if (c == '\n') + input_line_number++; return c; } @@ -2883,7 +2887,6 @@ break; case '\n': - input_line_number++; current_input_column = 0; break; @@ -2920,7 +2923,7 @@ if (eof) break; - txt = grab_comment_block (stdio_reader, eof); + txt = grab_comment_block (stdio_reader, true, eof); if (txt.empty ()) break; @@ -3028,9 +3031,6 @@ std::string help_txt = gobble_leading_white_space (ffile, eof); - if (! help_txt.empty ()) - help_buf.push (help_txt); - if (! eof) { std::string file_type; @@ -3079,6 +3079,9 @@ reset_parser (); + if (! help_txt.empty ()) + help_buf.push (help_txt); + if (parsing_script) prep_lexer_for_script ();