Mercurial > hg > octave-nkf
changeset 18223:615fdd2238c1 gui-release
improve compatibility of command syntax parsing (bug #41032)
* lex.h, lex.ll (lexical_feedback::command_arg_paren_count):
New data member.
(lexical_feedback::lexical_feedback): Initialize it.
(lexical_feedback::reset): Reset it.
(COMMAND_ARG_FINISH): New macro.
Rewrite COMMAND_START patterns to improve Matlab compatibility of
command syntax parsing.
(<DQ_STRING_START>\", <SQ_STRING_START>\'): Don't return token if
start state is COMMAND_START.
* close.m: Fix test.
author | Michael C. Grant <mcg@cvxr.com> |
---|---|
date | Mon, 06 Jan 2014 12:02:04 -0500 |
parents | 4d90e104bf35 |
children | 03226f218077 |
files | libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll scripts/plot/util/close.m |
diffstat | 3 files changed, 165 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h +++ b/libinterp/parse-tree/lex.h @@ -268,9 +268,9 @@ input_line_number (1), current_input_column (1), bracketflag (0), braceflag (0), looping (0), defining_func (0), looking_at_function_handle (0), - block_comment_nesting_level (0), token_count (0), - current_input_line (), comment_text (), help_text (), - string_text (), string_line (0), string_column (0), + block_comment_nesting_level (0), command_arg_paren_count (0), + token_count (0), current_input_line (), comment_text (), + help_text (), string_text (), string_line (0), string_column (0), fcn_file_name (), fcn_file_full_name (), looking_at_object_index (), parsed_function_name (), pending_local_variables (), symtab_context (), nesting_level (), tokens () @@ -389,6 +389,9 @@ // nestng level for blcok comments. int block_comment_nesting_level; + // Parenthesis count for command argument parsing. + int command_arg_paren_count; + // Count of tokens recognized by this lexer since initialized or // since the last reset. size_t token_count;
--- a/libinterp/parse-tree/lex.ll +++ b/libinterp/parse-tree/lex.ll @@ -232,6 +232,27 @@ } \ while (0) +// When a command argument boundary is detected, push out the +// current argument being built. This one seems like a good +// candidate for a function call. + +#define COMMAND_ARG_FINISH \ + do \ + { \ + if (curr_lexer->string_text.empty ()) \ + break; \ + \ + int retval = curr_lexer->handle_token (curr_lexer->string_text, \ + SQ_STRING); \ + \ + curr_lexer->string_text = ""; \ + curr_lexer->command_arg_paren_count = 0; \ + \ + yyless (0); \ + \ + return retval; \ + } \ + while (0) static bool Vdisplay_tokens = false; @@ -283,54 +304,129 @@ // Help and other command-style functions. %} -<COMMAND_START>{NL} { - curr_lexer->lexer_debug ("<COMMAND_START>{NL}"); +%{ +// Commands can be continued on a second line using the ellipsis. +// If an argument is in construction, it is completed. +%} + +<COMMAND_START>(\.\.\.)[^\r\n]*{NL} { + curr_lexer->lexer_debug ("<COMMAND_START>(\\.\\.\\.)[^\\r\\n]*{NL}"); + + COMMAND_ARG_FINISH; + + curr_lexer->input_line_number++; + curr_lexer->current_input_column = 1; + + HANDLE_STRING_CONTINUATION; + } + +%{ +// Commands normally end at the end of a line or a semicolon. +%} + +<COMMAND_START>({CCHAR}[^\r\n]*)?{NL} { + curr_lexer->lexer_debug ("<COMMAND_START>({CCHAR}[^\\r\\n]*)?{NL}"); + + COMMAND_ARG_FINISH; curr_lexer->input_line_number++; curr_lexer->current_input_column = 1; - curr_lexer->looking_for_object_index = false; curr_lexer->at_beginning_of_statement = true; - curr_lexer->pop_start_state (); - return curr_lexer->count_token ('\n'); + return curr_lexer->handle_token ('\n'); + } + +<COMMAND_START>[\,\;] { + curr_lexer->lexer_debug( "<COMMAND_START>[\\,\\;]" ); + + if (yytext[0] != ',' || curr_lexer->command_arg_paren_count == 0) + { + COMMAND_ARG_FINISH; + curr_lexer->looking_for_object_index = false; + curr_lexer->at_beginning_of_statement = true; + curr_lexer->pop_start_state (); + return curr_lexer->handle_token (yytext[0]); + } + else + curr_lexer->string_text += yytext; + + curr_lexer->current_input_column += yyleng; } -<COMMAND_START>[\;\,] { - curr_lexer->lexer_debug ("<COMMAND_START>[\\;\\,]"); - - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = true; - - curr_lexer->pop_start_state (); - - if (strcmp (yytext, ",") == 0) - return curr_lexer->handle_token (','); - else - return curr_lexer->handle_token (';'); +%{ +// Unbalanced parentheses serve as pseudo-quotes: they are included in +// the final argument string, but they cause parentheses and quotes to +// be slurped into that argument as well. +%} + +<COMMAND_START>[\(\[\{]+ { + curr_lexer->lexer_debug ("<COMMAND_START>[\\(\\[\\{]+"); + + curr_lexer->command_arg_paren_count += yyleng; + curr_lexer->string_text += yytext; + curr_lexer->current_input_column += yyleng; } +<COMMAND_START>[\)\]\}]+ { + curr_lexer->lexer_debug ("<COMMAND_START>[\\)\\]\\}]+"); + + curr_lexer->command_arg_paren_count -= yyleng; + curr_lexer->string_text += yytext; + curr_lexer->current_input_column += yyleng; +} + +%{ +// Handle quoted strings. Quoted strings that are not separated by +// whitespace from other argument text are combined with that previous +// text. For instance, +// +// command 'text1'"text2" +// +// has a single argument text1text2, not two separate arguments. +// That's why we must test to see if we are in command argument mode +// when processing the end of a string. +%} + <COMMAND_START>[\"\'] { curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->current_input_column++; - - curr_lexer->begin_string (yytext[0] == '"' - ? DQ_STRING_START : SQ_STRING_START); + if (curr_lexer->command_arg_paren_count == 0) + curr_lexer->begin_string (yytext[0] == '"' + ? DQ_STRING_START : SQ_STRING_START); + else + curr_lexer->string_text += yytext; + + curr_lexer->current_input_column += yyleng; } -<COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { - curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); - - std::string tok = strip_trailing_whitespace (yytext); - - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = false; - - return curr_lexer->handle_token (tok, SQ_STRING); +%{ +// In standard command argument processing, whitespace separates +// arguments. In the presence of unbalanced parentheses, it is +// incorporated into the argument. +%} + +<COMMAND_START>{S}+ { + curr_lexer->lexer_debug ("<COMMAND_START>{S}+"); + + if (curr_lexer->command_arg_paren_count == 0) + COMMAND_ARG_FINISH; + else + curr_lexer->string_text += yytext; + + curr_lexer->current_input_column += yyleng; + } + +%{ +// Everything else is slurped into the command arguments. +%} + +<COMMAND_START>([\.]|[^#% \t\r\n\,\;\"\'\(\[\{\}\]\)]+) { + curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]+"); + + curr_lexer->string_text += yytext; + curr_lexer->current_input_column += yyleng; } <MATRIX_START>{S}* { @@ -678,17 +774,20 @@ curr_lexer->pop_start_state (); - curr_lexer->looking_for_object_index = true; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->push_token (new token (DQ_STRING, - curr_lexer->string_text, - curr_lexer->string_line, - curr_lexer->string_column)); - - curr_lexer->string_text = ""; - - return curr_lexer->count_token_internal (DQ_STRING); + if (curr_lexer->start_state() != COMMAND_START) + { + curr_lexer->looking_for_object_index = true; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->push_token (new token (DQ_STRING, + curr_lexer->string_text, + curr_lexer->string_line, + curr_lexer->string_column)); + + curr_lexer->string_text = ""; + + return curr_lexer->count_token_internal (DQ_STRING); + } } <DQ_STRING_START>\\[0-7]{1,3} { @@ -861,17 +960,20 @@ curr_lexer->pop_start_state (); - curr_lexer->looking_for_object_index = true; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->push_token (new token (SQ_STRING, - curr_lexer->string_text, - curr_lexer->string_line, - curr_lexer->string_column)); - - curr_lexer->string_text = ""; - - return curr_lexer->count_token_internal (SQ_STRING); + if (curr_lexer->start_state() != COMMAND_START) + { + curr_lexer->looking_for_object_index = true; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->push_token (new token (SQ_STRING, + curr_lexer->string_text, + curr_lexer->string_line, + curr_lexer->string_column)); + + curr_lexer->string_text = ""; + + return curr_lexer->count_token_internal (SQ_STRING); + } } <SQ_STRING_START>[^\'\n\r]+ { @@ -1849,6 +1951,7 @@ fcn_file_full_name = ""; looking_at_object_index.clear (); looking_at_object_index.push_front (false); + command_arg_paren_count = 0; while (! parsed_function_name.empty ()) parsed_function_name.pop (); @@ -3265,3 +3368,4 @@ return status; } +
--- a/scripts/plot/util/close.m +++ b/scripts/plot/util/close.m @@ -105,5 +105,4 @@ %!error <first argument must be "all" or a figure> close ({"all"}) %!error <first argument must be "all" or a figure> close ("all_and_more") %!error <first argument must be "all" or a figure> close (-1) -%!error <expecting argument to be "all hidden"> close "all" hid" - +%!error <expecting argument to be "all hidden"> close all hid