# HG changeset patch # User John W. Eaton # Date 1373046981 14400 # Node ID f29dd5a7591da49fe3f851103ac2dbed353e9ef2 # Parent f21194531877e4ca405f574c38159e3ed4ae32ab more tweaks for parsing character strings * lex.ll (\'\', \', [^\'\n\r]+): New patterns to replace [^\'\n\r]*\'. ([^\'\n\r]*\'): Delete. Attempt to correctly update input position for all patterns. (\\{NL}): Only check for EOB or EOF if we are using the push lexer interface. diff --git a/libinterp/parse-tree/lex.ll b/libinterp/parse-tree/lex.ll --- a/libinterp/parse-tree/lex.ll +++ b/libinterp/parse-tree/lex.ll @@ -638,6 +638,8 @@ \" { curr_lexer->lexer_debug ("\\\""); + curr_lexer->current_input_column++; + curr_lexer->pop_start_state (); curr_lexer->looking_for_object_index = true; @@ -656,6 +658,8 @@ \\[0-7]{1,3} { curr_lexer->lexer_debug ("\\\\[0-7]{1,3}"); + curr_lexer->current_input_column += yyleng; + int result; sscanf (yytext+1, "%o", &result); @@ -668,42 +672,49 @@ "\\a" { curr_lexer->lexer_debug ("\"\\\\a\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\a'; } "\\b" { curr_lexer->lexer_debug ("\"\\\\b\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\b'; } "\\f" { curr_lexer->lexer_debug ("\"\\\\f\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\f'; } "\\n" { curr_lexer->lexer_debug ("\"\\\\n\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\n'; } "\\r" { curr_lexer->lexer_debug ("\"\\\\r\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\r'; } "\\t" { curr_lexer->lexer_debug ("\"\\\\t\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\t'; } "\\v" { curr_lexer->lexer_debug ("\"\\\\v\""); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += '\v'; } @@ -714,28 +725,35 @@ curr_lexer->input_line_number++; curr_lexer->current_input_column = 1; - // We can't rely on the trick used elsewhere of sticking ASCII 1 - // in the intput buffer and recognizing it as a special case - // because ASCII 1 is a valid character for a character string. - - if (curr_lexer->at_end_of_buffer ()) - return -1; - - if (curr_lexer->at_end_of_file ()) - return curr_lexer->handle_end_of_input (); - - // Otherwise, just keep going with the text from the current buffer. + if (curr_lexer->is_push_lexer ()) + { + // We can't rely on the trick used elsewhere of sticking ASCII + // 1 in the input buffer and recognizing it as a special case + // because ASCII 1 is a valid character for a character + // string. If we are at the end of the buffer, ask for more + // input. If we are at the end of the file, deal with it. + // Otherwise, just keep going with the text from the current + // buffer. + + if (curr_lexer->at_end_of_buffer ()) + return -1; + + if (curr_lexer->at_end_of_file ()) + return curr_lexer->handle_end_of_input (); + } } \\. { curr_lexer->lexer_debug ("\\\\."); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += yytext[1]; } [^\\\r\n\"]+ { curr_lexer->lexer_debug ("[^\\\\\\r\\n\\\"]+"); + curr_lexer->current_input_column += yyleng; curr_lexer->string_text += yytext; } @@ -754,40 +772,38 @@ // Single-quoted character strings. %} -[^\'\n\r]*\' { - curr_lexer->lexer_debug ("[^\\'\\n\\r]*\\'"); - - yytext[yyleng-1] = 0; - curr_lexer->string_text += yytext; +\'\' { + curr_lexer->lexer_debug ("\\'\\'"); curr_lexer->current_input_column += yyleng; - - int c = curr_lexer->text_yyinput (); - - if (c == '\'') - { - curr_lexer->string_text += c; - - curr_lexer->current_input_column++; - } - else - { - curr_lexer->xunput (c); - - curr_lexer->pop_start_state (); - - curr_lexer->looking_for_object_index = true; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->push_token (new token (SQ_STRING, - curr_lexer->string_text, - curr_lexer->string_line, - curr_lexer->string_column)); - - curr_lexer->string_text = ""; - - return curr_lexer->count_token_internal (SQ_STRING); - } + curr_lexer->string_text += '\''; + } + +\' { + curr_lexer->lexer_debug ("\\'"); + + curr_lexer->current_input_column++; + + curr_lexer->pop_start_state (); + + curr_lexer->looking_for_object_index = true; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->push_token (new token (SQ_STRING, + curr_lexer->string_text, + curr_lexer->string_line, + curr_lexer->string_column)); + + curr_lexer->string_text = ""; + + return curr_lexer->count_token_internal (SQ_STRING); + } + +[^\'\n\r]+ { + curr_lexer->lexer_debug ("[^\\'\\n\\r]+"); + + curr_lexer->current_input_column += yyleng; + curr_lexer->string_text += yytext; } {NL} {