octave-nkf: libinterp/parse-tree/lex.ll comparison

comparison libinterp/parse-tree/lex.ll @ 16903:f21194531877

improve character string handling in the lexer

* lex.ll: Add calls to lexer_debug for character string patterns. Attempt to be consistent with handling of backslash characters in patterns passed to lexer_debug.
(<DQ_STRING_START>\\{NL}): Handle EOF and EOB conditions explicitly.
(octave_base_lexer::display_start_state): Handle DQ_STRING_START and SQ_STRING_START states.

author	John W. Eaton <jwe@octave.org>
date	Fri, 05 Jul 2013 13:28:50 -0400
parents	531473481084
children	f29dd5a7591d

comparison

equal deleted inserted replaced

-:51c1076a9c13
+:f21194531877
 // after a block of full-line comments, finish the full line comment
 // block.
 %}
 ^{S}*{CCHAR}\{{S}*{NL} {
-curr_lexer->lexer_debug ("^{S}*{CCHAR}\{{S}*{NL}");
+curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}");
 yyless (0);
 if (curr_lexer->start_state () == LINE_COMMENT_START)
 {
 curr_lexer->push_start_state (BLOCK_COMMENT_START);
 }
 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
-curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL}");
+curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");
 curr_lexer->input_line_number++;
 curr_lexer->current_input_column = 1;
 if (curr_lexer->block_comment_nesting_level)
 %{
 // Double-quoted character strings.
 %}
 <DQ_STRING_START>\"\" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\"");
 curr_lexer->current_input_column += yyleng;
 curr_lexer->string_text += '"';
 }
 <DQ_STRING_START>\" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");
 curr_lexer->pop_start_state ();
 curr_lexer->looking_for_object_index = true;
 curr_lexer->at_beginning_of_statement = false;
 curr_lexer->string_text = "";
 return curr_lexer->count_token_internal (DQ_STRING);
 }
-<DQ_STRING_START>{NL} {
-error ("unterminated character string constant");
-return LEXICAL_ERROR;
-}
 <DQ_STRING_START>\\[0-7]{1,3} {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");
 int result;
 sscanf (yytext+1, "%o", &result);
 if (result > 0xff)
 error ("invalid octal escape sequence in character string");
 else
 curr_lexer->string_text += static_cast<unsigned char> (result);
 }
-<DQ_STRING_START>"\\a" { curr_lexer->string_text += '\a'; }
+<DQ_STRING_START>"\\a" {
-<DQ_STRING_START>"\\b" { curr_lexer->string_text += '\b'; }
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");
-<DQ_STRING_START>"\\f" { curr_lexer->string_text += '\f'; }
-<DQ_STRING_START>"\\n" { curr_lexer->string_text += '\n'; }
+curr_lexer->string_text += '\a';
-<DQ_STRING_START>"\\r" { curr_lexer->string_text += '\r'; }
+}
-<DQ_STRING_START>"\\t" { curr_lexer->string_text += '\t'; }
-<DQ_STRING_START>"\\v" { curr_lexer->string_text += '\v'; }
+<DQ_STRING_START>"\\b" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");
-<DQ_STRING_START>\\{ANY_INCLUDING_NL} {
+curr_lexer->string_text += '\b';
+}
+<DQ_STRING_START>"\\f" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");
+curr_lexer->string_text += '\f';
+}
+<DQ_STRING_START>"\\n" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");
+curr_lexer->string_text += '\n';
+}
+<DQ_STRING_START>"\\r" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");
+curr_lexer->string_text += '\r';
+}
+<DQ_STRING_START>"\\t" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");
+curr_lexer->string_text += '\t';
+}
+<DQ_STRING_START>"\\v" {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");
+curr_lexer->string_text += '\v';
+}
+<DQ_STRING_START>\\{NL} {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");
+curr_lexer->decrement_promptflag ();
+curr_lexer->input_line_number++;
+curr_lexer->current_input_column = 1;
+// We can't rely on the trick used elsewhere of sticking ASCII 1
+// in the input buffer and recognizing it as a special case
+// because ASCII 1 is a valid character for a character string.
+if (curr_lexer->at_end_of_buffer ())
+return -1;
+if (curr_lexer->at_end_of_file ())
+return curr_lexer->handle_end_of_input ();
+// Otherwise, just keep going with the text from the current buffer.
+}
+<DQ_STRING_START>\\. {
+curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");
 curr_lexer->string_text += yytext[1];
 }
-<DQ_STRING_START>[^\\\n\"]+ {
+<DQ_STRING_START>[^\\\r\n\"]+ {
+curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+");
 curr_lexer->string_text += yytext;
+}
+<DQ_STRING_START>{NL} {
+curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");
+curr_lexer->input_line_number++;
+curr_lexer->current_input_column = 1;
+error ("unterminated character string constant");
+return LEXICAL_ERROR;
 }
 %{
 // Single-quoted character strings.
 %}
 <SQ_STRING_START>[^\'\n\r]*\' {
+curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]*\\'");
 yytext[yyleng-1] = 0;
 curr_lexer->string_text += yytext;
 curr_lexer->current_input_column += yyleng;
 return curr_lexer->count_token_internal (SQ_STRING);
 }
 }
 <SQ_STRING_START>{NL} {
+curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");
+curr_lexer->input_line_number++;
+curr_lexer->current_input_column = 1;
 error ("unterminated character string constant");
 return LEXICAL_ERROR;
 }
 %{
 // Imaginary numbers.
 // the constant.
 %}
 {D}+/\.[\*/\\^\'] |
 {NUMBER} {
-curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}");
+curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}");
 if (curr_lexer->previous_token_may_be_command ()
 &&  curr_lexer->space_follows_previous_token ())
 {
 yyless (0);
 %{
 // Double quotes always begin strings.
 %}
 \" {
-curr_lexer->lexer_debug ("\"");
+curr_lexer->lexer_debug ("\\\"");
 if (curr_lexer->previous_token_may_be_command ()
 &&  curr_lexer->space_follows_previous_token ())
 {
 curr_lexer->current_input_column++;
 case LINE_COMMENT_START:
 std::cerr << "LINE_COMMENT_START" << std::endl;
 break;
+case DQ_STRING_START:
+std::cerr << "DQ_STRING_START" << std::endl;
+break;
+case SQ_STRING_START:
+std::cerr << "SQ_STRING_START" << std::endl;
+break;
 default:
 std::cerr << "UNKNOWN START STATE!" << std::endl;
 break;
 }
 }

Mercurial > hg > octave-nkf

comparison libinterp/parse-tree/lex.ll @ 16903:f21194531877