Mercurial > hg > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 16903:f21194531877
improve character string handling in the lexer
* lex.ll: Add calls to lexer_debug for character string patterns.
Attempt to be consistent with handling of backslash characters in
patterns passed to lexer_debug.
(<DQ_STRING_START>\\{NL}): Handle EOF and EOB conditions explicitly.
(octave_base_lexer::display_start_state): Handle DQ_STRING_START and
SQ_STRING_START states.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Fri, 05 Jul 2013 13:28:50 -0400 |
parents | 531473481084 |
children | f29dd5a7591d |
comparison
equal
deleted
inserted
replaced
16902:51c1076a9c13 | 16903:f21194531877 |
---|---|
450 // after a block of full-line comments, finish the full line comment | 450 // after a block of full-line comments, finish the full line comment |
451 // block. | 451 // block. |
452 %} | 452 %} |
453 | 453 |
454 ^{S}*{CCHAR}\{{S}*{NL} { | 454 ^{S}*{CCHAR}\{{S}*{NL} { |
455 curr_lexer->lexer_debug ("^{S}*{CCHAR}\{{S}*{NL}"); | 455 curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}"); |
456 | 456 |
457 yyless (0); | 457 yyless (0); |
458 | 458 |
459 if (curr_lexer->start_state () == LINE_COMMENT_START) | 459 if (curr_lexer->start_state () == LINE_COMMENT_START) |
460 { | 460 { |
469 curr_lexer->push_start_state (BLOCK_COMMENT_START); | 469 curr_lexer->push_start_state (BLOCK_COMMENT_START); |
470 | 470 |
471 } | 471 } |
472 | 472 |
473 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} { | 473 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} { |
474 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL}"); | 474 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}"); |
475 | 475 |
476 curr_lexer->input_line_number++; | 476 curr_lexer->input_line_number++; |
477 curr_lexer->current_input_column = 1; | 477 curr_lexer->current_input_column = 1; |
478 | 478 |
479 if (curr_lexer->block_comment_nesting_level) | 479 if (curr_lexer->block_comment_nesting_level) |
627 %{ | 627 %{ |
628 // Double-quoted character strings. | 628 // Double-quoted character strings. |
629 %} | 629 %} |
630 | 630 |
631 <DQ_STRING_START>\"\" { | 631 <DQ_STRING_START>\"\" { |
632 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\""); | |
633 | |
632 curr_lexer->current_input_column += yyleng; | 634 curr_lexer->current_input_column += yyleng; |
633 curr_lexer->string_text += '"'; | 635 curr_lexer->string_text += '"'; |
634 } | 636 } |
635 | 637 |
636 <DQ_STRING_START>\" { | 638 <DQ_STRING_START>\" { |
639 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\""); | |
637 | 640 |
638 curr_lexer->pop_start_state (); | 641 curr_lexer->pop_start_state (); |
639 | 642 |
640 curr_lexer->looking_for_object_index = true; | 643 curr_lexer->looking_for_object_index = true; |
641 curr_lexer->at_beginning_of_statement = false; | 644 curr_lexer->at_beginning_of_statement = false; |
648 curr_lexer->string_text = ""; | 651 curr_lexer->string_text = ""; |
649 | 652 |
650 return curr_lexer->count_token_internal (DQ_STRING); | 653 return curr_lexer->count_token_internal (DQ_STRING); |
651 } | 654 } |
652 | 655 |
653 <DQ_STRING_START>{NL} { | |
654 error ("unterminated character string constant"); | |
655 return LEXICAL_ERROR; | |
656 } | |
657 | |
658 <DQ_STRING_START>\\[0-7]{1,3} { | 656 <DQ_STRING_START>\\[0-7]{1,3} { |
657 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); | |
658 | |
659 int result; | 659 int result; |
660 sscanf (yytext+1, "%o", &result); | 660 sscanf (yytext+1, "%o", &result); |
661 | 661 |
662 if (result > 0xff) | 662 if (result > 0xff) |
663 error ("invalid octal escape sequence in character string"); | 663 error ("invalid octal escape sequence in character string"); |
664 else | 664 else |
665 curr_lexer->string_text += static_cast<unsigned char> (result); | 665 curr_lexer->string_text += static_cast<unsigned char> (result); |
666 } | 666 } |
667 | 667 |
668 <DQ_STRING_START>"\\a" { curr_lexer->string_text += '\a'; } | 668 <DQ_STRING_START>"\\a" { |
669 <DQ_STRING_START>"\\b" { curr_lexer->string_text += '\b'; } | 669 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\""); |
670 <DQ_STRING_START>"\\f" { curr_lexer->string_text += '\f'; } | 670 |
671 <DQ_STRING_START>"\\n" { curr_lexer->string_text += '\n'; } | 671 curr_lexer->string_text += '\a'; |
672 <DQ_STRING_START>"\\r" { curr_lexer->string_text += '\r'; } | 672 } |
673 <DQ_STRING_START>"\\t" { curr_lexer->string_text += '\t'; } | 673 |
674 <DQ_STRING_START>"\\v" { curr_lexer->string_text += '\v'; } | 674 <DQ_STRING_START>"\\b" { |
675 | 675 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\""); |
676 <DQ_STRING_START>\\{ANY_INCLUDING_NL} { | 676 |
677 curr_lexer->string_text += '\b'; | |
678 } | |
679 | |
680 <DQ_STRING_START>"\\f" { | |
681 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\""); | |
682 | |
683 curr_lexer->string_text += '\f'; | |
684 } | |
685 | |
686 <DQ_STRING_START>"\\n" { | |
687 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\""); | |
688 | |
689 curr_lexer->string_text += '\n'; | |
690 } | |
691 | |
692 <DQ_STRING_START>"\\r" { | |
693 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\""); | |
694 | |
695 curr_lexer->string_text += '\r'; | |
696 } | |
697 | |
698 <DQ_STRING_START>"\\t" { | |
699 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\""); | |
700 | |
701 curr_lexer->string_text += '\t'; | |
702 } | |
703 | |
704 <DQ_STRING_START>"\\v" { | |
705 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\""); | |
706 | |
707 curr_lexer->string_text += '\v'; | |
708 } | |
709 | |
710 <DQ_STRING_START>\\{NL} { | |
711 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}"); | |
712 | |
713 curr_lexer->decrement_promptflag (); | |
714 curr_lexer->input_line_number++; | |
715 curr_lexer->current_input_column = 1; | |
716 | |
717 // We can't rely on the trick used elsewhere of sticking ASCII 1 | |
718 // in the input buffer and recognizing it as a special case | |
719 // because ASCII 1 is a valid character for a character string. | |
720 | |
721 if (curr_lexer->at_end_of_buffer ()) | |
722 return -1; | |
723 | |
724 if (curr_lexer->at_end_of_file ()) | |
725 return curr_lexer->handle_end_of_input (); | |
726 | |
727 // Otherwise, just keep going with the text from the current buffer. | |
728 } | |
729 | |
730 <DQ_STRING_START>\\. { | |
731 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\."); | |
732 | |
677 curr_lexer->string_text += yytext[1]; | 733 curr_lexer->string_text += yytext[1]; |
678 } | 734 } |
679 | 735 |
680 <DQ_STRING_START>[^\\\n\"]+ { | 736 <DQ_STRING_START>[^\\\r\n\"]+ { |
737 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+"); | |
738 | |
681 curr_lexer->string_text += yytext; | 739 curr_lexer->string_text += yytext; |
740 } | |
741 | |
742 <DQ_STRING_START>{NL} { | |
743 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); | |
744 | |
745 curr_lexer->input_line_number++; | |
746 curr_lexer->current_input_column = 1; | |
747 | |
748 error ("unterminated character string constant"); | |
749 | |
750 return LEXICAL_ERROR; | |
682 } | 751 } |
683 | 752 |
684 %{ | 753 %{ |
685 // Single-quoted character strings. | 754 // Single-quoted character strings. |
686 %} | 755 %} |
687 | 756 |
688 <SQ_STRING_START>[^\'\n\r]*\' { | 757 <SQ_STRING_START>[^\'\n\r]*\' { |
758 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]*\\'"); | |
759 | |
689 yytext[yyleng-1] = 0; | 760 yytext[yyleng-1] = 0; |
690 curr_lexer->string_text += yytext; | 761 curr_lexer->string_text += yytext; |
691 | 762 |
692 curr_lexer->current_input_column += yyleng; | 763 curr_lexer->current_input_column += yyleng; |
693 | 764 |
718 return curr_lexer->count_token_internal (SQ_STRING); | 789 return curr_lexer->count_token_internal (SQ_STRING); |
719 } | 790 } |
720 } | 791 } |
721 | 792 |
722 <SQ_STRING_START>{NL} { | 793 <SQ_STRING_START>{NL} { |
794 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); | |
795 | |
796 curr_lexer->input_line_number++; | |
797 curr_lexer->current_input_column = 1; | |
798 | |
723 error ("unterminated character string constant"); | 799 error ("unterminated character string constant"); |
800 | |
724 return LEXICAL_ERROR; | 801 return LEXICAL_ERROR; |
725 } | 802 } |
726 | 803 |
727 %{ | 804 %{ |
728 // Imaginary numbers. | 805 // Imaginary numbers. |
762 // the constant. | 839 // the constant. |
763 %} | 840 %} |
764 | 841 |
765 {D}+/\.[\*/\\^\'] | | 842 {D}+/\.[\*/\\^\'] | |
766 {NUMBER} { | 843 {NUMBER} { |
767 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); | 844 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}"); |
768 | 845 |
769 if (curr_lexer->previous_token_may_be_command () | 846 if (curr_lexer->previous_token_may_be_command () |
770 && curr_lexer->space_follows_previous_token ()) | 847 && curr_lexer->space_follows_previous_token ()) |
771 { | 848 { |
772 yyless (0); | 849 yyless (0); |
1032 %{ | 1109 %{ |
1033 // Double quotes always begin strings. | 1110 // Double quotes always begin strings. |
1034 %} | 1111 %} |
1035 | 1112 |
1036 \" { | 1113 \" { |
1037 curr_lexer->lexer_debug ("\""); | 1114 curr_lexer->lexer_debug ("\\\""); |
1038 | 1115 |
1039 if (curr_lexer->previous_token_may_be_command () | 1116 if (curr_lexer->previous_token_may_be_command () |
1040 && curr_lexer->space_follows_previous_token ()) | 1117 && curr_lexer->space_follows_previous_token ()) |
1041 { | 1118 { |
1042 curr_lexer->current_input_column++; | 1119 curr_lexer->current_input_column++; |
2886 | 2963 |
2887 case LINE_COMMENT_START: | 2964 case LINE_COMMENT_START: |
2888 std::cerr << "LINE_COMMENT_START" << std::endl; | 2965 std::cerr << "LINE_COMMENT_START" << std::endl; |
2889 break; | 2966 break; |
2890 | 2967 |
2968 case DQ_STRING_START: | |
2969 std::cerr << "DQ_STRING_START" << std::endl; | |
2970 break; | |
2971 | |
2972 case SQ_STRING_START: | |
2973 std::cerr << "SQ_STRING_START" << std::endl; | |
2974 break; | |
2975 | |
2891 default: | 2976 default: |
2892 std::cerr << "UNKNOWN START STATE!" << std::endl; | 2977 std::cerr << "UNKNOWN START STATE!" << std::endl; |
2893 break; | 2978 break; |
2894 } | 2979 } |
2895 } | 2980 } |