comparison libinterp/parse-tree/lex.ll @ 16903:f21194531877

improve character string handling in the lexer

* lex.ll: Add calls to lexer_debug for character string patterns.
Attempt to be consistent with handling of backslash characters in patterns passed to lexer_debug.
(<DQ_STRING_START>\\{NL}): Handle EOF and EOB conditions explicitly.
(octave_base_lexer::display_start_state): Handle DQ_STRING_START and SQ_STRING_START states.
author John W. Eaton <jwe@octave.org>
date Fri, 05 Jul 2013 13:28:50 -0400
parents 531473481084
children f29dd5a7591d
comparison
equal deleted inserted replaced
16902:51c1076a9c13 16903:f21194531877
450 // after a block of full-line comments, finish the full line comment 450 // after a block of full-line comments, finish the full line comment
451 // block. 451 // block.
452 %} 452 %}
453 453
454 ^{S}*{CCHAR}\{{S}*{NL} { 454 ^{S}*{CCHAR}\{{S}*{NL} {
455 curr_lexer->lexer_debug ("^{S}*{CCHAR}\{{S}*{NL}"); 455 curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}");
456 456
457 yyless (0); 457 yyless (0);
458 458
459 if (curr_lexer->start_state () == LINE_COMMENT_START) 459 if (curr_lexer->start_state () == LINE_COMMENT_START)
460 { 460 {
469 curr_lexer->push_start_state (BLOCK_COMMENT_START); 469 curr_lexer->push_start_state (BLOCK_COMMENT_START);
470 470
471 } 471 }
472 472
473 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} { 473 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
474 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL}"); 474 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");
475 475
476 curr_lexer->input_line_number++; 476 curr_lexer->input_line_number++;
477 curr_lexer->current_input_column = 1; 477 curr_lexer->current_input_column = 1;
478 478
479 if (curr_lexer->block_comment_nesting_level) 479 if (curr_lexer->block_comment_nesting_level)
627 %{ 627 %{
628 // Double-quoted character strings. 628 // Double-quoted character strings.
629 %} 629 %}
630 630
631 <DQ_STRING_START>\"\" { 631 <DQ_STRING_START>\"\" {
632 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\"");
633
632 curr_lexer->current_input_column += yyleng; 634 curr_lexer->current_input_column += yyleng;
633 curr_lexer->string_text += '"'; 635 curr_lexer->string_text += '"';
634 } 636 }
635 637
636 <DQ_STRING_START>\" { 638 <DQ_STRING_START>\" {
639 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");
637 640
638 curr_lexer->pop_start_state (); 641 curr_lexer->pop_start_state ();
639 642
640 curr_lexer->looking_for_object_index = true; 643 curr_lexer->looking_for_object_index = true;
641 curr_lexer->at_beginning_of_statement = false; 644 curr_lexer->at_beginning_of_statement = false;
648 curr_lexer->string_text = ""; 651 curr_lexer->string_text = "";
649 652
650 return curr_lexer->count_token_internal (DQ_STRING); 653 return curr_lexer->count_token_internal (DQ_STRING);
651 } 654 }
652 655
653 <DQ_STRING_START>{NL} {
654 error ("unterminated character string constant");
655 return LEXICAL_ERROR;
656 }
657
658 <DQ_STRING_START>\\[0-7]{1,3} { 656 <DQ_STRING_START>\\[0-7]{1,3} {
657 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");
658
659 int result; 659 int result;
660 sscanf (yytext+1, "%o", &result); 660 sscanf (yytext+1, "%o", &result);
661 661
662 if (result > 0xff) 662 if (result > 0xff)
663 error ("invalid octal escape sequence in character string"); 663 error ("invalid octal escape sequence in character string");
664 else 664 else
665 curr_lexer->string_text += static_cast<unsigned char> (result); 665 curr_lexer->string_text += static_cast<unsigned char> (result);
666 } 666 }
667 667
668 <DQ_STRING_START>"\\a" { curr_lexer->string_text += '\a'; } 668 <DQ_STRING_START>"\\a" {
669 <DQ_STRING_START>"\\b" { curr_lexer->string_text += '\b'; } 669 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");
670 <DQ_STRING_START>"\\f" { curr_lexer->string_text += '\f'; } 670
671 <DQ_STRING_START>"\\n" { curr_lexer->string_text += '\n'; } 671 curr_lexer->string_text += '\a';
672 <DQ_STRING_START>"\\r" { curr_lexer->string_text += '\r'; } 672 }
673 <DQ_STRING_START>"\\t" { curr_lexer->string_text += '\t'; } 673
674 <DQ_STRING_START>"\\v" { curr_lexer->string_text += '\v'; } 674 <DQ_STRING_START>"\\b" {
675 675 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");
676 <DQ_STRING_START>\\{ANY_INCLUDING_NL} { 676
677 curr_lexer->string_text += '\b';
678 }
679
680 <DQ_STRING_START>"\\f" {
681 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");
682
683 curr_lexer->string_text += '\f';
684 }
685
686 <DQ_STRING_START>"\\n" {
687 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");
688
689 curr_lexer->string_text += '\n';
690 }
691
692 <DQ_STRING_START>"\\r" {
693 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");
694
695 curr_lexer->string_text += '\r';
696 }
697
698 <DQ_STRING_START>"\\t" {
699 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");
700
701 curr_lexer->string_text += '\t';
702 }
703
704 <DQ_STRING_START>"\\v" {
705 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");
706
707 curr_lexer->string_text += '\v';
708 }
709
710 <DQ_STRING_START>\\{NL} {
711 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");
712
713 curr_lexer->decrement_promptflag ();
714 curr_lexer->input_line_number++;
715 curr_lexer->current_input_column = 1;
716
717 // We can't rely on the trick used elsewhere of sticking ASCII 1
718 // in the input buffer and recognizing it as a special case
719 // because ASCII 1 is a valid character for a character string.
720
721 if (curr_lexer->at_end_of_buffer ())
722 return -1;
723
724 if (curr_lexer->at_end_of_file ())
725 return curr_lexer->handle_end_of_input ();
726
727 // Otherwise, just keep going with the text from the current buffer.
728 }
729
730 <DQ_STRING_START>\\. {
731 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");
732
677 curr_lexer->string_text += yytext[1]; 733 curr_lexer->string_text += yytext[1];
678 } 734 }
679 735
680 <DQ_STRING_START>[^\\\n\"]+ { 736 <DQ_STRING_START>[^\\\r\n\"]+ {
737 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+");
738
681 curr_lexer->string_text += yytext; 739 curr_lexer->string_text += yytext;
740 }
741
742 <DQ_STRING_START>{NL} {
743 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");
744
745 curr_lexer->input_line_number++;
746 curr_lexer->current_input_column = 1;
747
748 error ("unterminated character string constant");
749
750 return LEXICAL_ERROR;
682 } 751 }
683 752
684 %{ 753 %{
685 // Single-quoted character strings. 754 // Single-quoted character strings.
686 %} 755 %}
687 756
688 <SQ_STRING_START>[^\'\n\r]*\' { 757 <SQ_STRING_START>[^\'\n\r]*\' {
758 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]*\\'");
759
689 yytext[yyleng-1] = 0; 760 yytext[yyleng-1] = 0;
690 curr_lexer->string_text += yytext; 761 curr_lexer->string_text += yytext;
691 762
692 curr_lexer->current_input_column += yyleng; 763 curr_lexer->current_input_column += yyleng;
693 764
718 return curr_lexer->count_token_internal (SQ_STRING); 789 return curr_lexer->count_token_internal (SQ_STRING);
719 } 790 }
720 } 791 }
721 792
722 <SQ_STRING_START>{NL} { 793 <SQ_STRING_START>{NL} {
794 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");
795
796 curr_lexer->input_line_number++;
797 curr_lexer->current_input_column = 1;
798
723 error ("unterminated character string constant"); 799 error ("unterminated character string constant");
800
724 return LEXICAL_ERROR; 801 return LEXICAL_ERROR;
725 } 802 }
726 803
727 %{ 804 %{
728 // Imaginary numbers. 805 // Imaginary numbers.
762 // the constant. 839 // the constant.
763 %} 840 %}
764 841
765 {D}+/\.[\*/\\^\'] | 842 {D}+/\.[\*/\\^\'] |
766 {NUMBER} { 843 {NUMBER} {
767 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); 844 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}");
768 845
769 if (curr_lexer->previous_token_may_be_command () 846 if (curr_lexer->previous_token_may_be_command ()
770 && curr_lexer->space_follows_previous_token ()) 847 && curr_lexer->space_follows_previous_token ())
771 { 848 {
772 yyless (0); 849 yyless (0);
1032 %{ 1109 %{
1033 // Double quotes always begin strings. 1110 // Double quotes always begin strings.
1034 %} 1111 %}
1035 1112
1036 \" { 1113 \" {
1037 curr_lexer->lexer_debug ("\""); 1114 curr_lexer->lexer_debug ("\\\"");
1038 1115
1039 if (curr_lexer->previous_token_may_be_command () 1116 if (curr_lexer->previous_token_may_be_command ()
1040 && curr_lexer->space_follows_previous_token ()) 1117 && curr_lexer->space_follows_previous_token ())
1041 { 1118 {
1042 curr_lexer->current_input_column++; 1119 curr_lexer->current_input_column++;
2886 2963
2887 case LINE_COMMENT_START: 2964 case LINE_COMMENT_START:
2888 std::cerr << "LINE_COMMENT_START" << std::endl; 2965 std::cerr << "LINE_COMMENT_START" << std::endl;
2889 break; 2966 break;
2890 2967
2968 case DQ_STRING_START:
2969 std::cerr << "DQ_STRING_START" << std::endl;
2970 break;
2971
2972 case SQ_STRING_START:
2973 std::cerr << "SQ_STRING_START" << std::endl;
2974 break;
2975
2891 default: 2976 default:
2892 std::cerr << "UNKNOWN START STATE!" << std::endl; 2977 std::cerr << "UNKNOWN START STATE!" << std::endl;
2893 break; 2978 break;
2894 } 2979 }
2895 } 2980 }