Mercurial > hg > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 16904:f29dd5a7591d
more tweaks for parsing character strings
* lex.ll (<SQ_STRING_START>\'\', <SQ_STRING_START>\',
<SQ_STRING_START>[^\'\n\r]+): New patterns to replace
<SQ_STRING_START>[^\'\n\r]*\'.
(<SQ_STRING_START>[^\'\n\r]*\'): Delete.
Attempt to correctly update input position for all patterns.
(<DQ_STRING_START>\\{NL}): Only check for EOB or EOF if we are using
the push lexer interface.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Fri, 05 Jul 2013 13:56:21 -0400 |
parents | f21194531877 |
children | bc12849bb6cc |
comparison
legend: equal | deleted | inserted | replaced
16903:f21194531877 | 16904:f29dd5a7591d |
---|---|
636 } | 636 } |
637 | 637 |
638 <DQ_STRING_START>\" { | 638 <DQ_STRING_START>\" { |
639 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\""); | 639 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\""); |
640 | 640 |
641 curr_lexer->current_input_column++; | |
642 | |
641 curr_lexer->pop_start_state (); | 643 curr_lexer->pop_start_state (); |
642 | 644 |
643 curr_lexer->looking_for_object_index = true; | 645 curr_lexer->looking_for_object_index = true; |
644 curr_lexer->at_beginning_of_statement = false; | 646 curr_lexer->at_beginning_of_statement = false; |
645 | 647 |
654 } | 656 } |
655 | 657 |
656 <DQ_STRING_START>\\[0-7]{1,3} { | 658 <DQ_STRING_START>\\[0-7]{1,3} { |
657 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); | 659 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); |
658 | 660 |
661 curr_lexer->current_input_column += yyleng; | |
662 | |
659 int result; | 663 int result; |
660 sscanf (yytext+1, "%o", &result); | 664 sscanf (yytext+1, "%o", &result); |
661 | 665 |
662 if (result > 0xff) | 666 if (result > 0xff) |
663 error ("invalid octal escape sequence in character string"); | 667 error ("invalid octal escape sequence in character string"); |
666 } | 670 } |
667 | 671 |
668 <DQ_STRING_START>"\\a" { | 672 <DQ_STRING_START>"\\a" { |
669 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\""); | 673 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\""); |
670 | 674 |
675 curr_lexer->current_input_column += yyleng; | |
671 curr_lexer->string_text += '\a'; | 676 curr_lexer->string_text += '\a'; |
672 } | 677 } |
673 | 678 |
674 <DQ_STRING_START>"\\b" { | 679 <DQ_STRING_START>"\\b" { |
675 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\""); | 680 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\""); |
676 | 681 |
682 curr_lexer->current_input_column += yyleng; | |
677 curr_lexer->string_text += '\b'; | 683 curr_lexer->string_text += '\b'; |
678 } | 684 } |
679 | 685 |
680 <DQ_STRING_START>"\\f" { | 686 <DQ_STRING_START>"\\f" { |
681 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\""); | 687 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\""); |
682 | 688 |
689 curr_lexer->current_input_column += yyleng; | |
683 curr_lexer->string_text += '\f'; | 690 curr_lexer->string_text += '\f'; |
684 } | 691 } |
685 | 692 |
686 <DQ_STRING_START>"\\n" { | 693 <DQ_STRING_START>"\\n" { |
687 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\""); | 694 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\""); |
688 | 695 |
696 curr_lexer->current_input_column += yyleng; | |
689 curr_lexer->string_text += '\n'; | 697 curr_lexer->string_text += '\n'; |
690 } | 698 } |
691 | 699 |
692 <DQ_STRING_START>"\\r" { | 700 <DQ_STRING_START>"\\r" { |
693 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\""); | 701 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\""); |
694 | 702 |
703 curr_lexer->current_input_column += yyleng; | |
695 curr_lexer->string_text += '\r'; | 704 curr_lexer->string_text += '\r'; |
696 } | 705 } |
697 | 706 |
698 <DQ_STRING_START>"\\t" { | 707 <DQ_STRING_START>"\\t" { |
699 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\""); | 708 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\""); |
700 | 709 |
710 curr_lexer->current_input_column += yyleng; | |
701 curr_lexer->string_text += '\t'; | 711 curr_lexer->string_text += '\t'; |
702 } | 712 } |
703 | 713 |
704 <DQ_STRING_START>"\\v" { | 714 <DQ_STRING_START>"\\v" { |
705 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\""); | 715 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\""); |
706 | 716 |
717 curr_lexer->current_input_column += yyleng; | |
707 curr_lexer->string_text += '\v'; | 718 curr_lexer->string_text += '\v'; |
708 } | 719 } |
709 | 720 |
710 <DQ_STRING_START>\\{NL} { | 721 <DQ_STRING_START>\\{NL} { |
711 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}"); | 722 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}"); |
712 | 723 |
713 curr_lexer->decrement_promptflag (); | 724 curr_lexer->decrement_promptflag (); |
714 curr_lexer->input_line_number++; | 725 curr_lexer->input_line_number++; |
715 curr_lexer->current_input_column = 1; | 726 curr_lexer->current_input_column = 1; |
716 | 727 |
717 // We can't rely on the trick used elsewhere of sticking ASCII 1 | 728 if (curr_lexer->is_push_lexer ()) |
718 // in the intput buffer and recognizing it as a special case | 729 { |
719 // because ASCII 1 is a valid character for a character string. | 730 // We can't rely on the trick used elsewhere of sticking ASCII |
720 | 731 // 1 in the input buffer and recognizing it as a special case |
721 if (curr_lexer->at_end_of_buffer ()) | 732 // because ASCII 1 is a valid character for a character |
722 return -1; | 733 // string. If we are at the end of the buffer, ask for more |
723 | 734 // input. If we are at the end of the file, deal with it. |
724 if (curr_lexer->at_end_of_file ()) | 735 // Otherwise, just keep going with the text from the current |
725 return curr_lexer->handle_end_of_input (); | 736 // buffer. |
726 | 737 |
727 // Otherwise, just keep going with the text from the current buffer. | 738 if (curr_lexer->at_end_of_buffer ()) |
739 return -1; | |
740 | |
741 if (curr_lexer->at_end_of_file ()) | |
742 return curr_lexer->handle_end_of_input (); | |
743 } | |
728 } | 744 } |
729 | 745 |
730 <DQ_STRING_START>\\. { | 746 <DQ_STRING_START>\\. { |
731 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\."); | 747 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\."); |
732 | 748 |
749 curr_lexer->current_input_column += yyleng; | |
733 curr_lexer->string_text += yytext[1]; | 750 curr_lexer->string_text += yytext[1]; |
734 } | 751 } |
735 | 752 |
736 <DQ_STRING_START>[^\\\r\n\"]+ { | 753 <DQ_STRING_START>[^\\\r\n\"]+ { |
737 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+"); | 754 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+"); |
738 | 755 |
756 curr_lexer->current_input_column += yyleng; | |
739 curr_lexer->string_text += yytext; | 757 curr_lexer->string_text += yytext; |
740 } | 758 } |
741 | 759 |
742 <DQ_STRING_START>{NL} { | 760 <DQ_STRING_START>{NL} { |
743 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); | 761 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); |
752 | 770 |
753 %{ | 771 %{ |
754 // Single-quoted character strings. | 772 // Single-quoted character strings. |
755 %} | 773 %} |
756 | 774 |
757 <SQ_STRING_START>[^\'\n\r]*\' { | 775 <SQ_STRING_START>\'\' { |
758 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]*\\'"); | 776 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'"); |
759 | 777 |
760 yytext[yyleng-1] = 0; | 778 curr_lexer->current_input_column += yyleng; |
779 curr_lexer->string_text += '\''; | |
780 } | |
781 | |
782 <SQ_STRING_START>\' { | |
783 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'"); | |
784 | |
785 curr_lexer->current_input_column++; | |
786 | |
787 curr_lexer->pop_start_state (); | |
788 | |
789 curr_lexer->looking_for_object_index = true; | |
790 curr_lexer->at_beginning_of_statement = false; | |
791 | |
792 curr_lexer->push_token (new token (SQ_STRING, | |
793 curr_lexer->string_text, | |
794 curr_lexer->string_line, | |
795 curr_lexer->string_column)); | |
796 | |
797 curr_lexer->string_text = ""; | |
798 | |
799 return curr_lexer->count_token_internal (SQ_STRING); | |
800 } | |
801 | |
802 <SQ_STRING_START>[^\'\n\r]+ { | |
803 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+"); | |
804 | |
805 curr_lexer->current_input_column += yyleng; | |
761 curr_lexer->string_text += yytext; | 806 curr_lexer->string_text += yytext; |
762 | |
763 curr_lexer->current_input_column += yyleng; | |
764 | |
765 int c = curr_lexer->text_yyinput (); | |
766 | |
767 if (c == '\'') | |
768 { | |
769 curr_lexer->string_text += c; | |
770 | |
771 curr_lexer->current_input_column++; | |
772 } | |
773 else | |
774 { | |
775 curr_lexer->xunput (c); | |
776 | |
777 curr_lexer->pop_start_state (); | |
778 | |
779 curr_lexer->looking_for_object_index = true; | |
780 curr_lexer->at_beginning_of_statement = false; | |
781 | |
782 curr_lexer->push_token (new token (SQ_STRING, | |
783 curr_lexer->string_text, | |
784 curr_lexer->string_line, | |
785 curr_lexer->string_column)); | |
786 | |
787 curr_lexer->string_text = ""; | |
788 | |
789 return curr_lexer->count_token_internal (SQ_STRING); | |
790 } | |
791 } | 807 } |
792 | 808 |
793 <SQ_STRING_START>{NL} { | 809 <SQ_STRING_START>{NL} { |
794 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); | 810 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); |
795 | 811 |