Mercurial > hg > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 16263:9acb86e6ac90
4/10 commits reworking the lexer
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 11 Mar 2013 14:28:11 -0400 |
parents | b45a90cdb0ae |
children | 6077d13ddb3b 71ee3afedb69 |
comparison legend: equal | deleted | inserted | replaced
16261:b45a90cdb0ae | 16263:9acb86e6ac90 |
---|---|
242 <MATRIX_START>{NL} { | 242 <MATRIX_START>{NL} { |
243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}"); | 243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}"); |
244 | 244 |
245 int tok = curr_lexer->previous_token_value (); | 245 int tok = curr_lexer->previous_token_value (); |
246 | 246 |
247 if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{')) | 247 if (! (tok == ';' || tok == '[' || tok == '{')) |
248 curr_lexer->xunput (','); | 248 curr_lexer->xunput (';'); |
249 } | 249 } |
250 | 250 |
251 <KLUGE>@ { | 251 <KLUGE>@ { |
252 curr_lexer->lexer_debug ("<KLUGE>@"); | 252 curr_lexer->lexer_debug ("<KLUGE>@"); |
253 curr_lexer->pop_start_state (); | 253 curr_lexer->pop_start_state (); |
299 } | 299 } |
300 | 300 |
301 \[ { | 301 \[ { |
302 curr_lexer->lexer_debug ("\\["); | 302 curr_lexer->lexer_debug ("\\["); |
303 | 303 |
304 curr_lexer->nesting_level.bracket (); | 304 bool unput_comma = false; |
305 | 305 |
306 curr_lexer->looking_at_object_index.push_front (false); | 306 if (curr_lexer->whitespace_is_significant () |
307 | 307 && curr_lexer->space_follows_previous_token ()) |
308 curr_lexer->current_input_column += yyleng; | 308 { |
309 curr_lexer->looking_for_object_index = false; | 309 int tok = curr_lexer->previous_token_value (); |
310 curr_lexer->at_beginning_of_statement = false; | 310 |
311 | 311 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' |
312 if (curr_lexer->defining_func | 312 || curr_lexer->previous_token_is_binop ())) |
313 && ! curr_lexer->parsed_function_name.top ()) | 313 unput_comma = true; |
314 curr_lexer->looking_at_return_list = true; | 314 } |
315 | |
316 if (unput_comma) | |
317 { | |
318 yyless (0); | |
319 curr_lexer->xunput (','); | |
320 } | |
315 else | 321 else |
316 curr_lexer->looking_at_matrix_or_assign_lhs = true; | 322 { |
317 | 323 curr_lexer->nesting_level.bracket (); |
318 curr_lexer->decrement_promptflag (); | 324 |
319 | 325 curr_lexer->looking_at_object_index.push_front (false); |
320 curr_lexer->bracketflag++; | 326 |
321 | 327 curr_lexer->current_input_column += yyleng; |
322 curr_lexer->push_start_state (MATRIX_START); | 328 curr_lexer->looking_for_object_index = false; |
323 | 329 curr_lexer->at_beginning_of_statement = false; |
324 return curr_lexer->count_token ('['); | 330 |
331 if (curr_lexer->defining_func | |
332 && ! curr_lexer->parsed_function_name.top ()) | |
333 curr_lexer->looking_at_return_list = true; | |
334 else | |
335 curr_lexer->looking_at_matrix_or_assign_lhs = true; | |
336 | |
337 curr_lexer->decrement_promptflag (); | |
338 | |
339 curr_lexer->bracketflag++; | |
340 | |
341 curr_lexer->push_start_state (MATRIX_START); | |
342 | |
343 return curr_lexer->count_token ('['); | |
344 } | |
325 } | 345 } |
326 | 346 |
327 \] { | 347 \] { |
328 curr_lexer->lexer_debug ("\\]"); | 348 curr_lexer->lexer_debug ("\\]"); |
329 | 349 |
483 %} | 503 %} |
484 | 504 |
485 {NUMBER}{Im} { | 505 {NUMBER}{Im} { |
486 curr_lexer->lexer_debug ("{NUMBER}{Im}"); | 506 curr_lexer->lexer_debug ("{NUMBER}{Im}"); |
487 | 507 |
508 int tok = curr_lexer->previous_token_value (); | |
509 | |
488 if (curr_lexer->whitespace_is_significant () | 510 if (curr_lexer->whitespace_is_significant () |
489 && curr_lexer->space_follows_previous_token () | 511 && curr_lexer->space_follows_previous_token () |
490 && ! curr_lexer->previous_token_is_binop ()) | 512 && ! (tok == '[' || tok == '{' |
513 || curr_lexer->previous_token_is_binop ())) | |
491 { | 514 { |
492 yyless (0); | 515 yyless (0); |
493 unput (','); | 516 unput (','); |
494 } | 517 } |
495 else | 518 else |
506 | 529 |
507 {D}+/\.[\*/\\^\'] | | 530 {D}+/\.[\*/\\^\'] | |
508 {NUMBER} { | 531 {NUMBER} { |
509 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); | 532 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); |
510 | 533 |
534 int tok = curr_lexer->previous_token_value (); | |
535 | |
511 if (curr_lexer->whitespace_is_significant () | 536 if (curr_lexer->whitespace_is_significant () |
512 && curr_lexer->space_follows_previous_token () | 537 && curr_lexer->space_follows_previous_token () |
513 && ! curr_lexer->previous_token_is_binop ()) | 538 && ! (tok == '[' || tok == '{' |
539 || curr_lexer->previous_token_is_binop ())) | |
514 { | 540 { |
515 yyless (0); | 541 yyless (0); |
516 unput (','); | 542 unput (','); |
517 } | 543 } |
518 else | 544 else |
569 yyless (0); | 595 yyless (0); |
570 unput (','); | 596 unput (','); |
571 } | 597 } |
572 else | 598 else |
573 { | 599 { |
574 if (curr_lexer->previous_token_may_be_command ()) | 600 if (! curr_lexer->looking_at_decl_list |
601 && curr_lexer->previous_token_may_be_command ()) | |
575 { | 602 { |
576 yyless (0); | 603 yyless (0); |
577 curr_lexer->push_start_state (COMMAND_START); | 604 curr_lexer->push_start_state (COMMAND_START); |
578 } | 605 } |
579 else | 606 else |
694 curr_lexer->xunput (','); | 721 curr_lexer->xunput (','); |
695 } | 722 } |
696 } | 723 } |
697 else | 724 else |
698 { | 725 { |
699 if (tok == ',' || tok == ';' | 726 if (tok == ',' || tok == ';' || tok == '[' || tok == '{' |
700 || curr_lexer->previous_token_is_binop ()) | 727 || curr_lexer->previous_token_is_binop ()) |
701 { | 728 { |
702 curr_lexer->current_input_column++; | 729 curr_lexer->current_input_column++; |
703 int retval = curr_lexer->handle_string ('\''); | 730 int retval = curr_lexer->handle_string ('\''); |
704 return curr_lexer->count_token_internal (retval); | 731 return curr_lexer->count_token_internal (retval); |
734 | 761 |
735 if (curr_lexer->whitespace_is_significant ()) | 762 if (curr_lexer->whitespace_is_significant ()) |
736 { | 763 { |
737 if (curr_lexer->space_follows_previous_token ()) | 764 if (curr_lexer->space_follows_previous_token ()) |
738 { | 765 { |
739 if (tok == '[' || tok == '{' | 766 if (tok == ',' || tok == ';' || tok == '[' || tok == '{' |
740 || curr_lexer->previous_token_is_binop ()) | 767 || curr_lexer->previous_token_is_binop ()) |
741 { | 768 { |
742 curr_lexer->current_input_column++; | 769 curr_lexer->current_input_column++; |
743 int retval = curr_lexer->handle_string ('"'); | 770 int retval = curr_lexer->handle_string ('"'); |
744 return curr_lexer->count_token_internal (retval); | 771 return curr_lexer->count_token_internal (retval); |
783 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } | 810 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } |
784 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } | 811 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } |
785 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } | 812 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } |
786 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } | 813 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } |
787 ">" { return curr_lexer->handle_op (">", EXPR_GT); } | 814 ">" { return curr_lexer->handle_op (">", EXPR_GT); } |
788 "+" { return curr_lexer->handle_op ("+", '+'); } | |
789 "-" { return curr_lexer->handle_op ("-", '-'); } | |
790 "*" { return curr_lexer->handle_op ("*", '*'); } | 815 "*" { return curr_lexer->handle_op ("*", '*'); } |
791 "/" { return curr_lexer->handle_op ("/", '/'); } | 816 "/" { return curr_lexer->handle_op ("/", '/'); } |
792 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } | 817 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } |
793 "^" { return curr_lexer->handle_op ("^", POW); } | 818 "^" { return curr_lexer->handle_op ("^", POW); } |
794 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } | 819 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } |
795 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } | 820 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } |
796 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } | 821 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } |
797 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } | 822 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } |
798 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } | 823 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } |
799 "~" { return curr_lexer->handle_op ("~", EXPR_NOT); } | |
800 "!" { return curr_lexer->handle_incompatible_op ("!", EXPR_NOT); } | |
801 ";" { return curr_lexer->handle_op (";", ';', true, true); } | 824 ";" { return curr_lexer->handle_op (";", ';', true, true); } |
825 | |
826 "+" { | |
827 int tok = curr_lexer->handle_unary_op ("+", '+'); | |
828 | |
829 if (tok < 0) | |
830 { | |
831 yyless (0); | |
832 curr_lexer->xunput (','); | |
833 } | |
834 else | |
835 return tok; | |
836 } | |
837 | |
838 "-" { | |
839 int prev_tok = curr_lexer->previous_token_value (); | |
840 bool space_before = curr_lexer->space_follows_previous_token (); | |
841 int c = curr_lexer->text_yyinput (); | |
842 curr_lexer->xunput (c); | |
843 bool space_after = (c == ' ' || c == '\t'); | |
844 | |
845 if (space_before && ! space_after | |
846 && curr_lexer->previous_token_may_be_command ()) | |
847 { | |
848 yyless (0); | |
849 curr_lexer->push_start_state (COMMAND_START); | |
850 } | |
851 else | |
852 { | |
853 int tok = curr_lexer->handle_unary_op ("-", '-'); | |
854 | |
855 if (tok < 0) | |
856 { | |
857 yyless (0); | |
858 curr_lexer->xunput (','); | |
859 } | |
860 else | |
861 return tok; | |
862 } | |
863 } | |
864 | |
865 "~" { | |
866 int tok = curr_lexer->handle_unary_op ("~", EXPR_NOT); | |
867 | |
868 if (tok < 0) | |
869 { | |
870 yyless (0); | |
871 curr_lexer->xunput (','); | |
872 } | |
873 else | |
874 return tok; | |
875 } | |
876 | |
877 "!" { | |
878 int tok = curr_lexer->handle_incompatible_unary_op ("!", EXPR_NOT); | |
879 | |
880 if (tok < 0) | |
881 { | |
882 yyless (0); | |
883 curr_lexer->xunput (','); | |
884 } | |
885 else | |
886 return tok; | |
887 } | |
802 | 888 |
803 "," { | 889 "," { |
804 return curr_lexer->handle_op | 890 return curr_lexer->handle_op |
805 (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); | 891 (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); |
806 } | 892 } |
808 ".'" { | 894 ".'" { |
809 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); | 895 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); |
810 } | 896 } |
811 | 897 |
812 "++" { | 898 "++" { |
813 return curr_lexer->handle_incompatible_op | 899 int tok = curr_lexer->handle_incompatible_unary_op |
814 ("++", PLUS_PLUS, true, false, true); | 900 ("++", PLUS_PLUS, true, false, true); |
901 | |
902 if (tok < 0) | |
903 { | |
904 yyless (0); | |
905 curr_lexer->xunput (','); | |
906 } | |
907 else | |
908 return tok; | |
815 } | 909 } |
816 | 910 |
817 "--" { | 911 "--" { |
818 ; | 912 int tok = curr_lexer->handle_incompatible_unary_op |
819 return curr_lexer->handle_incompatible_op | 913 ("--", MINUS_MINUS, true, false, true); |
820 ("--", MINUS_MINUS, true, false, true); | 914 |
915 if (tok < 0) | |
916 { | |
917 yyless (0); | |
918 curr_lexer->xunput (','); | |
919 } | |
920 else | |
921 return tok; | |
821 } | 922 } |
822 | 923 |
823 "(" { | 924 "(" { |
824 curr_lexer->lexer_debug ("("); | 925 curr_lexer->lexer_debug ("("); |
825 | 926 |
826 // If we are looking for an object index, then push TRUE for | 927 bool unput_comma = false; |
827 // looking_at_object_index. Otherwise, just push whatever state | 928 |
828 // is current (so that we can pop it off the stack when we find | 929 if (curr_lexer->whitespace_is_significant () |
829 // the matching close paren). | 930 && curr_lexer->space_follows_previous_token ()) |
830 | 931 { |
831 curr_lexer->looking_at_object_index.push_front | 932 int tok = curr_lexer->previous_token_value (); |
832 (curr_lexer->looking_for_object_index); | 933 |
833 | 934 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' |
834 curr_lexer->looking_at_indirect_ref = false; | 935 || curr_lexer->previous_token_is_binop ())) |
835 curr_lexer->looking_for_object_index = false; | 936 unput_comma = true; |
836 curr_lexer->at_beginning_of_statement = false; | 937 } |
837 | 938 |
838 curr_lexer->nesting_level.paren (); | 939 if (unput_comma) |
839 curr_lexer->decrement_promptflag (); | 940 { |
840 | 941 yyless (0); |
841 return curr_lexer->handle_token ('('); | 942 curr_lexer->xunput (','); |
943 } | |
944 else | |
945 { | |
946 // If we are looking for an object index, then push TRUE for | |
947 // looking_at_object_index. Otherwise, just push whatever state | |
948 // is current (so that we can pop it off the stack when we find | |
949 // the matching close paren). | |
950 | |
951 curr_lexer->looking_at_object_index.push_front | |
952 (curr_lexer->looking_for_object_index); | |
953 | |
954 curr_lexer->looking_at_indirect_ref = false; | |
955 curr_lexer->looking_for_object_index = false; | |
956 curr_lexer->at_beginning_of_statement = false; | |
957 | |
958 curr_lexer->nesting_level.paren (); | |
959 curr_lexer->decrement_promptflag (); | |
960 | |
961 return curr_lexer->handle_token ('('); | |
962 } | |
842 } | 963 } |
843 | 964 |
844 ")" { | 965 ")" { |
845 curr_lexer->lexer_debug (")"); | 966 curr_lexer->lexer_debug (")"); |
846 | 967 |
1100 } | 1221 } |
1101 | 1222 |
1102 "{" { | 1223 "{" { |
1103 curr_lexer->lexer_debug ("{"); | 1224 curr_lexer->lexer_debug ("{"); |
1104 | 1225 |
1105 curr_lexer->nesting_level.brace (); | 1226 bool unput_comma = false; |
1106 | 1227 |
1107 curr_lexer->looking_at_object_index.push_front | 1228 if (curr_lexer->whitespace_is_significant () |
1108 (curr_lexer->looking_for_object_index); | 1229 && curr_lexer->space_follows_previous_token ()) |
1109 | 1230 { |
1110 curr_lexer->current_input_column += yyleng; | 1231 int tok = curr_lexer->previous_token_value (); |
1111 curr_lexer->looking_for_object_index = false; | 1232 |
1112 curr_lexer->at_beginning_of_statement = false; | 1233 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' |
1113 | 1234 || curr_lexer->previous_token_is_binop ())) |
1114 curr_lexer->decrement_promptflag (); | 1235 unput_comma = true; |
1115 | 1236 } |
1116 curr_lexer->braceflag++; | 1237 |
1117 | 1238 if (unput_comma) |
1118 curr_lexer->push_start_state (MATRIX_START); | 1239 { |
1119 | 1240 yyless (0); |
1120 return curr_lexer->count_token ('{'); | 1241 curr_lexer->xunput (','); |
1242 } | |
1243 else | |
1244 { | |
1245 curr_lexer->nesting_level.brace (); | |
1246 | |
1247 curr_lexer->looking_at_object_index.push_front | |
1248 (curr_lexer->looking_for_object_index); | |
1249 | |
1250 curr_lexer->current_input_column += yyleng; | |
1251 curr_lexer->looking_for_object_index = false; | |
1252 curr_lexer->at_beginning_of_statement = false; | |
1253 | |
1254 curr_lexer->decrement_promptflag (); | |
1255 | |
1256 curr_lexer->braceflag++; | |
1257 | |
1258 curr_lexer->push_start_state (MATRIX_START); | |
1259 | |
1260 return curr_lexer->count_token ('{'); | |
1261 } | |
1121 } | 1262 } |
1122 | 1263 |
1123 "}" { | 1264 "}" { |
1124 curr_lexer->lexer_debug ("}"); | 1265 curr_lexer->lexer_debug ("}"); |
1125 | 1266 |
1900 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", | 2041 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", |
1901 input_line_number); | 2042 input_line_number); |
1902 // fall through ... | 2043 // fall through ... |
1903 | 2044 |
1904 case persistent_kw: | 2045 case persistent_kw: |
2046 case global_kw: | |
2047 looking_at_decl_list = true; | |
1905 break; | 2048 break; |
1906 | 2049 |
1907 case case_kw: | 2050 case case_kw: |
1908 case elseif_kw: | 2051 case elseif_kw: |
1909 case global_kw: | |
1910 case until_kw: | 2052 case until_kw: |
1911 break; | 2053 break; |
1912 | 2054 |
1913 case end_kw: | 2055 case end_kw: |
1914 if (inside_any_object_index () | 2056 if (inside_any_object_index () |
3257 | 3399 |
3258 int | 3400 int |
3259 octave_lexer::handle_op (const char *pattern, int tok, bool convert, | 3401 octave_lexer::handle_op (const char *pattern, int tok, bool convert, |
3260 bool bos, bool qit) | 3402 bool bos, bool qit) |
3261 { | 3403 { |
3404 lexer_debug (pattern); | |
3405 | |
3262 return handle_op_internal (pattern, tok, convert, bos, qit, true); | 3406 return handle_op_internal (pattern, tok, convert, bos, qit, true); |
3263 } | 3407 } |
3264 | 3408 |
3265 int | 3409 int |
3266 octave_lexer::handle_incompatible_op (const char *pattern, int tok, | 3410 octave_lexer::handle_incompatible_op (const char *pattern, int tok, |
3267 bool convert, bool bos, bool qit) | 3411 bool convert, bool bos, bool qit) |
3268 { | 3412 { |
3413 lexer_debug (pattern); | |
3414 | |
3269 return handle_op_internal (pattern, tok, convert, bos, qit, false); | 3415 return handle_op_internal (pattern, tok, convert, bos, qit, false); |
3416 } | |
3417 | |
3418 bool | |
3419 octave_lexer::maybe_unput_comma_before_unary_op (int tok) | |
3420 { | |
3421 int prev_tok = previous_token_value (); | |
3422 | |
3423 bool unput_comma = false; | |
3424 | |
3425 if (whitespace_is_significant () && space_follows_previous_token ()) | |
3426 { | |
3427 int c = text_yyinput (); | |
3428 xunput (c); | |
3429 | |
3430 bool space_after = (c == ' ' || c == '\t'); | |
3431 | |
3432 if (! (prev_tok == ';' || prev_tok == ',' | |
3433 || prev_tok == '[' || prev_tok == '{' | |
3434 || previous_token_is_binop () | |
3435 || ((tok == '+' || tok == '-') && space_after))) | |
3436 unput_comma = true; | |
3437 } | |
3438 | |
3439 return unput_comma; | |
3440 } | |
3441 | |
3442 int | |
3443 octave_lexer::handle_unary_op (const char *pattern, int tok, bool convert, | |
3444 bool bos, bool qit) | |
3445 { | |
3446 lexer_debug (pattern); | |
3447 | |
3448 return maybe_unput_comma_before_unary_op (tok) | |
3449 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, true); | |
3450 } | |
3451 | |
3452 int | |
3453 octave_lexer::handle_incompatible_unary_op (const char *pattern, int tok, | |
3454 bool convert, bool bos, bool qit) | |
3455 { | |
3456 lexer_debug (pattern); | |
3457 | |
3458 return maybe_unput_comma_before_unary_op (tok) | |
3459 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false); | |
3270 } | 3460 } |
3271 | 3461 |
3272 int | 3462 int |
3273 octave_lexer::handle_assign_op (const char *pattern, int tok) | 3463 octave_lexer::handle_assign_op (const char *pattern, int tok) |
3274 { | 3464 { |
3289 | 3479 |
3290 int | 3480 int |
3291 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, | 3481 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, |
3292 bool bos, bool qit, bool compat) | 3482 bool bos, bool qit, bool compat) |
3293 { | 3483 { |
3294 lexer_debug (pattern); | |
3295 | |
3296 if (! compat) | 3484 if (! compat) |
3297 gripe_matlab_incompatible_operator (flex_yytext ()); | 3485 gripe_matlab_incompatible_operator (flex_yytext ()); |
3298 | 3486 |
3299 push_token (new token (tok, input_line_number, current_input_column)); | 3487 push_token (new token (tok, input_line_number, current_input_column)); |
3300 | 3488 |