Mercurial > hg > octave-lyh
comparison libinterp/parse-tree/lex.ll @ 16255:12bf6a3f8c45
store more info in token value class
* token.h, token.cc: Store token ID and trailing space info
* lex.h, lex.ll (lexical_feedback::token_cache): Handle storing and
retrieving extra info in the lexer.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 11 Mar 2013 14:08:50 -0400 |
parents | a89cf57ba3a5 |
children | b28062b977fd db7f07b22b9b |
comparison
equal
deleted
inserted
replaced
16253:a89cf57ba3a5 | 16255:12bf6a3f8c45 |
---|---|
560 | 560 |
561 bool full_line_comment = curr_lexer->current_input_column == 1; | 561 bool full_line_comment = curr_lexer->current_input_column == 1; |
562 curr_lexer->input_line_number++; | 562 curr_lexer->input_line_number++; |
563 curr_lexer->current_input_column = 1; | 563 curr_lexer->current_input_column = 1; |
564 | 564 |
565 bool have_space = false; | |
565 size_t len = yyleng; | 566 size_t len = yyleng; |
566 size_t i = 0; | 567 size_t i = 0; |
567 while (i < len) | 568 while (i < len) |
568 { | 569 { |
569 char c = yytext[i]; | 570 char c = yytext[i]; |
570 if (c == '#' || c == '%' || c == ' ' || c == '\t') | 571 if (c == ' ' || c == '\t') |
572 { | |
573 have_space = true; | |
574 i++; | |
575 } | |
576 else | |
577 break; | |
578 } | |
579 | |
580 while (i < len) | |
581 { | |
582 char c = yytext[i]; | |
583 if (c == '#' || c == '%') | |
571 i++; | 584 i++; |
572 else | 585 else |
573 break; | 586 break; |
574 } | 587 } |
575 | 588 |
576 curr_lexer->comment_text += &yytext[i]; | 589 curr_lexer->comment_text += &yytext[i]; |
577 | 590 |
578 if (! full_line_comment) | 591 if (! full_line_comment) |
579 { | 592 { |
593 if (have_space) | |
594 curr_lexer->mark_previous_token_trailing_space (); | |
595 | |
580 curr_lexer->finish_comment (octave_comment_elt::end_of_line); | 596 curr_lexer->finish_comment (octave_comment_elt::end_of_line); |
581 | 597 |
582 curr_lexer->pop_start_state (); | 598 curr_lexer->pop_start_state (); |
583 } | 599 } |
584 } | 600 } |
625 // the <MATRIX_START> start state code above. | 641 // the <MATRIX_START> start state code above. |
626 %} | 642 %} |
627 | 643 |
628 {S}* { | 644 {S}* { |
629 curr_lexer->current_input_column += yyleng; | 645 curr_lexer->current_input_column += yyleng; |
646 | |
647 curr_lexer->mark_previous_token_trailing_space (); | |
630 } | 648 } |
631 | 649 |
632 %{ | 650 %{ |
633 // Continuation lines. Allow comments after continuations. | 651 // Continuation lines. Allow comments after continuations. |
634 %} | 652 %} |
1367 nesting_level.reset (); | 1385 nesting_level.reset (); |
1368 | 1386 |
1369 tokens.clear (); | 1387 tokens.clear (); |
1370 } | 1388 } |
1371 | 1389 |
1390 int | |
1391 lexical_feedback::previous_token_value (void) const | |
1392 { | |
1393 const token *tok = tokens.front (); | |
1394 return tok ? tok->token_value () : 0; | |
1395 } | |
1396 | |
1397 bool | |
1398 lexical_feedback::previous_token_value_is (int tok_val) const | |
1399 { | |
1400 const token *tok = tokens.front (); | |
1401 return tok ? tok->token_value_is (tok_val) : false; | |
1402 } | |
1403 | |
1404 void | |
1405 lexical_feedback::mark_previous_token_trailing_space (void) | |
1406 { | |
1407 token *tok = tokens.front (); | |
1408 if (tok && ! previous_token_value_is ('\n')) | |
1409 tok->mark_trailing_space (); | |
1410 } | |
1411 | |
1412 bool | |
1413 lexical_feedback::space_follows_previous_token (void) const | |
1414 { | |
1415 const token *tok = tokens.front (); | |
1416 return tok ? tok->space_follows_token () : false; | |
1417 } | |
1418 | |
1372 static bool | 1419 static bool |
1373 looks_like_copyright (const std::string& s) | 1420 looks_like_copyright (const std::string& s) |
1374 { | 1421 { |
1375 bool retval = false; | 1422 bool retval = false; |
1376 | 1423 |
1724 && (defining_func | 1771 && (defining_func |
1725 && ! (looking_at_return_list | 1772 && ! (looking_at_return_list |
1726 || parsed_function_name.top ())))) | 1773 || parsed_function_name.top ())))) |
1727 return 0; | 1774 return 0; |
1728 | 1775 |
1729 tok_val = new token (token::simple_end, l, c); | 1776 tok_val = new token (end_kw, token::simple_end, l, c); |
1730 at_beginning_of_statement = true; | 1777 at_beginning_of_statement = true; |
1731 break; | 1778 break; |
1732 | 1779 |
1733 case end_try_catch_kw: | 1780 case end_try_catch_kw: |
1734 tok_val = new token (token::try_catch_end, l, c); | 1781 tok_val = new token (end_try_catch_kw, token::try_catch_end, l, c); |
1735 at_beginning_of_statement = true; | 1782 at_beginning_of_statement = true; |
1736 break; | 1783 break; |
1737 | 1784 |
1738 case end_unwind_protect_kw: | 1785 case end_unwind_protect_kw: |
1739 tok_val = new token (token::unwind_protect_end, l, c); | 1786 tok_val = new token (end_unwind_protect_kw, |
1787 token::unwind_protect_end, l, c); | |
1740 at_beginning_of_statement = true; | 1788 at_beginning_of_statement = true; |
1741 break; | 1789 break; |
1742 | 1790 |
1743 case endfor_kw: | 1791 case endfor_kw: |
1744 tok_val = new token (token::for_end, l, c); | 1792 tok_val = new token (endfor_kw, token::for_end, l, c); |
1745 at_beginning_of_statement = true; | 1793 at_beginning_of_statement = true; |
1746 break; | 1794 break; |
1747 | 1795 |
1748 case endfunction_kw: | 1796 case endfunction_kw: |
1749 tok_val = new token (token::function_end, l, c); | 1797 tok_val = new token (endfunction_kw, token::function_end, l, c); |
1750 at_beginning_of_statement = true; | 1798 at_beginning_of_statement = true; |
1751 break; | 1799 break; |
1752 | 1800 |
1753 case endif_kw: | 1801 case endif_kw: |
1754 tok_val = new token (token::if_end, l, c); | 1802 tok_val = new token (endif_kw, token::if_end, l, c); |
1755 at_beginning_of_statement = true; | 1803 at_beginning_of_statement = true; |
1756 break; | 1804 break; |
1757 | 1805 |
1758 case endparfor_kw: | 1806 case endparfor_kw: |
1759 tok_val = new token (token::parfor_end, l, c); | 1807 tok_val = new token (endparfor_kw, token::parfor_end, l, c); |
1760 at_beginning_of_statement = true; | 1808 at_beginning_of_statement = true; |
1761 break; | 1809 break; |
1762 | 1810 |
1763 case endswitch_kw: | 1811 case endswitch_kw: |
1764 tok_val = new token (token::switch_end, l, c); | 1812 tok_val = new token (endswitch_kw, token::switch_end, l, c); |
1765 at_beginning_of_statement = true; | 1813 at_beginning_of_statement = true; |
1766 break; | 1814 break; |
1767 | 1815 |
1768 case endwhile_kw: | 1816 case endwhile_kw: |
1769 tok_val = new token (token::while_end, l, c); | 1817 tok_val = new token (endwhile_kw, token::while_end, l, c); |
1770 at_beginning_of_statement = true; | 1818 at_beginning_of_statement = true; |
1771 break; | 1819 break; |
1772 | 1820 |
1773 case endclassdef_kw: | 1821 case endclassdef_kw: |
1774 tok_val = new token (token::classdef_end, l, c); | 1822 tok_val = new token (endclassdef_kw, token::classdef_end, l, c); |
1775 at_beginning_of_statement = true; | 1823 at_beginning_of_statement = true; |
1776 break; | 1824 break; |
1777 | 1825 |
1778 case endenumeration_kw: | 1826 case endenumeration_kw: |
1779 tok_val = new token (token::enumeration_end, l, c); | 1827 tok_val = new token (endenumeration_kw, token::enumeration_end, |
1828 l, c); | |
1780 at_beginning_of_statement = true; | 1829 at_beginning_of_statement = true; |
1781 break; | 1830 break; |
1782 | 1831 |
1783 case endevents_kw: | 1832 case endevents_kw: |
1784 tok_val = new token (token::events_end, l, c); | 1833 tok_val = new token (endevents_kw, token::events_end, l, c); |
1785 at_beginning_of_statement = true; | 1834 at_beginning_of_statement = true; |
1786 break; | 1835 break; |
1787 | 1836 |
1788 case endmethods_kw: | 1837 case endmethods_kw: |
1789 tok_val = new token (token::methods_end, l, c); | 1838 tok_val = new token (endmethods_kw, token::methods_end, l, c); |
1790 at_beginning_of_statement = true; | 1839 at_beginning_of_statement = true; |
1791 break; | 1840 break; |
1792 | 1841 |
1793 case endproperties_kw: | 1842 case endproperties_kw: |
1794 tok_val = new token (token::properties_end, l, c); | 1843 tok_val = new token (endproperties_kw, token::properties_end, l, c); |
1795 at_beginning_of_statement = true; | 1844 at_beginning_of_statement = true; |
1796 break; | 1845 break; |
1797 | 1846 |
1798 | 1847 |
1799 case for_kw: | 1848 case for_kw: |
1869 case magic_file_kw: | 1918 case magic_file_kw: |
1870 { | 1919 { |
1871 if ((reading_fcn_file || reading_script_file | 1920 if ((reading_fcn_file || reading_script_file |
1872 || reading_classdef_file) | 1921 || reading_classdef_file) |
1873 && ! fcn_file_full_name.empty ()) | 1922 && ! fcn_file_full_name.empty ()) |
1874 tok_val = new token (fcn_file_full_name, l, c); | 1923 tok_val = new token (magic_file_kw, fcn_file_full_name, l, c); |
1875 else | 1924 else |
1876 tok_val = new token ("stdin", l, c); | 1925 tok_val = new token (magic_file_kw, "stdin", l, c); |
1877 } | 1926 } |
1878 break; | 1927 break; |
1879 | 1928 |
1880 case magic_line_kw: | 1929 case magic_line_kw: |
1881 tok_val = new token (static_cast<double> (l), "", l, c); | 1930 tok_val = new token (magic_line_kw, static_cast<double> (l), |
1931 "", l, c); | |
1882 break; | 1932 break; |
1883 | 1933 |
1884 default: | 1934 default: |
1885 panic_impossible (); | 1935 panic_impossible (); |
1886 } | 1936 } |
1887 | 1937 |
1888 if (! tok_val) | 1938 if (! tok_val) |
1889 tok_val = new token (l, c); | 1939 tok_val = new token (kw->tok, l, c); |
1890 | 1940 |
1891 push_token (tok_val); | 1941 push_token (tok_val); |
1892 | 1942 |
1893 return kw->tok; | 1943 return kw->tok; |
1894 } | 1944 } |
2283 quote_is_transpose = true; | 2333 quote_is_transpose = true; |
2284 convert_spaces_to_comma = true; | 2334 convert_spaces_to_comma = true; |
2285 looking_for_object_index = false; | 2335 looking_for_object_index = false; |
2286 at_beginning_of_statement = false; | 2336 at_beginning_of_statement = false; |
2287 | 2337 |
2288 push_token (new token (value, yytxt, input_line_number, | 2338 push_token (new token (NUM, value, yytxt, input_line_number, |
2289 current_input_column)); | 2339 current_input_column)); |
2290 | 2340 |
2291 current_input_column += flex_yyleng (); | 2341 current_input_column += flex_yyleng (); |
2292 | 2342 |
2293 do_comma_insert_check (); | 2343 do_comma_insert_check (); |
2315 offset++; | 2365 offset++; |
2316 } | 2366 } |
2317 else | 2367 else |
2318 break; | 2368 break; |
2319 } | 2369 } |
2370 | |
2371 if (have_space) | |
2372 mark_previous_token_trailing_space (); | |
2320 | 2373 |
2321 bool have_comment = false; | 2374 bool have_comment = false; |
2322 while (offset < yylng) | 2375 while (offset < yylng) |
2323 { | 2376 { |
2324 char c = yytxt[offset]; | 2377 char c = yytxt[offset]; |
2573 s = do_string_escapes (buf.str ()); | 2626 s = do_string_escapes (buf.str ()); |
2574 | 2627 |
2575 quote_is_transpose = true; | 2628 quote_is_transpose = true; |
2576 convert_spaces_to_comma = true; | 2629 convert_spaces_to_comma = true; |
2577 | 2630 |
2578 push_token (new token (s, bos_line, bos_col)); | |
2579 | |
2580 if (delim == '"') | 2631 if (delim == '"') |
2581 gripe_matlab_incompatible ("\" used as string delimiter"); | 2632 gripe_matlab_incompatible ("\" used as string delimiter"); |
2582 else if (delim == '\'') | 2633 else if (delim == '\'') |
2583 gripe_single_quote_string (); | 2634 gripe_single_quote_string (); |
2584 | 2635 |
2585 looking_for_object_index = true; | 2636 looking_for_object_index = true; |
2586 at_beginning_of_statement = false; | 2637 at_beginning_of_statement = false; |
2587 | 2638 |
2588 return delim == '"' ? DQ_STRING : SQ_STRING; | 2639 int tok = delim == '"' ? DQ_STRING : SQ_STRING; |
2640 | |
2641 push_token (new token (tok, s, bos_line, bos_col)); | |
2642 | |
2643 return tok; | |
2589 } | 2644 } |
2590 } | 2645 } |
2591 } | 2646 } |
2592 else | 2647 else |
2593 { | 2648 { |
3085 { | 3140 { |
3086 error ("method, class and package names may not be keywords"); | 3141 error ("method, class and package names may not be keywords"); |
3087 return LEXICAL_ERROR; | 3142 return LEXICAL_ERROR; |
3088 } | 3143 } |
3089 | 3144 |
3090 push_token (new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), | 3145 push_token (new token (SUPERCLASSREF, |
3146 meth.empty () ? 0 : &(symbol_table::insert (meth)), | |
3091 cls.empty () ? 0 : &(symbol_table::insert (cls)), | 3147 cls.empty () ? 0 : &(symbol_table::insert (cls)), |
3092 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | 3148 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), |
3093 input_line_number, current_input_column)); | 3149 input_line_number, current_input_column)); |
3094 | 3150 |
3095 convert_spaces_to_comma = true; | 3151 convert_spaces_to_comma = true; |
3119 { | 3175 { |
3120 error ("class and package names may not be keywords"); | 3176 error ("class and package names may not be keywords"); |
3121 return LEXICAL_ERROR; | 3177 return LEXICAL_ERROR; |
3122 } | 3178 } |
3123 | 3179 |
3124 push_token (new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), | 3180 push_token (new token (METAQUERY, |
3181 cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3125 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | 3182 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), |
3126 input_line_number, current_input_column)); | 3183 input_line_number, current_input_column)); |
3127 | 3184 |
3128 convert_spaces_to_comma = true; | 3185 convert_spaces_to_comma = true; |
3129 current_input_column += flex_yyleng (); | 3186 current_input_column += flex_yyleng (); |
3159 { | 3216 { |
3160 do_comma_insert_check (); | 3217 do_comma_insert_check (); |
3161 | 3218 |
3162 maybe_unput_comma (spc_gobbled); | 3219 maybe_unput_comma (spc_gobbled); |
3163 | 3220 |
3164 push_token (new token (tok, input_line_number, | 3221 push_token (new token (STRUCT_ELT, tok, input_line_number, |
3165 current_input_column)); | 3222 current_input_column)); |
3166 | 3223 |
3167 quote_is_transpose = true; | 3224 quote_is_transpose = true; |
3168 convert_spaces_to_comma = true; | 3225 convert_spaces_to_comma = true; |
3169 looking_for_object_index = true; | 3226 looking_for_object_index = true; |
3194 | 3251 |
3195 return LEXICAL_ERROR; | 3252 return LEXICAL_ERROR; |
3196 } | 3253 } |
3197 else | 3254 else |
3198 { | 3255 { |
3199 push_token (new token (tok, input_line_number, | 3256 push_token (new token (FCN_HANDLE, tok, input_line_number, |
3200 current_input_column)); | 3257 current_input_column)); |
3201 | 3258 |
3202 current_input_column += flex_yyleng (); | 3259 current_input_column += flex_yyleng (); |
3203 quote_is_transpose = false; | 3260 quote_is_transpose = false; |
3204 convert_spaces_to_comma = true; | 3261 convert_spaces_to_comma = true; |
3275 // transformation of the end keyword... | 3332 // transformation of the end keyword... |
3276 | 3333 |
3277 if (tok == "end") | 3334 if (tok == "end") |
3278 tok = "__end__"; | 3335 tok = "__end__"; |
3279 | 3336 |
3280 push_token (new token (&(symbol_table::insert (tok)), | 3337 push_token (new token (NAME, &(symbol_table::insert (tok)), |
3281 input_line_number, current_input_column)); | 3338 input_line_number, current_input_column)); |
3282 | 3339 |
3283 // After seeing an identifier, it is ok to convert spaces to a comma | 3340 // After seeing an identifier, it is ok to convert spaces to a comma |
3284 // (if needed). | 3341 // (if needed). |
3285 | 3342 |
3633 lexer_debug (pattern); | 3690 lexer_debug (pattern); |
3634 | 3691 |
3635 if (! compat) | 3692 if (! compat) |
3636 gripe_matlab_incompatible_operator (flex_yytext ()); | 3693 gripe_matlab_incompatible_operator (flex_yytext ()); |
3637 | 3694 |
3638 push_token (new token (input_line_number, current_input_column)); | 3695 push_token (new token (tok, input_line_number, current_input_column)); |
3639 | 3696 |
3640 current_input_column += flex_yyleng (); | 3697 current_input_column += flex_yyleng (); |
3641 quote_is_transpose = qit; | 3698 quote_is_transpose = qit; |
3642 convert_spaces_to_comma = convert; | 3699 convert_spaces_to_comma = convert; |
3643 looking_for_object_index = false; | 3700 looking_for_object_index = false; |
3647 } | 3704 } |
3648 | 3705 |
3649 int | 3706 int |
3650 octave_lexer::handle_token (const std::string& name, int tok) | 3707 octave_lexer::handle_token (const std::string& name, int tok) |
3651 { | 3708 { |
3652 token *tok_val = new token (name, input_line_number, current_input_column); | 3709 token *tok_val = new token (tok, name, input_line_number, |
3710 current_input_column); | |
3653 | 3711 |
3654 return handle_token (tok, tok_val); | 3712 return handle_token (tok, tok_val); |
3655 } | 3713 } |
3656 | 3714 |
3657 int | 3715 int |
3658 octave_lexer::handle_token (int tok, token *tok_val) | 3716 octave_lexer::handle_token (int tok, token *tok_val) |
3659 { | 3717 { |
3660 if (! tok_val) | 3718 if (! tok_val) |
3661 tok_val = new token (input_line_number, current_input_column); | 3719 tok_val = new token (tok, input_line_number, current_input_column); |
3662 | 3720 |
3663 push_token (tok_val); | 3721 push_token (tok_val); |
3664 | 3722 |
3665 current_input_column += flex_yyleng (); | 3723 current_input_column += flex_yyleng (); |
3666 quote_is_transpose = false; | 3724 quote_is_transpose = false; |