comparison libinterp/parse-tree/lex.ll @ 16255:12bf6a3f8c45

store more info in token value class

* token.h, token.cc: Store token ID and trailing space info
* lex.h, lex.ll (lexical_feedback::token_cache): Handle storing and retrieving extra info in the lexer.
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:08:50 -0400
parents a89cf57ba3a5
children b28062b977fd db7f07b22b9b
comparison
equal deleted inserted replaced
16253:a89cf57ba3a5 16255:12bf6a3f8c45
560 560
561 bool full_line_comment = curr_lexer->current_input_column == 1; 561 bool full_line_comment = curr_lexer->current_input_column == 1;
562 curr_lexer->input_line_number++; 562 curr_lexer->input_line_number++;
563 curr_lexer->current_input_column = 1; 563 curr_lexer->current_input_column = 1;
564 564
565 bool have_space = false;
565 size_t len = yyleng; 566 size_t len = yyleng;
566 size_t i = 0; 567 size_t i = 0;
567 while (i < len) 568 while (i < len)
568 { 569 {
569 char c = yytext[i]; 570 char c = yytext[i];
570 if (c == '#' || c == '%' || c == ' ' || c == '\t') 571 if (c == ' ' || c == '\t')
572 {
573 have_space = true;
574 i++;
575 }
576 else
577 break;
578 }
579
580 while (i < len)
581 {
582 char c = yytext[i];
583 if (c == '#' || c == '%')
571 i++; 584 i++;
572 else 585 else
573 break; 586 break;
574 } 587 }
575 588
576 curr_lexer->comment_text += &yytext[i]; 589 curr_lexer->comment_text += &yytext[i];
577 590
578 if (! full_line_comment) 591 if (! full_line_comment)
579 { 592 {
593 if (have_space)
594 curr_lexer->mark_previous_token_trailing_space ();
595
580 curr_lexer->finish_comment (octave_comment_elt::end_of_line); 596 curr_lexer->finish_comment (octave_comment_elt::end_of_line);
581 597
582 curr_lexer->pop_start_state (); 598 curr_lexer->pop_start_state ();
583 } 599 }
584 } 600 }
625 // the <MATRIX_START> start state code above. 641 // the <MATRIX_START> start state code above.
626 %} 642 %}
627 643
628 {S}* { 644 {S}* {
629 curr_lexer->current_input_column += yyleng; 645 curr_lexer->current_input_column += yyleng;
646
647 curr_lexer->mark_previous_token_trailing_space ();
630 } 648 }
631 649
632 %{ 650 %{
633 // Continuation lines. Allow comments after continuations. 651 // Continuation lines. Allow comments after continuations.
634 %} 652 %}
1367 nesting_level.reset (); 1385 nesting_level.reset ();
1368 1386
1369 tokens.clear (); 1387 tokens.clear ();
1370 } 1388 }
1371 1389
1390 int
1391 lexical_feedback::previous_token_value (void) const
1392 {
1393 const token *tok = tokens.front ();
1394 return tok ? tok->token_value () : 0;
1395 }
1396
1397 bool
1398 lexical_feedback::previous_token_value_is (int tok_val) const
1399 {
1400 const token *tok = tokens.front ();
1401 return tok ? tok->token_value_is (tok_val) : false;
1402 }
1403
1404 void
1405 lexical_feedback::mark_previous_token_trailing_space (void)
1406 {
1407 token *tok = tokens.front ();
1408 if (tok && ! previous_token_value_is ('\n'))
1409 tok->mark_trailing_space ();
1410 }
1411
1412 bool
1413 lexical_feedback::space_follows_previous_token (void) const
1414 {
1415 const token *tok = tokens.front ();
1416 return tok ? tok->space_follows_token () : false;
1417 }
1418
1372 static bool 1419 static bool
1373 looks_like_copyright (const std::string& s) 1420 looks_like_copyright (const std::string& s)
1374 { 1421 {
1375 bool retval = false; 1422 bool retval = false;
1376 1423
1724 && (defining_func 1771 && (defining_func
1725 && ! (looking_at_return_list 1772 && ! (looking_at_return_list
1726 || parsed_function_name.top ())))) 1773 || parsed_function_name.top ()))))
1727 return 0; 1774 return 0;
1728 1775
1729 tok_val = new token (token::simple_end, l, c); 1776 tok_val = new token (end_kw, token::simple_end, l, c);
1730 at_beginning_of_statement = true; 1777 at_beginning_of_statement = true;
1731 break; 1778 break;
1732 1779
1733 case end_try_catch_kw: 1780 case end_try_catch_kw:
1734 tok_val = new token (token::try_catch_end, l, c); 1781 tok_val = new token (end_try_catch_kw, token::try_catch_end, l, c);
1735 at_beginning_of_statement = true; 1782 at_beginning_of_statement = true;
1736 break; 1783 break;
1737 1784
1738 case end_unwind_protect_kw: 1785 case end_unwind_protect_kw:
1739 tok_val = new token (token::unwind_protect_end, l, c); 1786 tok_val = new token (end_unwind_protect_kw,
1787 token::unwind_protect_end, l, c);
1740 at_beginning_of_statement = true; 1788 at_beginning_of_statement = true;
1741 break; 1789 break;
1742 1790
1743 case endfor_kw: 1791 case endfor_kw:
1744 tok_val = new token (token::for_end, l, c); 1792 tok_val = new token (endfor_kw, token::for_end, l, c);
1745 at_beginning_of_statement = true; 1793 at_beginning_of_statement = true;
1746 break; 1794 break;
1747 1795
1748 case endfunction_kw: 1796 case endfunction_kw:
1749 tok_val = new token (token::function_end, l, c); 1797 tok_val = new token (endfunction_kw, token::function_end, l, c);
1750 at_beginning_of_statement = true; 1798 at_beginning_of_statement = true;
1751 break; 1799 break;
1752 1800
1753 case endif_kw: 1801 case endif_kw:
1754 tok_val = new token (token::if_end, l, c); 1802 tok_val = new token (endif_kw, token::if_end, l, c);
1755 at_beginning_of_statement = true; 1803 at_beginning_of_statement = true;
1756 break; 1804 break;
1757 1805
1758 case endparfor_kw: 1806 case endparfor_kw:
1759 tok_val = new token (token::parfor_end, l, c); 1807 tok_val = new token (endparfor_kw, token::parfor_end, l, c);
1760 at_beginning_of_statement = true; 1808 at_beginning_of_statement = true;
1761 break; 1809 break;
1762 1810
1763 case endswitch_kw: 1811 case endswitch_kw:
1764 tok_val = new token (token::switch_end, l, c); 1812 tok_val = new token (endswitch_kw, token::switch_end, l, c);
1765 at_beginning_of_statement = true; 1813 at_beginning_of_statement = true;
1766 break; 1814 break;
1767 1815
1768 case endwhile_kw: 1816 case endwhile_kw:
1769 tok_val = new token (token::while_end, l, c); 1817 tok_val = new token (endwhile_kw, token::while_end, l, c);
1770 at_beginning_of_statement = true; 1818 at_beginning_of_statement = true;
1771 break; 1819 break;
1772 1820
1773 case endclassdef_kw: 1821 case endclassdef_kw:
1774 tok_val = new token (token::classdef_end, l, c); 1822 tok_val = new token (endclassdef_kw, token::classdef_end, l, c);
1775 at_beginning_of_statement = true; 1823 at_beginning_of_statement = true;
1776 break; 1824 break;
1777 1825
1778 case endenumeration_kw: 1826 case endenumeration_kw:
1779 tok_val = new token (token::enumeration_end, l, c); 1827 tok_val = new token (endenumeration_kw, token::enumeration_end,
1828 l, c);
1780 at_beginning_of_statement = true; 1829 at_beginning_of_statement = true;
1781 break; 1830 break;
1782 1831
1783 case endevents_kw: 1832 case endevents_kw:
1784 tok_val = new token (token::events_end, l, c); 1833 tok_val = new token (endevents_kw, token::events_end, l, c);
1785 at_beginning_of_statement = true; 1834 at_beginning_of_statement = true;
1786 break; 1835 break;
1787 1836
1788 case endmethods_kw: 1837 case endmethods_kw:
1789 tok_val = new token (token::methods_end, l, c); 1838 tok_val = new token (endmethods_kw, token::methods_end, l, c);
1790 at_beginning_of_statement = true; 1839 at_beginning_of_statement = true;
1791 break; 1840 break;
1792 1841
1793 case endproperties_kw: 1842 case endproperties_kw:
1794 tok_val = new token (token::properties_end, l, c); 1843 tok_val = new token (endproperties_kw, token::properties_end, l, c);
1795 at_beginning_of_statement = true; 1844 at_beginning_of_statement = true;
1796 break; 1845 break;
1797 1846
1798 1847
1799 case for_kw: 1848 case for_kw:
1869 case magic_file_kw: 1918 case magic_file_kw:
1870 { 1919 {
1871 if ((reading_fcn_file || reading_script_file 1920 if ((reading_fcn_file || reading_script_file
1872 || reading_classdef_file) 1921 || reading_classdef_file)
1873 && ! fcn_file_full_name.empty ()) 1922 && ! fcn_file_full_name.empty ())
1874 tok_val = new token (fcn_file_full_name, l, c); 1923 tok_val = new token (magic_file_kw, fcn_file_full_name, l, c);
1875 else 1924 else
1876 tok_val = new token ("stdin", l, c); 1925 tok_val = new token (magic_file_kw, "stdin", l, c);
1877 } 1926 }
1878 break; 1927 break;
1879 1928
1880 case magic_line_kw: 1929 case magic_line_kw:
1881 tok_val = new token (static_cast<double> (l), "", l, c); 1930 tok_val = new token (magic_line_kw, static_cast<double> (l),
1931 "", l, c);
1882 break; 1932 break;
1883 1933
1884 default: 1934 default:
1885 panic_impossible (); 1935 panic_impossible ();
1886 } 1936 }
1887 1937
1888 if (! tok_val) 1938 if (! tok_val)
1889 tok_val = new token (l, c); 1939 tok_val = new token (kw->tok, l, c);
1890 1940
1891 push_token (tok_val); 1941 push_token (tok_val);
1892 1942
1893 return kw->tok; 1943 return kw->tok;
1894 } 1944 }
2283 quote_is_transpose = true; 2333 quote_is_transpose = true;
2284 convert_spaces_to_comma = true; 2334 convert_spaces_to_comma = true;
2285 looking_for_object_index = false; 2335 looking_for_object_index = false;
2286 at_beginning_of_statement = false; 2336 at_beginning_of_statement = false;
2287 2337
2288 push_token (new token (value, yytxt, input_line_number, 2338 push_token (new token (NUM, value, yytxt, input_line_number,
2289 current_input_column)); 2339 current_input_column));
2290 2340
2291 current_input_column += flex_yyleng (); 2341 current_input_column += flex_yyleng ();
2292 2342
2293 do_comma_insert_check (); 2343 do_comma_insert_check ();
2315 offset++; 2365 offset++;
2316 } 2366 }
2317 else 2367 else
2318 break; 2368 break;
2319 } 2369 }
2370
2371 if (have_space)
2372 mark_previous_token_trailing_space ();
2320 2373
2321 bool have_comment = false; 2374 bool have_comment = false;
2322 while (offset < yylng) 2375 while (offset < yylng)
2323 { 2376 {
2324 char c = yytxt[offset]; 2377 char c = yytxt[offset];
2573 s = do_string_escapes (buf.str ()); 2626 s = do_string_escapes (buf.str ());
2574 2627
2575 quote_is_transpose = true; 2628 quote_is_transpose = true;
2576 convert_spaces_to_comma = true; 2629 convert_spaces_to_comma = true;
2577 2630
2578 push_token (new token (s, bos_line, bos_col));
2579
2580 if (delim == '"') 2631 if (delim == '"')
2581 gripe_matlab_incompatible ("\" used as string delimiter"); 2632 gripe_matlab_incompatible ("\" used as string delimiter");
2582 else if (delim == '\'') 2633 else if (delim == '\'')
2583 gripe_single_quote_string (); 2634 gripe_single_quote_string ();
2584 2635
2585 looking_for_object_index = true; 2636 looking_for_object_index = true;
2586 at_beginning_of_statement = false; 2637 at_beginning_of_statement = false;
2587 2638
2588 return delim == '"' ? DQ_STRING : SQ_STRING; 2639 int tok = delim == '"' ? DQ_STRING : SQ_STRING;
2640
2641 push_token (new token (tok, s, bos_line, bos_col));
2642
2643 return tok;
2589 } 2644 }
2590 } 2645 }
2591 } 2646 }
2592 else 2647 else
2593 { 2648 {
3085 { 3140 {
3086 error ("method, class and package names may not be keywords"); 3141 error ("method, class and package names may not be keywords");
3087 return LEXICAL_ERROR; 3142 return LEXICAL_ERROR;
3088 } 3143 }
3089 3144
3090 push_token (new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), 3145 push_token (new token (SUPERCLASSREF,
3146 meth.empty () ? 0 : &(symbol_table::insert (meth)),
3091 cls.empty () ? 0 : &(symbol_table::insert (cls)), 3147 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3092 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), 3148 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3093 input_line_number, current_input_column)); 3149 input_line_number, current_input_column));
3094 3150
3095 convert_spaces_to_comma = true; 3151 convert_spaces_to_comma = true;
3119 { 3175 {
3120 error ("class and package names may not be keywords"); 3176 error ("class and package names may not be keywords");
3121 return LEXICAL_ERROR; 3177 return LEXICAL_ERROR;
3122 } 3178 }
3123 3179
3124 push_token (new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), 3180 push_token (new token (METAQUERY,
3181 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3125 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), 3182 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3126 input_line_number, current_input_column)); 3183 input_line_number, current_input_column));
3127 3184
3128 convert_spaces_to_comma = true; 3185 convert_spaces_to_comma = true;
3129 current_input_column += flex_yyleng (); 3186 current_input_column += flex_yyleng ();
3159 { 3216 {
3160 do_comma_insert_check (); 3217 do_comma_insert_check ();
3161 3218
3162 maybe_unput_comma (spc_gobbled); 3219 maybe_unput_comma (spc_gobbled);
3163 3220
3164 push_token (new token (tok, input_line_number, 3221 push_token (new token (STRUCT_ELT, tok, input_line_number,
3165 current_input_column)); 3222 current_input_column));
3166 3223
3167 quote_is_transpose = true; 3224 quote_is_transpose = true;
3168 convert_spaces_to_comma = true; 3225 convert_spaces_to_comma = true;
3169 looking_for_object_index = true; 3226 looking_for_object_index = true;
3194 3251
3195 return LEXICAL_ERROR; 3252 return LEXICAL_ERROR;
3196 } 3253 }
3197 else 3254 else
3198 { 3255 {
3199 push_token (new token (tok, input_line_number, 3256 push_token (new token (FCN_HANDLE, tok, input_line_number,
3200 current_input_column)); 3257 current_input_column));
3201 3258
3202 current_input_column += flex_yyleng (); 3259 current_input_column += flex_yyleng ();
3203 quote_is_transpose = false; 3260 quote_is_transpose = false;
3204 convert_spaces_to_comma = true; 3261 convert_spaces_to_comma = true;
3275 // transformation of the end keyword... 3332 // transformation of the end keyword...
3276 3333
3277 if (tok == "end") 3334 if (tok == "end")
3278 tok = "__end__"; 3335 tok = "__end__";
3279 3336
3280 push_token (new token (&(symbol_table::insert (tok)), 3337 push_token (new token (NAME, &(symbol_table::insert (tok)),
3281 input_line_number, current_input_column)); 3338 input_line_number, current_input_column));
3282 3339
3283 // After seeing an identifier, it is ok to convert spaces to a comma 3340 // After seeing an identifier, it is ok to convert spaces to a comma
3284 // (if needed). 3341 // (if needed).
3285 3342
3633 lexer_debug (pattern); 3690 lexer_debug (pattern);
3634 3691
3635 if (! compat) 3692 if (! compat)
3636 gripe_matlab_incompatible_operator (flex_yytext ()); 3693 gripe_matlab_incompatible_operator (flex_yytext ());
3637 3694
3638 push_token (new token (input_line_number, current_input_column)); 3695 push_token (new token (tok, input_line_number, current_input_column));
3639 3696
3640 current_input_column += flex_yyleng (); 3697 current_input_column += flex_yyleng ();
3641 quote_is_transpose = qit; 3698 quote_is_transpose = qit;
3642 convert_spaces_to_comma = convert; 3699 convert_spaces_to_comma = convert;
3643 looking_for_object_index = false; 3700 looking_for_object_index = false;
3647 } 3704 }
3648 3705
3649 int 3706 int
3650 octave_lexer::handle_token (const std::string& name, int tok) 3707 octave_lexer::handle_token (const std::string& name, int tok)
3651 { 3708 {
3652 token *tok_val = new token (name, input_line_number, current_input_column); 3709 token *tok_val = new token (tok, name, input_line_number,
3710 current_input_column);
3653 3711
3654 return handle_token (tok, tok_val); 3712 return handle_token (tok, tok_val);
3655 } 3713 }
3656 3714
3657 int 3715 int
3658 octave_lexer::handle_token (int tok, token *tok_val) 3716 octave_lexer::handle_token (int tok, token *tok_val)
3659 { 3717 {
3660 if (! tok_val) 3718 if (! tok_val)
3661 tok_val = new token (input_line_number, current_input_column); 3719 tok_val = new token (tok, input_line_number, current_input_column);
3662 3720
3663 push_token (tok_val); 3721 push_token (tok_val);
3664 3722
3665 current_input_column += flex_yyleng (); 3723 current_input_column += flex_yyleng ();
3666 quote_is_transpose = false; 3724 quote_is_transpose = false;