comparison libinterp/parse-tree/lex.ll @ 16114:73a21ade0b6b

* lex.ll: Reorder function definitions.
author John W. Eaton <jwe@octave.org>
date Tue, 26 Feb 2013 02:52:48 -0500
parents 7c5e5e97a3bc
children 67f71e2a6190
comparison
equal deleted inserted replaced
16113:7c5e5e97a3bc 16114:73a21ade0b6b
220 220
221 // Internal variable for lexer debugging state. 221 // Internal variable for lexer debugging state.
222 static bool lexer_debug_flag = false; 222 static bool lexer_debug_flag = false;
223 223
224 // Forward declarations for functions defined at the bottom of this 224 // Forward declarations for functions defined at the bottom of this
225 // file. 225 // file that are needed inside the lexer actions.
226 226
227 static bool match_any (char c, const char *s);
228 static std::string strip_trailing_whitespace (char *s); 227 static std::string strip_trailing_whitespace (char *s);
229 static int octave_read (char *buf, unsigned int max_size); 228 static int octave_read (char *buf, unsigned int max_size);
230 static void display_token (int tok); 229 static void display_token (int tok);
231 static void lexer_debug (const char *pattern, const char *text); 230 static void lexer_debug (const char *pattern, const char *text);
232 231
978 TOK_RETURN (END_OF_INPUT); 977 TOK_RETURN (END_OF_INPUT);
979 } 978 }
980 979
981 %% 980 %%
982 981
983 // GAG.
984 //
985 // If we're reading a matrix and the next character is '[', make sure
986 // that we insert a comma ahead of it.
987
988 void
989 lexical_feedback::do_comma_insert_check (void)
990 {
991 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
992
993 int c = text_yyinput ();
994
995 xunput (c, yytext);
996
997 if (spc_gobbled)
998 xunput (' ', yytext);
999
1000 do_comma_insert = (! looking_at_object_index.front ()
1001 && bracketflag && c == '[');
1002 }
1003
1004 // Fix things up for errors or interrupts. The parser is never called 982 // Fix things up for errors or interrupts. The parser is never called
1005 // recursively, so it is always safe to reinitialize its state before 983 // recursively, so it is always safe to reinitialize its state before
1006 // doing any parsing. 984 // doing any parsing.
1007 985
1008 void 986 void
1177 std::cerr << "DEL"; 1155 std::cerr << "DEL";
1178 break; 1156 break;
1179 } 1157 }
1180 } 1158 }
1181 1159
1182 lexical_feedback::~lexical_feedback (void)
1183 {
1184 // Clear out the stack of token info used to track line and
1185 // column numbers.
1186
1187 while (! token_stack.empty ())
1188 {
1189 delete token_stack.top ();
1190 token_stack.pop ();
1191 }
1192 }
1193
1194 int
1195 lexical_feedback::text_yyinput (void)
1196 {
1197 int c = yyinput ();
1198
1199 if (lexer_debug_flag)
1200 {
1201 std::cerr << "I: ";
1202 display_character (c);
1203 std::cerr << std::endl;
1204 }
1205
1206 // Convert CRLF into just LF and single CR into LF.
1207
1208 if (c == '\r')
1209 {
1210 c = yyinput ();
1211
1212 if (lexer_debug_flag)
1213 {
1214 std::cerr << "I: ";
1215 display_character (c);
1216 std::cerr << std::endl;
1217 }
1218
1219 if (c != '\n')
1220 {
1221 xunput (c, yytext);
1222 c = '\n';
1223 }
1224 }
1225
1226 if (c == '\n')
1227 input_line_number++;
1228
1229 return c;
1230 }
1231
1232 void
1233 lexical_feedback::xunput (char c, char *buf)
1234 {
1235 if (lexer_debug_flag)
1236 {
1237 std::cerr << "U: ";
1238 display_character (c);
1239 std::cerr << std::endl;
1240 }
1241
1242 if (c == '\n')
1243 input_line_number--;
1244
1245 yyunput (c, buf);
1246 }
1247
1248 // If we read some newlines, we need figure out what column we're
1249 // really looking at.
1250
1251 void
1252 lexical_feedback::fixup_column_count (char *s)
1253 {
1254 char c;
1255 while ((c = *s++) != '\0')
1256 {
1257 if (c == '\n')
1258 {
1259 input_line_number++;
1260 current_input_column = 1;
1261 }
1262 else
1263 current_input_column++;
1264 }
1265 }
1266
1267 // Include these so that we don't have to link to libfl.a. 1160 // Include these so that we don't have to link to libfl.a.
1268 1161
1269 int 1162 int
1270 yywrap (void) 1163 yywrap (void)
1271 { 1164 {
1338 delete_input_buffer (void *buf) 1231 delete_input_buffer (void *buf)
1339 { 1232 {
1340 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); 1233 delete_buffer (static_cast<YY_BUFFER_STATE> (buf));
1341 } 1234 }
1342 1235
1343 bool
1344 lexical_feedback::inside_any_object_index (void)
1345 {
1346 bool retval = false;
1347
1348 for (std::list<bool>::const_iterator i = looking_at_object_index.begin ();
1349 i != looking_at_object_index.end (); i++)
1350 {
1351 if (*i)
1352 {
1353 retval = true;
1354 break;
1355 }
1356 }
1357
1358 return retval;
1359 }
1360
1361 // Handle keywords. Return -1 if the keyword should be ignored.
1362
1363 int
1364 lexical_feedback::is_keyword_token (const std::string& s)
1365 {
1366 int l = input_line_number;
1367 int c = current_input_column;
1368
1369 int len = s.length ();
1370
1371 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len);
1372
1373 if (kw)
1374 {
1375 yylval.tok_val = 0;
1376
1377 switch (kw->kw_id)
1378 {
1379 case break_kw:
1380 case catch_kw:
1381 case continue_kw:
1382 case else_kw:
1383 case otherwise_kw:
1384 case return_kw:
1385 case unwind_protect_cleanup_kw:
1386 at_beginning_of_statement = true;
1387 break;
1388
1389 case static_kw:
1390 if ((reading_fcn_file || reading_script_file
1391 || reading_classdef_file)
1392 && ! curr_fcn_file_full_name.empty ())
1393 warning_with_id ("Octave:deprecated-keyword",
1394 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'",
1395 input_line_number,
1396 curr_fcn_file_full_name.c_str ());
1397 else
1398 warning_with_id ("Octave:deprecated-keyword",
1399 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d",
1400 input_line_number);
1401 // fall through ...
1402
1403 case persistent_kw:
1404 break;
1405
1406 case case_kw:
1407 case elseif_kw:
1408 case global_kw:
1409 case until_kw:
1410 break;
1411
1412 case end_kw:
1413 if (inside_any_object_index ()
1414 || (! reading_classdef_file
1415 && (defining_func
1416 && ! (looking_at_return_list
1417 || parsed_function_name.top ()))))
1418 return 0;
1419
1420 yylval.tok_val = new token (token::simple_end, l, c);
1421 at_beginning_of_statement = true;
1422 break;
1423
1424 case end_try_catch_kw:
1425 yylval.tok_val = new token (token::try_catch_end, l, c);
1426 at_beginning_of_statement = true;
1427 break;
1428
1429 case end_unwind_protect_kw:
1430 yylval.tok_val = new token (token::unwind_protect_end, l, c);
1431 at_beginning_of_statement = true;
1432 break;
1433
1434 case endfor_kw:
1435 yylval.tok_val = new token (token::for_end, l, c);
1436 at_beginning_of_statement = true;
1437 break;
1438
1439 case endfunction_kw:
1440 yylval.tok_val = new token (token::function_end, l, c);
1441 at_beginning_of_statement = true;
1442 break;
1443
1444 case endif_kw:
1445 yylval.tok_val = new token (token::if_end, l, c);
1446 at_beginning_of_statement = true;
1447 break;
1448
1449 case endparfor_kw:
1450 yylval.tok_val = new token (token::parfor_end, l, c);
1451 at_beginning_of_statement = true;
1452 break;
1453
1454 case endswitch_kw:
1455 yylval.tok_val = new token (token::switch_end, l, c);
1456 at_beginning_of_statement = true;
1457 break;
1458
1459 case endwhile_kw:
1460 yylval.tok_val = new token (token::while_end, l, c);
1461 at_beginning_of_statement = true;
1462 break;
1463
1464 case endclassdef_kw:
1465 yylval.tok_val = new token (token::classdef_end, l, c);
1466 at_beginning_of_statement = true;
1467 break;
1468
1469 case endenumeration_kw:
1470 yylval.tok_val = new token (token::enumeration_end, l, c);
1471 at_beginning_of_statement = true;
1472 break;
1473
1474 case endevents_kw:
1475 yylval.tok_val = new token (token::events_end, l, c);
1476 at_beginning_of_statement = true;
1477 break;
1478
1479 case endmethods_kw:
1480 yylval.tok_val = new token (token::methods_end, l, c);
1481 at_beginning_of_statement = true;
1482 break;
1483
1484 case endproperties_kw:
1485 yylval.tok_val = new token (token::properties_end, l, c);
1486 at_beginning_of_statement = true;
1487 break;
1488
1489
1490 case for_kw:
1491 case parfor_kw:
1492 case while_kw:
1493 promptflag--;
1494 looping++;
1495 break;
1496
1497 case do_kw:
1498 at_beginning_of_statement = true;
1499 promptflag--;
1500 looping++;
1501 break;
1502
1503 case try_kw:
1504 case unwind_protect_kw:
1505 at_beginning_of_statement = true;
1506 promptflag--;
1507 break;
1508
1509 case if_kw:
1510 case switch_kw:
1511 promptflag--;
1512 break;
1513
1514 case get_kw:
1515 case set_kw:
1516 // 'get' and 'set' are keywords in classdef method
1517 // declarations.
1518 if (! maybe_classdef_get_set_method)
1519 return 0;
1520 break;
1521
1522 case enumeration_kw:
1523 case events_kw:
1524 case methods_kw:
1525 case properties_kw:
1526 // 'properties', 'methods' and 'events' are keywords for
1527 // classdef blocks.
1528 if (! parsing_classdef)
1529 return 0;
1530 // fall through ...
1531
1532 case classdef_kw:
1533 // 'classdef' is always a keyword.
1534 promptflag--;
1535 break;
1536
1537 case function_kw:
1538 promptflag--;
1539
1540 defining_func++;
1541 parsed_function_name.push (false);
1542
1543 if (! (reading_fcn_file || reading_script_file
1544 || reading_classdef_file))
1545 input_line_number = 1;
1546 break;
1547
1548 case magic_file_kw:
1549 {
1550 if ((reading_fcn_file || reading_script_file
1551 || reading_classdef_file)
1552 && ! curr_fcn_file_full_name.empty ())
1553 yylval.tok_val = new token (curr_fcn_file_full_name, l, c);
1554 else
1555 yylval.tok_val = new token ("stdin", l, c);
1556 }
1557 break;
1558
1559 case magic_line_kw:
1560 yylval.tok_val = new token (static_cast<double> (l), "", l, c);
1561 break;
1562
1563 default:
1564 panic_impossible ();
1565 }
1566
1567 if (! yylval.tok_val)
1568 yylval.tok_val = new token (l, c);
1569
1570 token_stack.push (yylval.tok_val);
1571
1572 return kw->tok;
1573 }
1574
1575 return 0;
1576 }
1577
1578 bool
1579 lexical_feedback::is_variable (const std::string& name)
1580 {
1581 return (symbol_table::is_variable (name)
1582 || (pending_local_variables.find (name)
1583 != pending_local_variables.end ()));
1584 }
1585
1586 std::string
1587 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof)
1588 {
1589 std::string buf;
1590
1591 bool at_bol = true;
1592 bool look_for_marker = false;
1593
1594 bool warned_incompatible = false;
1595
1596 int c = 0;
1597
1598 while ((c = reader.getc ()) != EOF)
1599 {
1600 current_input_column++;
1601
1602 if (look_for_marker)
1603 {
1604 at_bol = false;
1605 look_for_marker = false;
1606
1607 if (c == '{' || c == '}')
1608 {
1609 std::string tmp_buf (1, static_cast<char> (c));
1610
1611 int type = c;
1612
1613 bool done = false;
1614
1615 while ((c = reader.getc ()) != EOF && ! done)
1616 {
1617 current_input_column++;
1618
1619 switch (c)
1620 {
1621 case ' ':
1622 case '\t':
1623 tmp_buf += static_cast<char> (c);
1624 break;
1625
1626 case '\n':
1627 {
1628 current_input_column = 0;
1629 at_bol = true;
1630 done = true;
1631
1632 if (type == '{')
1633 {
1634 block_comment_nesting_level++;
1635 promptflag--;
1636 }
1637 else
1638 {
1639 block_comment_nesting_level--;
1640 promptflag++;
1641
1642 if (block_comment_nesting_level == 0)
1643 {
1644 buf += grab_comment_block (reader, true, eof);
1645
1646 return buf;
1647 }
1648 }
1649 }
1650 break;
1651
1652 default:
1653 at_bol = false;
1654 tmp_buf += static_cast<char> (c);
1655 buf += tmp_buf;
1656 done = true;
1657 break;
1658 }
1659 }
1660 }
1661 }
1662
1663 if (at_bol && (c == '%' || c == '#'))
1664 {
1665 if (c == '#' && ! warned_incompatible)
1666 {
1667 warned_incompatible = true;
1668 maybe_gripe_matlab_incompatible_comment (c);
1669 }
1670
1671 at_bol = false;
1672 look_for_marker = true;
1673 }
1674 else
1675 {
1676 buf += static_cast<char> (c);
1677
1678 if (c == '\n')
1679 {
1680 current_input_column = 0;
1681 at_bol = true;
1682 }
1683 }
1684 }
1685
1686 if (c == EOF)
1687 eof = true;
1688
1689 return buf;
1690 }
1691
1692 std::string
1693 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol,
1694 bool& eof)
1695 {
1696 std::string buf;
1697
1698 // TRUE means we are at the beginning of a comment block.
1699 bool begin_comment = false;
1700
1701 // TRUE means we are currently reading a comment block.
1702 bool in_comment = false;
1703
1704 bool warned_incompatible = false;
1705
1706 int c = 0;
1707
1708 while ((c = reader.getc ()) != EOF)
1709 {
1710 current_input_column++;
1711
1712 if (begin_comment)
1713 {
1714 if (c == '%' || c == '#')
1715 {
1716 at_bol = false;
1717 continue;
1718 }
1719 else if (at_bol && c == '{')
1720 {
1721 std::string tmp_buf (1, static_cast<char> (c));
1722
1723 bool done = false;
1724
1725 while ((c = reader.getc ()) != EOF && ! done)
1726 {
1727 current_input_column++;
1728
1729 switch (c)
1730 {
1731 case ' ':
1732 case '\t':
1733 tmp_buf += static_cast<char> (c);
1734 break;
1735
1736 case '\n':
1737 {
1738 current_input_column = 0;
1739 at_bol = true;
1740 done = true;
1741
1742 block_comment_nesting_level++;
1743 promptflag--;
1744
1745 buf += grab_block_comment (reader, eof);
1746
1747 in_comment = false;
1748
1749 if (eof)
1750 goto done;
1751 }
1752 break;
1753
1754 default:
1755 at_bol = false;
1756 tmp_buf += static_cast<char> (c);
1757 buf += tmp_buf;
1758 done = true;
1759 break;
1760 }
1761 }
1762 }
1763 else
1764 {
1765 at_bol = false;
1766 begin_comment = false;
1767 }
1768 }
1769
1770 if (in_comment)
1771 {
1772 buf += static_cast<char> (c);
1773
1774 if (c == '\n')
1775 {
1776 at_bol = true;
1777 current_input_column = 0;
1778 in_comment = false;
1779
1780 // FIXME -- bailing out here prevents things like
1781 //
1782 // octave> # comment
1783 // octave> x = 1
1784 //
1785 // from failing at the command line, while still
1786 // allowing blocks of comments to be grabbed properly
1787 // for function doc strings. But only the first line of
1788 // a mult-line doc string will be picked up for
1789 // functions defined on the command line. We need a
1790 // better way of collecting these comments...
1791 if (! (reading_fcn_file || reading_script_file))
1792 goto done;
1793 }
1794 }
1795 else
1796 {
1797 switch (c)
1798 {
1799 case ' ':
1800 case '\t':
1801 break;
1802
1803 case '#':
1804 if (! warned_incompatible)
1805 {
1806 warned_incompatible = true;
1807 maybe_gripe_matlab_incompatible_comment (c);
1808 }
1809 // fall through...
1810
1811 case '%':
1812 in_comment = true;
1813 begin_comment = true;
1814 break;
1815
1816 default:
1817 current_input_column--;
1818 reader.ungetc (c);
1819 goto done;
1820 }
1821 }
1822 }
1823
1824 done:
1825
1826 if (c == EOF)
1827 eof = true;
1828
1829 return buf;
1830 }
1831
1832 class 1236 class
1833 flex_stream_reader : public stream_reader 1237 flex_stream_reader : public stream_reader
1834 { 1238 {
1835 public: 1239 public:
1836 flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { } 1240 flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { }
1846 1250
1847 flex_stream_reader& operator = (const flex_stream_reader&); 1251 flex_stream_reader& operator = (const flex_stream_reader&);
1848 1252
1849 char *buf; 1253 char *buf;
1850 }; 1254 };
1851
1852 int
1853 lexical_feedback::process_comment (bool start_in_block, bool& eof)
1854 {
1855 eof = false;
1856
1857 std::string help_txt;
1858
1859 if (! help_buf.empty ())
1860 help_txt = help_buf.top ();
1861
1862 flex_stream_reader flex_reader (yytext);
1863
1864 // process_comment is only supposed to be called when we are not
1865 // initially looking at a block comment.
1866
1867 std::string txt = start_in_block
1868 ? grab_block_comment (flex_reader, eof)
1869 : grab_comment_block (flex_reader, false, eof);
1870
1871 if (lexer_debug_flag)
1872 std::cerr << "C: " << txt << std::endl;
1873
1874 if (help_txt.empty () && nesting_level.none ())
1875 {
1876 if (! help_buf.empty ())
1877 help_buf.pop ();
1878
1879 help_buf.push (txt);
1880 }
1881
1882 octave_comment_buffer::append (txt);
1883
1884 current_input_column = 1;
1885 quote_is_transpose = false;
1886 convert_spaces_to_comma = true;
1887 at_beginning_of_statement = true;
1888
1889 if (YY_START == COMMAND_START)
1890 BEGIN (INITIAL);
1891
1892 if (nesting_level.none ())
1893 return '\n';
1894 else if (nesting_level.is_bracket_or_brace ())
1895 return ';';
1896 else
1897 return 0;
1898 }
1899 1255
1900 // Return 1 if the given character matches any character in the given 1256 // Return 1 if the given character matches any character in the given
1901 // string. 1257 // string.
1902 1258
1903 static bool 1259 static bool
1924 looks_like_bin_op (bool spc_prev, int next_char) 1280 looks_like_bin_op (bool spc_prev, int next_char)
1925 { 1281 {
1926 bool spc_next = (next_char == ' ' || next_char == '\t'); 1282 bool spc_next = (next_char == ' ' || next_char == '\t');
1927 1283
1928 return ((spc_prev && spc_next) || ! spc_prev); 1284 return ((spc_prev && spc_next) || ! spc_prev);
1929 }
1930
1931 // Recognize separators. If the separator is a CRLF pair, it is
1932 // replaced by a single LF.
1933
1934 bool
1935 lexical_feedback::next_token_is_sep_op (void)
1936 {
1937 bool retval = false;
1938
1939 int c = text_yyinput ();
1940
1941 retval = match_any (c, ",;\n]");
1942
1943 xunput (c, yytext);
1944
1945 return retval;
1946 }
1947
1948 // Try to determine if the next token should be treated as a postfix
1949 // unary operator. This is ugly, but it seems to do the right thing.
1950
1951 bool
1952 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev)
1953 {
1954 bool un_op = false;
1955
1956 int c0 = text_yyinput ();
1957
1958 if (c0 == '\'' && ! spc_prev)
1959 {
1960 un_op = true;
1961 }
1962 else if (c0 == '.')
1963 {
1964 int c1 = text_yyinput ();
1965 un_op = (c1 == '\'');
1966 xunput (c1, yytext);
1967 }
1968 else if (c0 == '+')
1969 {
1970 int c1 = text_yyinput ();
1971 un_op = (c1 == '+');
1972 xunput (c1, yytext);
1973 }
1974 else if (c0 == '-')
1975 {
1976 int c1 = text_yyinput ();
1977 un_op = (c1 == '-');
1978 xunput (c1, yytext);
1979 }
1980
1981 xunput (c0, yytext);
1982
1983 return un_op;
1984 }
1985
1986 // Try to determine if the next token should be treated as a binary
1987 // operator.
1988 //
1989 // This kluge exists because whitespace is not always ignored inside
1990 // the square brackets that are used to create matrix objects (though
1991 // spacing only really matters in the cases that can be interpreted
1992 // either as binary ops or prefix unary ops: currently just +, -).
1993 //
1994 // Note that a line continuation directly following a + or - operator
1995 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
1996 // parsed as a binary operator.
1997
1998 bool
1999 lexical_feedback::next_token_is_bin_op (bool spc_prev)
2000 {
2001 bool bin_op = false;
2002
2003 int c0 = text_yyinput ();
2004
2005 switch (c0)
2006 {
2007 case '+':
2008 case '-':
2009 {
2010 int c1 = text_yyinput ();
2011
2012 switch (c1)
2013 {
2014 case '+':
2015 case '-':
2016 // Unary ops, spacing doesn't matter.
2017 break;
2018
2019 case '=':
2020 // Binary ops, spacing doesn't matter.
2021 bin_op = true;
2022 break;
2023
2024 default:
2025 // Could be either, spacing matters.
2026 bin_op = looks_like_bin_op (spc_prev, c1);
2027 break;
2028 }
2029
2030 xunput (c1, yytext);
2031 }
2032 break;
2033
2034 case ':':
2035 case '/':
2036 case '\\':
2037 case '^':
2038 // Always a binary op (may also include /=, \=, and ^=).
2039 bin_op = true;
2040 break;
2041
2042 // .+ .- ./ .\ .^ .* .**
2043 case '.':
2044 {
2045 int c1 = text_yyinput ();
2046
2047 if (match_any (c1, "+-/\\^*"))
2048 // Always a binary op (may also include .+=, .-=, ./=, ...).
2049 bin_op = true;
2050 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
2051 // A structure element reference is a binary op.
2052 bin_op = true;
2053
2054 xunput (c1, yytext);
2055 }
2056 break;
2057
2058 // = == & && | || * **
2059 case '=':
2060 case '&':
2061 case '|':
2062 case '*':
2063 // Always a binary op (may also include ==, &&, ||, **).
2064 bin_op = true;
2065 break;
2066
2067 // < <= <> > >=
2068 case '<':
2069 case '>':
2070 // Always a binary op (may also include <=, <>, >=).
2071 bin_op = true;
2072 break;
2073
2074 // ~= !=
2075 case '~':
2076 case '!':
2077 {
2078 int c1 = text_yyinput ();
2079
2080 // ~ and ! can be unary ops, so require following =.
2081 if (c1 == '=')
2082 bin_op = true;
2083
2084 xunput (c1, yytext);
2085 }
2086 break;
2087
2088 default:
2089 break;
2090 }
2091
2092 xunput (c0, yytext);
2093
2094 return bin_op;
2095 }
2096
// Used to delete trailing white space from tokens.  Returns a copy
// of S cut off at the first space or tab; S is returned whole when
// it contains neither.  Note that everything from the first blank
// on is dropped, so S is expected to have no embedded blanks.

static std::string
strip_trailing_whitespace (char *s)
{
  std::string token (s);

  const std::string::size_type cut = token.find_first_of (" \t");

  if (cut == std::string::npos)
    return token;

  return token.substr (0, cut);
}
2111
2112 // FIXME -- we need to handle block comments here.
2113
2114 void
2115 lexical_feedback::scan_for_comments (const char *text)
2116 {
2117 std::string comment_buf;
2118
2119 bool in_comment = false;
2120 bool beginning_of_comment = false;
2121
2122 int len = strlen (text);
2123 int i = 0;
2124
2125 while (i < len)
2126 {
2127 char c = text[i++];
2128
2129 switch (c)
2130 {
2131 case '%':
2132 case '#':
2133 if (in_comment)
2134 {
2135 if (! beginning_of_comment)
2136 comment_buf += static_cast<char> (c);
2137 }
2138 else
2139 {
2140 maybe_gripe_matlab_incompatible_comment (c);
2141 in_comment = true;
2142 beginning_of_comment = true;
2143 }
2144 break;
2145
2146 case '\n':
2147 if (in_comment)
2148 {
2149 comment_buf += static_cast<char> (c);
2150 octave_comment_buffer::append (comment_buf);
2151 comment_buf.resize (0);
2152 in_comment = false;
2153 beginning_of_comment = false;
2154 }
2155 break;
2156
2157 default:
2158 if (in_comment)
2159 {
2160 comment_buf += static_cast<char> (c);
2161 beginning_of_comment = false;
2162 }
2163 break;
2164 }
2165 }
2166
2167 if (! comment_buf.empty ())
2168 octave_comment_buffer::append (comment_buf);
2169 }
2170
2171 // Discard whitespace, including comments and continuations.
2172
2173 // FIXME -- we need to handle block comments here.
2174
2175 int
2176 lexical_feedback::eat_whitespace (void)
2177 {
2178 int retval = lexical_feedback::NO_WHITESPACE;
2179
2180 std::string comment_buf;
2181
2182 bool in_comment = false;
2183 bool beginning_of_comment = false;
2184
2185 int c = 0;
2186
2187 while ((c = text_yyinput ()) != EOF)
2188 {
2189 current_input_column++;
2190
2191 switch (c)
2192 {
2193 case ' ':
2194 case '\t':
2195 if (in_comment)
2196 {
2197 comment_buf += static_cast<char> (c);
2198 beginning_of_comment = false;
2199 }
2200 retval |= lexical_feedback::SPACE_OR_TAB;
2201 break;
2202
2203 case '\n':
2204 retval |= lexical_feedback::NEWLINE;
2205 if (in_comment)
2206 {
2207 comment_buf += static_cast<char> (c);
2208 octave_comment_buffer::append (comment_buf);
2209 comment_buf.resize (0);
2210 in_comment = false;
2211 beginning_of_comment = false;
2212 }
2213 current_input_column = 0;
2214 break;
2215
2216 case '#':
2217 case '%':
2218 if (in_comment)
2219 {
2220 if (! beginning_of_comment)
2221 comment_buf += static_cast<char> (c);
2222 }
2223 else
2224 {
2225 maybe_gripe_matlab_incompatible_comment (c);
2226 in_comment = true;
2227 beginning_of_comment = true;
2228 }
2229 break;
2230
2231 case '.':
2232 if (in_comment)
2233 {
2234 comment_buf += static_cast<char> (c);
2235 beginning_of_comment = false;
2236 break;
2237 }
2238 else
2239 {
2240 if (have_ellipsis_continuation ())
2241 break;
2242 else
2243 goto done;
2244 }
2245
2246 case '\\':
2247 if (in_comment)
2248 {
2249 comment_buf += static_cast<char> (c);
2250 beginning_of_comment = false;
2251 break;
2252 }
2253 else
2254 {
2255 if (have_continuation ())
2256 break;
2257 else
2258 goto done;
2259 }
2260
2261 default:
2262 if (in_comment)
2263 {
2264 comment_buf += static_cast<char> (c);
2265 beginning_of_comment = false;
2266 break;
2267 }
2268 else
2269 goto done;
2270 }
2271 }
2272
2273 if (! comment_buf.empty ())
2274 octave_comment_buffer::append (comment_buf);
2275
2276 done:
2277 xunput (c, yytext);
2278 current_input_column--;
2279 return retval;
2280 }
2281
// Return true if the LEN characters at S start with "0x" or "0X" and
// at least one more character follows the prefix.

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2287
2288 void
2289 lexical_feedback::handle_number (void)
2290 {
2291 double value = 0.0;
2292 int nread = 0;
2293
2294 if (looks_like_hex (yytext, strlen (yytext)))
2295 {
2296 unsigned long ival;
2297
2298 nread = sscanf (yytext, "%lx", &ival);
2299
2300 value = static_cast<double> (ival);
2301 }
2302 else
2303 {
2304 char *tmp = strsave (yytext);
2305
2306 char *idx = strpbrk (tmp, "Dd");
2307
2308 if (idx)
2309 *idx = 'e';
2310
2311 nread = sscanf (tmp, "%lf", &value);
2312
2313 delete [] tmp;
2314 }
2315
2316 // If yytext doesn't contain a valid number, we are in deep doo doo.
2317
2318 assert (nread == 1);
2319
2320 quote_is_transpose = true;
2321 convert_spaces_to_comma = true;
2322 looking_for_object_index = false;
2323 at_beginning_of_statement = false;
2324
2325 yylval.tok_val = new token (value, yytext, input_line_number,
2326 current_input_column);
2327
2328 token_stack.push (yylval.tok_val);
2329
2330 current_input_column += yyleng;
2331
2332 do_comma_insert_check ();
2333 }
2334
2335 // We have seen a backslash and need to find out if it should be
2336 // treated as a continuation character. If so, this eats it, up to
2337 // and including the new line character.
2338 //
2339 // Match whitespace only, followed by a comment character or newline.
2340 // Once a comment character is found, discard all input until newline.
2341 // If non-whitespace characters are found before comment
2342 // characters, return 0. Otherwise, return 1.
2343
2344 // FIXME -- we need to handle block comments here.
2345
2346 bool
2347 lexical_feedback::have_continuation (bool trailing_comments_ok)
2348 {
2349 std::ostringstream buf;
2350
2351 std::string comment_buf;
2352
2353 bool in_comment = false;
2354 bool beginning_of_comment = false;
2355
2356 int c = 0;
2357
2358 while ((c = text_yyinput ()) != EOF)
2359 {
2360 buf << static_cast<char> (c);
2361
2362 switch (c)
2363 {
2364 case ' ':
2365 case '\t':
2366 if (in_comment)
2367 {
2368 comment_buf += static_cast<char> (c);
2369 beginning_of_comment = false;
2370 }
2371 break;
2372
2373 case '%':
2374 case '#':
2375 if (trailing_comments_ok)
2376 {
2377 if (in_comment)
2378 {
2379 if (! beginning_of_comment)
2380 comment_buf += static_cast<char> (c);
2381 }
2382 else
2383 {
2384 maybe_gripe_matlab_incompatible_comment (c);
2385 in_comment = true;
2386 beginning_of_comment = true;
2387 }
2388 }
2389 else
2390 goto cleanup;
2391 break;
2392
2393 case '\n':
2394 if (in_comment)
2395 {
2396 comment_buf += static_cast<char> (c);
2397 octave_comment_buffer::append (comment_buf);
2398 }
2399 current_input_column = 0;
2400 promptflag--;
2401 gripe_matlab_incompatible_continuation ();
2402 return true;
2403
2404 default:
2405 if (in_comment)
2406 {
2407 comment_buf += static_cast<char> (c);
2408 beginning_of_comment = false;
2409 }
2410 else
2411 goto cleanup;
2412 break;
2413 }
2414 }
2415
2416 xunput (c, yytext);
2417 return false;
2418
2419 cleanup:
2420
2421 std::string s = buf.str ();
2422
2423 int len = s.length ();
2424 while (len--)
2425 xunput (s[len], yytext);
2426
2427 return false;
2428 }
2429
2430 // We have seen a '.' and need to see if it is the start of a
2431 // continuation. If so, this eats it, up to and including the new
2432 // line character.
2433
2434 bool
2435 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok)
2436 {
2437 char c1 = text_yyinput ();
2438 if (c1 == '.')
2439 {
2440 char c2 = text_yyinput ();
2441 if (c2 == '.' && have_continuation (trailing_comments_ok))
2442 return true;
2443 else
2444 {
2445 xunput (c2, yytext);
2446 xunput (c1, yytext);
2447 }
2448 }
2449 else
2450 xunput (c1, yytext);
2451
2452 return false;
2453 }
2454
2455 // See if we have a continuation line. If so, eat it and the leading
2456 // whitespace on the next line.
2457
2458 int
2459 lexical_feedback::eat_continuation (void)
2460 {
2461 int retval = lexical_feedback::NO_WHITESPACE;
2462
2463 int c = text_yyinput ();
2464
2465 if ((c == '.' && have_ellipsis_continuation ())
2466 || (c == '\\' && have_continuation ()))
2467 retval = eat_whitespace ();
2468 else
2469 xunput (c, yytext);
2470
2471 return retval;
2472 }
2473
2474 int
2475 lexical_feedback::handle_string (char delim)
2476 {
2477 std::ostringstream buf;
2478
2479 int bos_line = input_line_number;
2480 int bos_col = current_input_column;
2481
2482 int c;
2483 int escape_pending = 0;
2484
2485 while ((c = text_yyinput ()) != EOF)
2486 {
2487 current_input_column++;
2488
2489 if (c == '\\')
2490 {
2491 if (delim == '\'' || escape_pending)
2492 {
2493 buf << static_cast<char> (c);
2494 escape_pending = 0;
2495 }
2496 else
2497 {
2498 if (have_continuation (false))
2499 escape_pending = 0;
2500 else
2501 {
2502 buf << static_cast<char> (c);
2503 escape_pending = 1;
2504 }
2505 }
2506 continue;
2507 }
2508 else if (c == '.')
2509 {
2510 if (delim == '\'' || ! have_ellipsis_continuation (false))
2511 buf << static_cast<char> (c);
2512 }
2513 else if (c == '\n')
2514 {
2515 error ("unterminated string constant");
2516 break;
2517 }
2518 else if (c == delim)
2519 {
2520 if (escape_pending)
2521 buf << static_cast<char> (c);
2522 else
2523 {
2524 c = text_yyinput ();
2525 if (c == delim)
2526 {
2527 buf << static_cast<char> (c);
2528 }
2529 else
2530 {
2531 std::string s;
2532 xunput (c, yytext);
2533
2534 if (delim == '\'')
2535 s = buf.str ();
2536 else
2537 s = do_string_escapes (buf.str ());
2538
2539 quote_is_transpose = true;
2540 convert_spaces_to_comma = true;
2541
2542 yylval.tok_val = new token (s, bos_line, bos_col);
2543 token_stack.push (yylval.tok_val);
2544
2545 if (delim == '"')
2546 gripe_matlab_incompatible ("\" used as string delimiter");
2547 else if (delim == '\'')
2548 gripe_single_quote_string ();
2549
2550 looking_for_object_index = true;
2551 at_beginning_of_statement = false;
2552
2553 return delim == '"' ? DQ_STRING : SQ_STRING;
2554 }
2555 }
2556 }
2557 else
2558 {
2559 buf << static_cast<char> (c);
2560 }
2561
2562 escape_pending = 0;
2563 }
2564
2565 return LEXICAL_ERROR;
2566 }
2567
2568 bool
2569 lexical_feedback::next_token_is_assign_op (void)
2570 {
2571 bool retval = false;
2572
2573 int c0 = text_yyinput ();
2574
2575 switch (c0)
2576 {
2577 case '=':
2578 {
2579 int c1 = text_yyinput ();
2580 xunput (c1, yytext);
2581 if (c1 != '=')
2582 retval = true;
2583 }
2584 break;
2585
2586 case '+':
2587 case '-':
2588 case '*':
2589 case '/':
2590 case '\\':
2591 case '&':
2592 case '|':
2593 {
2594 int c1 = text_yyinput ();
2595 xunput (c1, yytext);
2596 if (c1 == '=')
2597 retval = true;
2598 }
2599 break;
2600
2601 case '.':
2602 {
2603 int c1 = text_yyinput ();
2604 if (match_any (c1, "+-*/\\"))
2605 {
2606 int c2 = text_yyinput ();
2607 xunput (c2, yytext);
2608 if (c2 == '=')
2609 retval = true;
2610 }
2611 xunput (c1, yytext);
2612 }
2613 break;
2614
2615 case '>':
2616 {
2617 int c1 = text_yyinput ();
2618 if (c1 == '>')
2619 {
2620 int c2 = text_yyinput ();
2621 xunput (c2, yytext);
2622 if (c2 == '=')
2623 retval = true;
2624 }
2625 xunput (c1, yytext);
2626 }
2627 break;
2628
2629 case '<':
2630 {
2631 int c1 = text_yyinput ();
2632 if (c1 == '<')
2633 {
2634 int c2 = text_yyinput ();
2635 xunput (c2, yytext);
2636 if (c2 == '=')
2637 retval = true;
2638 }
2639 xunput (c1, yytext);
2640 }
2641 break;
2642
2643 default:
2644 break;
2645 }
2646
2647 xunput (c0, yytext);
2648
2649 return retval;
2650 }
2651
2652 bool
2653 lexical_feedback::next_token_is_index_op (void)
2654 {
2655 int c = text_yyinput ();
2656 xunput (c, yytext);
2657 return c == '(' || c == '{';
2658 }
2659
2660 int
2661 lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type)
2662 {
2663 int retval = bracket_type;
2664
2665 if (! nesting_level.none ())
2666 {
2667 nesting_level.remove ();
2668
2669 if (bracket_type == ']')
2670 bracketflag--;
2671 else if (bracket_type == '}')
2672 braceflag--;
2673 else
2674 panic_impossible ();
2675 }
2676
2677 if (bracketflag == 0 && braceflag == 0)
2678 BEGIN (INITIAL);
2679
2680 if (bracket_type == ']'
2681 && next_token_is_assign_op ()
2682 && ! looking_at_return_list)
2683 {
2684 retval = CLOSE_BRACE;
2685 }
2686 else if ((bracketflag || braceflag)
2687 && convert_spaces_to_comma
2688 && (nesting_level.is_bracket ()
2689 || (nesting_level.is_brace ()
2690 && ! looking_at_object_index.front ())))
2691 {
2692 bool index_op = next_token_is_index_op ();
2693
2694 // Don't insert comma if we are looking at something like
2695 //
2696 // [x{i}{j}] or [x{i}(j)]
2697 //
2698 // but do if we are looking at
2699 //
2700 // [x{i} {j}] or [x{i} (j)]
2701
2702 if (spc_gobbled || ! (bracket_type == '}' && index_op))
2703 {
2704 bool bin_op = next_token_is_bin_op (spc_gobbled);
2705
2706 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2707
2708 bool sep_op = next_token_is_sep_op ();
2709
2710 if (! (postfix_un_op || bin_op || sep_op))
2711 {
2712 maybe_warn_separator_insert (',');
2713
2714 xunput (',', yytext);
2715 return retval;
2716 }
2717 }
2718 }
2719
2720 quote_is_transpose = true;
2721 convert_spaces_to_comma = true;
2722
2723 return retval;
2724 }
2725
2726 void
2727 lexical_feedback::maybe_unput_comma (int spc_gobbled)
2728 {
2729 if (nesting_level.is_bracket ()
2730 || (nesting_level.is_brace ()
2731 && ! looking_at_object_index.front ()))
2732 {
2733 int bin_op = next_token_is_bin_op (spc_gobbled);
2734
2735 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2736
2737 int c1 = text_yyinput ();
2738 int c2 = text_yyinput ();
2739
2740 xunput (c2, yytext);
2741 xunput (c1, yytext);
2742
2743 int sep_op = next_token_is_sep_op ();
2744
2745 int dot_op = (c1 == '.'
2746 && (isalpha (c2) || isspace (c2) || c2 == '_'));
2747
2748 if (postfix_un_op || bin_op || sep_op || dot_op)
2749 return;
2750
2751 int index_op = (c1 == '(' || c1 == '{');
2752
2753 // If there is no space before the indexing op, we don't insert
2754 // a comma.
2755
2756 if (index_op && ! spc_gobbled)
2757 return;
2758
2759 maybe_warn_separator_insert (',');
2760
2761 xunput (',', yytext);
2762 }
2763 }
2764
2765 bool
2766 lexical_feedback::next_token_can_follow_bin_op (void)
2767 {
2768 std::stack<char> buf;
2769
2770 int c = EOF;
2771
2772 // Skip whitespace in current statement on current line
2773 while (true)
2774 {
2775 c = text_yyinput ();
2776
2777 buf.push (c);
2778
2779 if (match_any (c, ",;\n") || (c != ' ' && c != '\t'))
2780 break;
2781 }
2782
2783 // Restore input.
2784 while (! buf.empty ())
2785 {
2786 xunput (buf.top (), yytext);
2787
2788 buf.pop ();
2789 }
2790
2791 return (isalnum (c) || match_any (c, "!\"'(-[_{~"));
2792 }
2793
// Return false for the names of the built-in constants (e, I, i, J,
// j, Inf, inf, NaN, nan) so that input such as "NaN ^2" is never
// parsed using command syntax; return true for any other name.

static bool
can_be_command (const std::string& tok)
{
  static const char *const constant_names[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  static const size_t num_names
    = sizeof (constant_names) / sizeof (constant_names[0]);

  for (size_t k = 0; k < num_names; k++)
    {
      if (tok == constant_names[k])
        return false;
    }

  return true;
}
2806
// Peek at the input following a word and decide whether it looks
// like the start of a command-syntax argument.  The result starts
// out true and is set to false when the upcoming text looks like the
// continuation of an expression instead: an assignment, an index
// expression, a structure field reference, or a binary operator that
// is followed by a blank or tab and then something that
// next_token_can_follow_bin_op accepts.  Every character read is
// pushed back before returning.
2807 bool
2808 lexical_feedback::looks_like_command_arg (void)
2809 {
2810 bool retval = true;
2811
2812 int c0 = text_yyinput ();
2813
2814 switch (c0)
2815 {
2816 // = ==
2817 case '=':
2818 {
2819 int c1 = text_yyinput ();
2820
2821 if (c1 == '=')
2822 {
2823 int c2 = text_yyinput ();
2824
// The == operator only counts when followed by a blank or tab and
// then a possible operand.
2825 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
2826 && next_token_can_follow_bin_op ())
2827 retval = false;
2828
2829 xunput (c2, yytext);
2830 }
2831 else
// A lone '=' is an assignment, never a command argument.
2832 retval = false;
2833
2834 xunput (c1, yytext);
2835 }
2836 break;
2837
2838 case '(':
2839 case '{':
2840 // Indexing.
2841 retval = false;
2842 break;
2843
2844 case '\n':
2845 // EOL.
2846 break;
2847
2848 case '\'':
2849 case '"':
2850 // Beginning of a character string.
2851 break;
2852
2853 // + - ++ -- += -=
2854 case '+':
2855 case '-':
2856 {
2857 int c1 = text_yyinput ();
2858
2859 switch (c1)
2860 {
2861 case '\n':
2862 // EOL.
2863 case '+':
2864 case '-':
2865 // Unary ops, spacing doesn't matter.
2866 break;
2867
2868 case '\t':
2869 case ' ':
2870 {
2871 if (next_token_can_follow_bin_op ())
2872 retval = false;
2873 }
2874 break;
2875
2876 case '=':
2877 {
2878 int c2 = text_yyinput ();
2879
2880 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
2881 && next_token_can_follow_bin_op ())
2882 retval = false;
2883
2884 xunput (c2, yytext);
2885 }
2886 break;
2887 }
2888
2889 xunput (c1, yytext);
2890 }
2891 break;
2892
// Single-character binary operators.
2893 case ':':
2894 case '/':
2895 case '\\':
2896 case '^':
2897 {
2898 int c1 = text_yyinput ();
2899
2900 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
2901 && next_token_can_follow_bin_op ())
2902 retval = false;
2903
2904 xunput (c1, yytext);
2905 }
2906 break;
2907
2908 // .+ .- ./ .\ .^ .* .**
2909 case '.':
2910 {
2911 int c1 = text_yyinput ();
2912
2913 if (match_any (c1, "+-/\\^*"))
2914 {
2915 int c2 = text_yyinput ();
2916
2917 if (c2 == '=')
2918 {
2919 int c3 = text_yyinput ();
2920
2921 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t')
2922 && next_token_can_follow_bin_op ())
2923 retval = false;
2924
2925 xunput (c3, yytext);
2926 }
2927 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
2928 && next_token_can_follow_bin_op ())
2929 retval = false;
2930
2931 xunput (c2, yytext);
2932 }
2933 else if (! match_any (c1, ",;\n")
2934 && (! isdigit (c1) && c1 != ' ' && c1 != '\t'
2935 && c1 != '.'))
2936 {
2937 // Structure reference. FIXME -- is this a complete check?
2938
2939 retval = false;
2940 }
2941
2942 xunput (c1, yytext);
2943 }
2944 break;
2945
2946 // & && | || * **
2947 case '&':
2948 case '|':
2949 case '*':
2950 {
2951 int c1 = text_yyinput ();
2952
// A doubled character (c1 == c0) is the two-character operator.
2953 if (c1 == c0)
2954 {
2955 int c2 = text_yyinput ();
2956
2957 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
2958 && next_token_can_follow_bin_op ())
2959 retval = false;
2960
2961 xunput (c2, yytext);
2962 }
2963 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
2964 && next_token_can_follow_bin_op ())
2965 retval = false;
2966
2967 xunput (c1, yytext);
2968 }
2969 break;
2970
2971 // < <= > >=
2972 case '<':
2973 case '>':
2974 {
2975 int c1 = text_yyinput ();
2976
2977 if (c1 == '=')
2978 {
2979 int c2 = text_yyinput ();
2980
2981 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
2982 && next_token_can_follow_bin_op ())
2983 retval = false;
2984
2985 xunput (c2, yytext);
2986 }
2987 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
2988 && next_token_can_follow_bin_op ())
2989 retval = false;
2990
2991 xunput (c1, yytext);
2992 }
2993 break;
2994
2995 // ~= !=
2996 case '~':
2997 case '!':
2998 {
2999 int c1 = text_yyinput ();
3000
3001 // ~ and ! can be unary ops, so require following =.
// NOTE(review): the else branch below still treats a bare ~ or !
// followed by a blank as a binary operator, which does not match the
// comment above -- confirm the intended behavior.
3002 if (c1 == '=')
3003 {
3004 int c2 = text_yyinput ();
3005
3006 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3007 && next_token_can_follow_bin_op ())
3008 retval = false;
3009
3010 xunput (c2, yytext);
3011 }
3012 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3013 && next_token_can_follow_bin_op ())
3014 retval = false;
3015
3016 xunput (c1, yytext);
3017 }
3018 break;
3019
3020 default:
3021 break;
3022 }
3023
3024 xunput (c0, yytext);
3025
3026 return retval;
3027 }
3028
3029 int
3030 lexical_feedback::handle_superclass_identifier (void)
3031 {
3032 eat_continuation ();
3033
3034 std::string pkg;
3035 std::string meth = strip_trailing_whitespace (yytext);
3036 size_t pos = meth.find ("@");
3037 std::string cls = meth.substr (pos).substr (1);
3038 meth = meth.substr (0, pos - 1);
3039
3040 pos = cls.find (".");
3041 if (pos != std::string::npos)
3042 {
3043 pkg = cls.substr (pos).substr (1);
3044 cls = cls.substr (0, pos - 1);
3045 }
3046
3047 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls)
3048 || is_keyword_token (pkg));
3049 if (kw_token)
3050 {
3051 error ("method, class and package names may not be keywords");
3052 return LEXICAL_ERROR;
3053 }
3054
3055 yylval.tok_val
3056 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)),
3057 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3058 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3059 input_line_number,
3060 current_input_column);
3061 token_stack.push (yylval.tok_val);
3062
3063 convert_spaces_to_comma = true;
3064 current_input_column += yyleng;
3065
3066 return SUPERCLASSREF;
3067 }
3068
3069 int
3070 lexical_feedback::handle_meta_identifier (void)
3071 {
3072 eat_continuation ();
3073
3074 std::string pkg;
3075 std::string cls = strip_trailing_whitespace (yytext).substr (1);
3076 size_t pos = cls.find (".");
3077
3078 if (pos != std::string::npos)
3079 {
3080 pkg = cls.substr (pos).substr (1);
3081 cls = cls.substr (0, pos - 1);
3082 }
3083
3084 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg);
3085 if (kw_token)
3086 {
3087 error ("class and package names may not be keywords");
3088 return LEXICAL_ERROR;
3089 }
3090
3091 yylval.tok_val
3092 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)),
3093 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3094 input_line_number,
3095 current_input_column);
3096
3097 token_stack.push (yylval.tok_val);
3098
3099 convert_spaces_to_comma = true;
3100 current_input_column += yyleng;
3101
3102 return METAQUERY;
3103 }
3104
3105 // Figure out exactly what kind of token to return when we have seen
3106 // an identifier. Handles keywords. Return -1 if the identifier
3107 // should be ignored.
//
// The possible results visible here are STRUCT_ELT (when a structure
// field name is expected), FCN_HANDLE, LEXICAL_ERROR (keyword used
// as a function handle), the keyword token reported by
// is_keyword_token, or NAME.  A new token is stored in
// yylval.tok_val and pushed on token_stack for every result except
// LEXICAL_ERROR.
3108
3109 int
3110 lexical_feedback::handle_identifier (void)
3111 {
// Save the flag now; it is cleared below and may be changed again by
// is_keyword_token.
3112 bool at_bos = at_beginning_of_statement;
3113
3114 std::string tok = strip_trailing_whitespace (yytext);
3115
// Last character of the matched text, used to detect whitespace that
// was matched along with the identifier.
3116 int c = yytext[yyleng-1];
3117
3118 bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
3119
3120 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
3121
3122 // If we are expecting a structure element, avoid recognizing
3123 // keywords and other special names and return STRUCT_ELT, which is
3124 // a string that is also a valid identifier. But first, we have to
3125 // decide whether to insert a comma.
3126
3127 if (looking_at_indirect_ref)
3128 {
3129 do_comma_insert_check ();
3130
3131 maybe_unput_comma (spc_gobbled);
3132
3133 yylval.tok_val = new token (tok, input_line_number,
3134 current_input_column);
3135
3136 token_stack.push (yylval.tok_val);
3137
3138 quote_is_transpose = true;
3139 convert_spaces_to_comma = true;
3140 looking_for_object_index = true;
3141
3142 current_input_column += yyleng;
3143
3144 return STRUCT_ELT;
3145 }
3146
3147 at_beginning_of_statement = false;
3148
3149 // The is_keyword_token may reset
3150 // at_beginning_of_statement. For example, if it sees
3151 // an else token, then the next token is at the beginning of a
3152 // statement.
3153
3154 int kw_token = is_keyword_token (tok);
3155
3156 // If we found a keyword token, then the beginning_of_statement flag
3157 // is already set. Otherwise, we won't be at the beginning of a
3158 // statement.
3159
3160 if (looking_at_function_handle)
3161 {
3162 if (kw_token)
3163 {
3164 error ("function handles may not refer to keywords");
3165
3166 return LEXICAL_ERROR;
3167 }
3168 else
3169 {
3170 yylval.tok_val = new token (tok, input_line_number,
3171 current_input_column);
3172
3173 token_stack.push (yylval.tok_val);
3174
3175 current_input_column += yyleng;
3176 quote_is_transpose = false;
3177 convert_spaces_to_comma = true;
3178 looking_for_object_index = true;
3179
3180 return FCN_HANDLE;
3181 }
3182 }
3183
3184 // If we have a regular keyword, return it.
3185 // Keywords can be followed by identifiers.
3186
3187 if (kw_token)
3188 {
// A negative keyword token is returned without touching the lexer
// state.
3189 if (kw_token >= 0)
3190 {
3191 current_input_column += yyleng;
3192 quote_is_transpose = false;
3193 convert_spaces_to_comma = true;
3194 looking_for_object_index = false;
3195 }
3196
3197 return kw_token;
3198 }
3199
3200 // See if we have a plot keyword (title, using, with, or clear).
// NOTE(review): the code below only checks whether the next token is
// a single '=' (and not "=="); no plot keyword handling is visible
// here, so the comment above looks stale.
3201
3202 int c1 = text_yyinput ();
3203
3204 bool next_tok_is_eq = false;
3205 if (c1 == '=')
3206 {
3207 int c2 = text_yyinput ();
3208 xunput (c2, yytext);
3209
3210 if (c2 != '=')
3211 next_tok_is_eq = true;
3212 }
3213
3214 xunput (c1, yytext);
3215
3216 // Kluge alert.
3217 //
3218 // If we are looking at a text style function, set up to gobble its
3219 // arguments.
3220 //
3221 // If the following token is '=', or if we are parsing a function
3222 // return list or function parameter list, or if we are looking at
3223 // something like [ab,cd] = foo (), force the symbol to be inserted
3224 // as a variable in the current symbol table.
3225
3226 if (! is_variable (tok))
3227 {
// Command syntax requires the word to start a statement, to be
// followed by whitespace, and not to be one of the constant names
// rejected by can_be_command.
3228 if (at_bos && spc_gobbled && can_be_command (tok)
3229 && looks_like_command_arg ())
3230 {
3231 BEGIN (COMMAND_START);
3232 }
3233 else if (next_tok_is_eq
3234 || looking_at_decl_list
3235 || looking_at_return_list
3236 || (looking_at_parameter_list
3237 && ! looking_at_initializer_expression))
3238 {
3239 symbol_table::force_variable (tok);
3240 }
3241 else if (looking_at_matrix_or_assign_lhs)
3242 {
3243 pending_local_variables.insert (tok);
3244 }
3245 }
3246
3247 // Find the token in the symbol table. Beware the magic
3248 // transformation of the end keyword...
3249
3250 if (tok == "end")
3251 tok = "__end__";
3252
3253 yylval.tok_val = new token (&(symbol_table::insert (tok)),
3254 input_line_number,
3255 current_input_column);
3256
3257 token_stack.push (yylval.tok_val);
3258
3259 // After seeing an identifer, it is ok to convert spaces to a comma
3260 // (if needed).
3261
3262 convert_spaces_to_comma = true;
3263
3264 if (! (next_tok_is_eq || YY_START == COMMAND_START))
3265 {
3266 quote_is_transpose = true;
3267
3268 do_comma_insert_check ();
3269
3270 maybe_unput_comma (spc_gobbled);
3271 }
3272
3273 current_input_column += yyleng;
3274
3275 if (tok != "__end__")
3276 looking_for_object_index = true;
3277
3278 return NAME;
3279 } 1285
3280 1286
3281 bool 1287 bool
3282 is_keyword (const std::string& s) 1288 is_keyword (const std::string& s)
3283 { 1289 {
3356 1362
// Switch the scanner to the FUNCTION_FILE_BEGIN start state.
3357 void 1363
3358 prep_lexer_for_function_file (void) 1364
3359 { 1365
3360 BEGIN (FUNCTION_FILE_BEGIN); 1366
1367 }
1368
// Return the leading part of S up to, but not including, the first
// blank or tab.  If S contains neither, the whole string is
// returned.  Used to remove trailing white space from tokens.

static std::string
strip_trailing_whitespace (char *s)
{
  const std::string text (s);

  // find_first_of yields npos when nothing matches, and substr then
  // returns the complete string.
  return text.substr (0, text.find_first_of (" \t"));
}
3362 1383
3363 static int 1384 static int
3364 octave_read (char *buf, unsigned max_size) 1385 octave_read (char *buf, unsigned max_size)
3365 { 1386 {
3420 if (! eof) 1441 if (! eof)
3421 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); 1442 YY_FATAL_ERROR ("octave_read () in flex scanner failed");
3422 } 1443 }
3423 1444
3424 return status; 1445 return status;
3425 }
3426
3427 void
3428 lexical_feedback::maybe_warn_separator_insert (char sep)
3429 {
3430 std::string nm = curr_fcn_file_full_name;
3431
3432 if (nm.empty ())
3433 warning_with_id ("Octave:separator-insert",
3434 "potential auto-insertion of '%c' near line %d",
3435 sep, input_line_number);
3436 else
3437 warning_with_id ("Octave:separator-insert",
3438 "potential auto-insertion of '%c' near line %d of file %s",
3439 sep, input_line_number, nm.c_str ());
3440 }
3441
3442 void
3443 lexical_feedback::gripe_single_quote_string (void)
3444 {
3445 std::string nm = curr_fcn_file_full_name;
3446
3447 if (nm.empty ())
3448 warning_with_id ("Octave:single-quote-string",
3449 "single quote delimited string near line %d",
3450 input_line_number);
3451 else
3452 warning_with_id ("Octave:single-quote-string",
3453 "single quote delimited string near line %d of file %s",
3454 input_line_number, nm.c_str ());
3455 }
3456
3457 void
3458 lexical_feedback::gripe_matlab_incompatible (const std::string& msg)
3459 {
3460 std::string nm = curr_fcn_file_full_name;
3461
3462 if (nm.empty ())
3463 warning_with_id ("Octave:matlab-incompatible",
3464 "potential Matlab compatibility problem: %s",
3465 msg.c_str ());
3466 else
3467 warning_with_id ("Octave:matlab-incompatible",
3468 "potential Matlab compatibility problem: %s near line %d offile %s",
3469 msg.c_str (), input_line_number, nm.c_str ());
3470 }
3471
3472 void
3473 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c)
3474 {
3475 if (c == '#')
3476 gripe_matlab_incompatible ("# used as comment character");
3477 }
3478
3479 void
3480 lexical_feedback::gripe_matlab_incompatible_continuation (void)
3481 {
3482 gripe_matlab_incompatible ("\\ used as line continuation marker");
3483 }
3484
3485 void
3486 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op)
3487 {
3488 std::string t = op;
3489 int n = t.length ();
3490 if (t[n-1] == '\n')
3491 t.resize (n-1);
3492 gripe_matlab_incompatible (t + " used as operator");
3493 } 1446 }
3494 1447
3495 static void 1448 static void
3496 display_token (int tok) 1449 display_token (int tok)
3497 { 1450 {
3692 retval = set_internal_variable (lexer_debug_flag, args, nargout, 1645 retval = set_internal_variable (lexer_debug_flag, args, nargout,
3693 "__lexer_debug_flag__"); 1646 "__lexer_debug_flag__");
3694 1647
3695 return retval; 1648 return retval;
3696 } 1649 }
1650
1651 lexical_feedback::~lexical_feedback (void)
1652 {
1653 // Clear out the stack of token info used to track line and
1654 // column numbers.
1655
1656 while (! token_stack.empty ())
1657 {
1658 delete token_stack.top ();
1659 token_stack.pop ();
1660 }
1661 }
1662
1663 // GAG.
1664 //
1665 // If we're reading a matrix and the next character is '[', make sure
1666 // that we insert a comma ahead of it.
1667
1668 void
1669 lexical_feedback::do_comma_insert_check (void)
1670 {
1671 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
1672
1673 int c = text_yyinput ();
1674
1675 xunput (c, yytext);
1676
1677 if (spc_gobbled)
1678 xunput (' ', yytext);
1679
1680 do_comma_insert = (! looking_at_object_index.front ()
1681 && bracketflag && c == '[');
1682 }
1683
1684 int
1685 lexical_feedback::text_yyinput (void)
1686 {
1687 int c = yyinput ();
1688
1689 if (lexer_debug_flag)
1690 {
1691 std::cerr << "I: ";
1692 display_character (c);
1693 std::cerr << std::endl;
1694 }
1695
1696 // Convert CRLF into just LF and single CR into LF.
1697
1698 if (c == '\r')
1699 {
1700 c = yyinput ();
1701
1702 if (lexer_debug_flag)
1703 {
1704 std::cerr << "I: ";
1705 display_character (c);
1706 std::cerr << std::endl;
1707 }
1708
1709 if (c != '\n')
1710 {
1711 xunput (c, yytext);
1712 c = '\n';
1713 }
1714 }
1715
1716 if (c == '\n')
1717 input_line_number++;
1718
1719 return c;
1720 }
1721
1722 void
1723 lexical_feedback::xunput (char c, char *buf)
1724 {
1725 if (lexer_debug_flag)
1726 {
1727 std::cerr << "U: ";
1728 display_character (c);
1729 std::cerr << std::endl;
1730 }
1731
1732 if (c == '\n')
1733 input_line_number--;
1734
1735 yyunput (c, buf);
1736 }
1737
1738 // If we read some newlines, we need figure out what column we're
1739 // really looking at.
1740
1741 void
1742 lexical_feedback::fixup_column_count (char *s)
1743 {
1744 char c;
1745 while ((c = *s++) != '\0')
1746 {
1747 if (c == '\n')
1748 {
1749 input_line_number++;
1750 current_input_column = 1;
1751 }
1752 else
1753 current_input_column++;
1754 }
1755 }
1756
1757 bool
1758 lexical_feedback::inside_any_object_index (void)
1759 {
1760 bool retval = false;
1761
1762 for (std::list<bool>::const_iterator i = looking_at_object_index.begin ();
1763 i != looking_at_object_index.end (); i++)
1764 {
1765 if (*i)
1766 {
1767 retval = true;
1768 break;
1769 }
1770 }
1771
1772 return retval;
1773 }
1774
1775 // Handle keywords. Return -1 if the keyword should be ignored.
//
// Looks S up in the keyword hash table.  If it is a keyword that
// applies in the current context, the lexer state is updated for
// that keyword, a token is stored in yylval.tok_val and pushed on
// token_stack, and the keyword's token value (kw->tok) is returned.
// Otherwise 0 is returned.
//
// NOTE(review): no path visible here returns -1; both "not a
// keyword" and "keyword ignored in this context" return 0.
1776
1777 int
1778 lexical_feedback::is_keyword_token (const std::string& s)
1779 {
// Capture the position before any case below changes it.
1780 int l = input_line_number;
1781 int c = current_input_column;
1782
1783 int len = s.length ();
1784
1785 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len);
1786
1787 if (kw)
1788 {
// Cases that need a special token set this; the others get a plain
// position token after the switch.
1789 yylval.tok_val = 0;
1790
1791 switch (kw->kw_id)
1792 {
1793 case break_kw:
1794 case catch_kw:
1795 case continue_kw:
1796 case else_kw:
1797 case otherwise_kw:
1798 case return_kw:
1799 case unwind_protect_cleanup_kw:
1800 at_beginning_of_statement = true;
1801 break;
1802
1803 case static_kw:
1804 if ((reading_fcn_file || reading_script_file
1805 || reading_classdef_file)
1806 && ! curr_fcn_file_full_name.empty ())
1807 warning_with_id ("Octave:deprecated-keyword",
1808 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'",
1809 input_line_number,
1810 curr_fcn_file_full_name.c_str ());
1811 else
1812 warning_with_id ("Octave:deprecated-keyword",
1813 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d",
1814 input_line_number);
1815 // fall through ...
1816
1817 case persistent_kw:
1818 break;
1819
1820 case case_kw:
1821 case elseif_kw:
1822 case global_kw:
1823 case until_kw:
1824 break;
1825
1826 case end_kw:
// Inside an index expression, or while defining a function outside a
// classdef file before its name or return list has been seen, "end"
// is not treated as a keyword.
1827 if (inside_any_object_index ()
1828 || (! reading_classdef_file
1829 && (defining_func
1830 && ! (looking_at_return_list
1831 || parsed_function_name.top ()))))
1832 return 0;
1833
1834 yylval.tok_val = new token (token::simple_end, l, c);
1835 at_beginning_of_statement = true;
1836 break;
1837
1838 case end_try_catch_kw:
1839 yylval.tok_val = new token (token::try_catch_end, l, c);
1840 at_beginning_of_statement = true;
1841 break;
1842
1843 case end_unwind_protect_kw:
1844 yylval.tok_val = new token (token::unwind_protect_end, l, c);
1845 at_beginning_of_statement = true;
1846 break;
1847
1848 case endfor_kw:
1849 yylval.tok_val = new token (token::for_end, l, c);
1850 at_beginning_of_statement = true;
1851 break;
1852
1853 case endfunction_kw:
1854 yylval.tok_val = new token (token::function_end, l, c);
1855 at_beginning_of_statement = true;
1856 break;
1857
1858 case endif_kw:
1859 yylval.tok_val = new token (token::if_end, l, c);
1860 at_beginning_of_statement = true;
1861 break;
1862
1863 case endparfor_kw:
1864 yylval.tok_val = new token (token::parfor_end, l, c);
1865 at_beginning_of_statement = true;
1866 break;
1867
1868 case endswitch_kw:
1869 yylval.tok_val = new token (token::switch_end, l, c);
1870 at_beginning_of_statement = true;
1871 break;
1872
1873 case endwhile_kw:
1874 yylval.tok_val = new token (token::while_end, l, c);
1875 at_beginning_of_statement = true;
1876 break;
1877
1878 case endclassdef_kw:
1879 yylval.tok_val = new token (token::classdef_end, l, c);
1880 at_beginning_of_statement = true;
1881 break;
1882
1883 case endenumeration_kw:
1884 yylval.tok_val = new token (token::enumeration_end, l, c);
1885 at_beginning_of_statement = true;
1886 break;
1887
1888 case endevents_kw:
1889 yylval.tok_val = new token (token::events_end, l, c);
1890 at_beginning_of_statement = true;
1891 break;
1892
1893 case endmethods_kw:
1894 yylval.tok_val = new token (token::methods_end, l, c);
1895 at_beginning_of_statement = true;
1896 break;
1897
1898 case endproperties_kw:
1899 yylval.tok_val = new token (token::properties_end, l, c);
1900 at_beginning_of_statement = true;
1901 break;
1902
1903
1904 case for_kw:
1905 case parfor_kw:
1906 case while_kw:
1907 promptflag--;
1908 looping++;
1909 break;
1910
1911 case do_kw:
1912 at_beginning_of_statement = true;
1913 promptflag--;
1914 looping++;
1915 break;
1916
1917 case try_kw:
1918 case unwind_protect_kw:
1919 at_beginning_of_statement = true;
1920 promptflag--;
1921 break;
1922
1923 case if_kw:
1924 case switch_kw:
1925 promptflag--;
1926 break;
1927
1928 case get_kw:
1929 case set_kw:
1930 // 'get' and 'set' are keywords in classdef method
1931 // declarations.
1932 if (! maybe_classdef_get_set_method)
1933 return 0;
1934 break;
1935
1936 case enumeration_kw:
1937 case events_kw:
1938 case methods_kw:
1939 case properties_kw:
1940 // 'properties', 'methods' and 'events' are keywords for
1941 // classdef blocks.
1942 if (! parsing_classdef)
1943 return 0;
1944 // fall through ...
1945
1946 case classdef_kw:
1947 // 'classdef' is always a keyword.
1948 promptflag--;
1949 break;
1950
1951 case function_kw:
1952 promptflag--;
1953
1954 defining_func++;
1955 parsed_function_name.push (false);
1956
// When not reading from a file, line numbering restarts at 1 for
// the function being defined.
1957 if (! (reading_fcn_file || reading_script_file
1958 || reading_classdef_file))
1959 input_line_number = 1;
1960 break;
1961
1962 case magic_file_kw:
1963 {
1964 if ((reading_fcn_file || reading_script_file
1965 || reading_classdef_file)
1966 && ! curr_fcn_file_full_name.empty ())
1967 yylval.tok_val = new token (curr_fcn_file_full_name, l, c);
1968 else
1969 yylval.tok_val = new token ("stdin", l, c);
1970 }
1971 break;
1972
1973 case magic_line_kw:
1974 yylval.tok_val = new token (static_cast<double> (l), "", l, c);
1975 break;
1976
1977 default:
1978 panic_impossible ();
1979 }
1980
1981 if (! yylval.tok_val)
1982 yylval.tok_val = new token (l, c);
1983
1984 token_stack.push (yylval.tok_val);
1985
1986 return kw->tok;
1987 }
1988
1989 return 0;
1990 }
1991
1992 bool
1993 lexical_feedback::is_variable (const std::string& name)
1994 {
1995 return (symbol_table::is_variable (name)
1996 || (pending_local_variables.find (name)
1997 != pending_local_variables.end ()));
1998 }
1999
// Read the body of a block comment from READER and return the
// collected text.
//
// A '%' or '#' at the beginning of a line that is followed by '{' or
// '}', optional blanks or tabs, and a newline is a block marker: '{'
// increments block_comment_nesting_level and '}' decrements it
// (promptflag is adjusted in the opposite direction).  When the
// nesting level reaches zero, any comment text that follows is
// collected with grab_comment_block and the function returns.  All
// other text is appended to the result.  EOF is set to true if the
// reader ran out of input.
2000 std::string
2001 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof)
2002 {
2003 std::string buf;
2004
// TRUE while no non-marker character has been seen on the current
// line.
2005 bool at_bol = true;
// TRUE right after a comment character was seen at the beginning of
// a line.
2006 bool look_for_marker = false;
2007
2008 bool warned_incompatible = false;
2009
2010 int c = 0;
2011
2012 while ((c = reader.getc ()) != EOF)
2013 {
2014 current_input_column++;
2015
2016 if (look_for_marker)
2017 {
2018 at_bol = false;
2019 look_for_marker = false;
2020
2021 if (c == '{' || c == '}')
2022 {
2023 std::string tmp_buf (1, static_cast<char> (c));
2024
2025 int type = c;
2026
2027 bool done = false;
2028
// NOTE(review): the loop condition reads a character before testing
// DONE, so on exit C already holds the character after the marker
// line (or EOF) and is processed by the code following this block --
// confirm that an EOF value is not meant to be appended to BUF
// there.
2029 while ((c = reader.getc ()) != EOF && ! done)
2030 {
2031 current_input_column++;
2032
2033 switch (c)
2034 {
2035 case ' ':
2036 case '\t':
2037 tmp_buf += static_cast<char> (c);
2038 break;
2039
2040 case '\n':
2041 {
2042 current_input_column = 0;
2043 at_bol = true;
2044 done = true;
2045
2046 if (type == '{')
2047 {
2048 block_comment_nesting_level++;
2049 promptflag--;
2050 }
2051 else
2052 {
2053 block_comment_nesting_level--;
2054 promptflag++;
2055
2056 if (block_comment_nesting_level == 0)
2057 {
2058 buf += grab_comment_block (reader, true, eof);
2059
2060 return buf;
2061 }
2062 }
2063 }
2064 break;
2065
2066 default:
// Not a marker after all: keep the brace and what followed it as
// ordinary comment text.
2067 at_bol = false;
2068 tmp_buf += static_cast<char> (c);
2069 buf += tmp_buf;
2070 done = true;
2071 break;
2072 }
2073 }
2074 }
2075 }
2076
2077 if (at_bol && (c == '%' || c == '#'))
2078 {
2079 if (c == '#' && ! warned_incompatible)
2080 {
2081 warned_incompatible = true;
2082 maybe_gripe_matlab_incompatible_comment (c);
2083 }
2084
2085 at_bol = false;
2086 look_for_marker = true;
2087 }
2088 else
2089 {
2090 buf += static_cast<char> (c);
2091
2092 if (c == '\n')
2093 {
2094 current_input_column = 0;
2095 at_bol = true;
2096 }
2097 }
2098 }
2099
2100 if (c == EOF)
2101 eof = true;
2102
2103 return buf;
2104 }
2105
// Read a run of line comments from READER and return their text
// without the leading comment characters.
//
// AT_BOL tells whether reading starts at the beginning of a line.
// Leading blanks and tabs before a comment character are skipped.
// A '{' directly after the comment character at the beginning of a
// line, followed only by blanks, tabs and a newline, starts a block
// comment whose text is collected with grab_block_comment.  Reading
// stops at the first character that does not belong to a comment
// (it is returned to the reader), at the end of the first comment
// line when not reading a function or script file, or at end of
// input, in which case EOF is set to true.
2106 std::string
2107 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol,
2108 bool& eof)
2109 {
2110 std::string buf;
2111
2112 // TRUE means we are at the beginning of a comment block.
2113 bool begin_comment = false;
2114
2115 // TRUE means we are currently reading a comment block.
2116 bool in_comment = false;
2117
2118 bool warned_incompatible = false;
2119
2120 int c = 0;
2121
2122 while ((c = reader.getc ()) != EOF)
2123 {
2124 current_input_column++;
2125
2126 if (begin_comment)
2127 {
2128 if (c == '%' || c == '#')
2129 {
// Additional comment characters right after the first one are
// dropped.
2130 at_bol = false;
2131 continue;
2132 }
2133 else if (at_bol && c == '{')
2134 {
2135 std::string tmp_buf (1, static_cast<char> (c));
2136
2137 bool done = false;
2138
// NOTE(review): as the condition reads before testing DONE, C holds
// the character after the marker line when this loop exits and is
// handled by the code that follows -- confirm this is intended.
2139 while ((c = reader.getc ()) != EOF && ! done)
2140 {
2141 current_input_column++;
2142
2143 switch (c)
2144 {
2145 case ' ':
2146 case '\t':
2147 tmp_buf += static_cast<char> (c);
2148 break;
2149
2150 case '\n':
2151 {
2152 current_input_column = 0;
2153 at_bol = true;
2154 done = true;
2155
2156 block_comment_nesting_level++;
2157 promptflag--;
2158
2159 buf += grab_block_comment (reader, eof);
2160
2161 in_comment = false;
2162
2163 if (eof)
2164 goto done;
2165 }
2166 break;
2167
2168 default:
2169 at_bol = false;
2170 tmp_buf += static_cast<char> (c);
2171 buf += tmp_buf;
2172 done = true;
2173 break;
2174 }
2175 }
2176 }
2177 else
2178 {
2179 at_bol = false;
2180 begin_comment = false;
2181 }
2182 }
2183
2184 if (in_comment)
2185 {
2186 buf += static_cast<char> (c);
2187
2188 if (c == '\n')
2189 {
2190 at_bol = true;
2191 current_input_column = 0;
2192 in_comment = false;
2193
2194 // FIXME -- bailing out here prevents things like
2195 //
2196 // octave> # comment
2197 // octave> x = 1
2198 //
2199 // from failing at the command line, while still
2200 // allowing blocks of comments to be grabbed properly
2201 // for function doc strings. But only the first line of
2202 // a mult-line doc string will be picked up for
2203 // functions defined on the command line. We need a
2204 // better way of collecting these comments...
2205 if (! (reading_fcn_file || reading_script_file))
2206 goto done;
2207 }
2208 }
2209 else
2210 {
2211 switch (c)
2212 {
2213 case ' ':
2214 case '\t':
2215 break;
2216
2217 case '#':
2218 if (! warned_incompatible)
2219 {
2220 warned_incompatible = true;
2221 maybe_gripe_matlab_incompatible_comment (c);
2222 }
2223 // fall through...
2224
2225 case '%':
2226 in_comment = true;
2227 begin_comment = true;
2228 break;
2229
2230 default:
// Not part of a comment: give the character back and stop.
2231 current_input_column--;
2232 reader.ungetc (c);
2233 goto done;
2234 }
2235 }
2236 }
2237
2238 done:
2239
2240 if (c == EOF)
2241 eof = true;
2242
2243 return buf;
2244 }
2245
2246 int
2247 lexical_feedback::process_comment (bool start_in_block, bool& eof)
2248 {
2249 eof = false;
2250
2251 std::string help_txt;
2252
2253 if (! help_buf.empty ())
2254 help_txt = help_buf.top ();
2255
2256 flex_stream_reader flex_reader (yytext);
2257
2258 // process_comment is only supposed to be called when we are not
2259 // initially looking at a block comment.
2260
2261 std::string txt = start_in_block
2262 ? grab_block_comment (flex_reader, eof)
2263 : grab_comment_block (flex_reader, false, eof);
2264
2265 if (lexer_debug_flag)
2266 std::cerr << "C: " << txt << std::endl;
2267
2268 if (help_txt.empty () && nesting_level.none ())
2269 {
2270 if (! help_buf.empty ())
2271 help_buf.pop ();
2272
2273 help_buf.push (txt);
2274 }
2275
2276 octave_comment_buffer::append (txt);
2277
2278 current_input_column = 1;
2279 quote_is_transpose = false;
2280 convert_spaces_to_comma = true;
2281 at_beginning_of_statement = true;
2282
2283 if (YY_START == COMMAND_START)
2284 BEGIN (INITIAL);
2285
2286 if (nesting_level.none ())
2287 return '\n';
2288 else if (nesting_level.is_bracket_or_brace ())
2289 return ';';
2290 else
2291 return 0;
2292 }
2293
2294 // Recognize separators. If the separator is a CRLF pair, it is
2295 // replaced by a single LF.
2296
2297 bool
2298 lexical_feedback::next_token_is_sep_op (void)
2299 {
2300 bool retval = false;
2301
2302 int c = text_yyinput ();
2303
2304 retval = match_any (c, ",;\n]");
2305
2306 xunput (c, yytext);
2307
2308 return retval;
2309 }
2310
2311 // Try to determine if the next token should be treated as a postfix
2312 // unary operator. This is ugly, but it seems to do the right thing.
2313
2314 bool
2315 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev)
2316 {
2317 bool un_op = false;
2318
2319 int c0 = text_yyinput ();
2320
2321 if (c0 == '\'' && ! spc_prev)
2322 {
2323 un_op = true;
2324 }
2325 else if (c0 == '.')
2326 {
2327 int c1 = text_yyinput ();
2328 un_op = (c1 == '\'');
2329 xunput (c1, yytext);
2330 }
2331 else if (c0 == '+')
2332 {
2333 int c1 = text_yyinput ();
2334 un_op = (c1 == '+');
2335 xunput (c1, yytext);
2336 }
2337 else if (c0 == '-')
2338 {
2339 int c1 = text_yyinput ();
2340 un_op = (c1 == '-');
2341 xunput (c1, yytext);
2342 }
2343
2344 xunput (c0, yytext);
2345
2346 return un_op;
2347 }
2348
2349 // Try to determine if the next token should be treated as a binary
2350 // operator.
2351 //
2352 // This kluge exists because whitespace is not always ignored inside
2353 // the square brackets that are used to create matrix objects (though
2354 // spacing only really matters in the cases that can be interpreted
2355 // either as binary ops or prefix unary ops: currently just +, -).
2356 //
2357 // Note that a line continuation directly following a + or - operator
2358 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
2359 // parsed as a binary operator.
2360
2361 bool
2362 lexical_feedback::next_token_is_bin_op (bool spc_prev)
2363 {
2364 bool bin_op = false;
2365
2366 int c0 = text_yyinput ();
2367
2368 switch (c0)
2369 {
2370 case '+':
2371 case '-':
2372 {
2373 int c1 = text_yyinput ();
2374
2375 switch (c1)
2376 {
2377 case '+':
2378 case '-':
2379 // Unary ops, spacing doesn't matter.
2380 break;
2381
2382 case '=':
2383 // Binary ops, spacing doesn't matter.
2384 bin_op = true;
2385 break;
2386
2387 default:
2388 // Could be either, spacing matters.
2389 bin_op = looks_like_bin_op (spc_prev, c1);
2390 break;
2391 }
2392
2393 xunput (c1, yytext);
2394 }
2395 break;
2396
2397 case ':':
2398 case '/':
2399 case '\\':
2400 case '^':
2401 // Always a binary op (may also include /=, \=, and ^=).
2402 bin_op = true;
2403 break;
2404
2405 // .+ .- ./ .\ .^ .* .**
2406 case '.':
2407 {
2408 int c1 = text_yyinput ();
2409
2410 if (match_any (c1, "+-/\\^*"))
2411 // Always a binary op (may also include .+=, .-=, ./=, ...).
2412 bin_op = true;
2413 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
2414 // A structure element reference is a binary op.
2415 bin_op = true;
2416
2417 xunput (c1, yytext);
2418 }
2419 break;
2420
2421 // = == & && | || * **
2422 case '=':
2423 case '&':
2424 case '|':
2425 case '*':
2426 // Always a binary op (may also include ==, &&, ||, **).
2427 bin_op = true;
2428 break;
2429
2430 // < <= <> > >=
2431 case '<':
2432 case '>':
2433 // Always a binary op (may also include <=, <>, >=).
2434 bin_op = true;
2435 break;
2436
2437 // ~= !=
2438 case '~':
2439 case '!':
2440 {
2441 int c1 = text_yyinput ();
2442
2443 // ~ and ! can be unary ops, so require following =.
2444 if (c1 == '=')
2445 bin_op = true;
2446
2447 xunput (c1, yytext);
2448 }
2449 break;
2450
2451 default:
2452 break;
2453 }
2454
2455 xunput (c0, yytext);
2456
2457 return bin_op;
2458 }
2459
2460 // FIXME -- we need to handle block comments here.
2461
2462 void
2463 lexical_feedback::scan_for_comments (const char *text)
2464 {
2465 std::string comment_buf;
2466
2467 bool in_comment = false;
2468 bool beginning_of_comment = false;
2469
2470 int len = strlen (text);
2471 int i = 0;
2472
2473 while (i < len)
2474 {
2475 char c = text[i++];
2476
2477 switch (c)
2478 {
2479 case '%':
2480 case '#':
2481 if (in_comment)
2482 {
2483 if (! beginning_of_comment)
2484 comment_buf += static_cast<char> (c);
2485 }
2486 else
2487 {
2488 maybe_gripe_matlab_incompatible_comment (c);
2489 in_comment = true;
2490 beginning_of_comment = true;
2491 }
2492 break;
2493
2494 case '\n':
2495 if (in_comment)
2496 {
2497 comment_buf += static_cast<char> (c);
2498 octave_comment_buffer::append (comment_buf);
2499 comment_buf.resize (0);
2500 in_comment = false;
2501 beginning_of_comment = false;
2502 }
2503 break;
2504
2505 default:
2506 if (in_comment)
2507 {
2508 comment_buf += static_cast<char> (c);
2509 beginning_of_comment = false;
2510 }
2511 break;
2512 }
2513 }
2514
2515 if (! comment_buf.empty ())
2516 octave_comment_buffer::append (comment_buf);
2517 }
2518
2519 // Discard whitespace, including comments and continuations.
2520
2521 // FIXME -- we need to handle block comments here.
2522
2523 int
2524 lexical_feedback::eat_whitespace (void)
2525 {
2526 int retval = lexical_feedback::NO_WHITESPACE;
2527
2528 std::string comment_buf;
2529
2530 bool in_comment = false;
2531 bool beginning_of_comment = false;
2532
2533 int c = 0;
2534
2535 while ((c = text_yyinput ()) != EOF)
2536 {
2537 current_input_column++;
2538
2539 switch (c)
2540 {
2541 case ' ':
2542 case '\t':
2543 if (in_comment)
2544 {
2545 comment_buf += static_cast<char> (c);
2546 beginning_of_comment = false;
2547 }
2548 retval |= lexical_feedback::SPACE_OR_TAB;
2549 break;
2550
2551 case '\n':
2552 retval |= lexical_feedback::NEWLINE;
2553 if (in_comment)
2554 {
2555 comment_buf += static_cast<char> (c);
2556 octave_comment_buffer::append (comment_buf);
2557 comment_buf.resize (0);
2558 in_comment = false;
2559 beginning_of_comment = false;
2560 }
2561 current_input_column = 0;
2562 break;
2563
2564 case '#':
2565 case '%':
2566 if (in_comment)
2567 {
2568 if (! beginning_of_comment)
2569 comment_buf += static_cast<char> (c);
2570 }
2571 else
2572 {
2573 maybe_gripe_matlab_incompatible_comment (c);
2574 in_comment = true;
2575 beginning_of_comment = true;
2576 }
2577 break;
2578
2579 case '.':
2580 if (in_comment)
2581 {
2582 comment_buf += static_cast<char> (c);
2583 beginning_of_comment = false;
2584 break;
2585 }
2586 else
2587 {
2588 if (have_ellipsis_continuation ())
2589 break;
2590 else
2591 goto done;
2592 }
2593
2594 case '\\':
2595 if (in_comment)
2596 {
2597 comment_buf += static_cast<char> (c);
2598 beginning_of_comment = false;
2599 break;
2600 }
2601 else
2602 {
2603 if (have_continuation ())
2604 break;
2605 else
2606 goto done;
2607 }
2608
2609 default:
2610 if (in_comment)
2611 {
2612 comment_buf += static_cast<char> (c);
2613 beginning_of_comment = false;
2614 break;
2615 }
2616 else
2617 goto done;
2618 }
2619 }
2620
2621 if (! comment_buf.empty ())
2622 octave_comment_buffer::append (comment_buf);
2623
2624 done:
2625 xunput (c, yytext);
2626 current_input_column--;
2627 return retval;
2628 }
2629
// Return true if the LEN characters at S begin with "0x" or "0X" and
// at least one more character follows the prefix.

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2635
2636 void
2637 lexical_feedback::handle_number (void)
2638 {
2639 double value = 0.0;
2640 int nread = 0;
2641
2642 if (looks_like_hex (yytext, strlen (yytext)))
2643 {
2644 unsigned long ival;
2645
2646 nread = sscanf (yytext, "%lx", &ival);
2647
2648 value = static_cast<double> (ival);
2649 }
2650 else
2651 {
2652 char *tmp = strsave (yytext);
2653
2654 char *idx = strpbrk (tmp, "Dd");
2655
2656 if (idx)
2657 *idx = 'e';
2658
2659 nread = sscanf (tmp, "%lf", &value);
2660
2661 delete [] tmp;
2662 }
2663
2664 // If yytext doesn't contain a valid number, we are in deep doo doo.
2665
2666 assert (nread == 1);
2667
2668 quote_is_transpose = true;
2669 convert_spaces_to_comma = true;
2670 looking_for_object_index = false;
2671 at_beginning_of_statement = false;
2672
2673 yylval.tok_val = new token (value, yytext, input_line_number,
2674 current_input_column);
2675
2676 token_stack.push (yylval.tok_val);
2677
2678 current_input_column += yyleng;
2679
2680 do_comma_insert_check ();
2681 }
2682
2683 // We have seen a backslash and need to find out if it should be
2684 // treated as a continuation character. If so, this eats it, up to
2685 // and including the new line character.
2686 //
2687 // Match whitespace only, followed by a comment character or newline.
2688 // Once a comment character is found, discard all input until newline.
2689 // If non-whitespace characters are found before comment
2690 // characters, return 0. Otherwise, return 1.
2691
2692 // FIXME -- we need to handle block comments here.
2693
2694 bool
2695 lexical_feedback::have_continuation (bool trailing_comments_ok)
2696 {
2697 std::ostringstream buf;
2698
2699 std::string comment_buf;
2700
2701 bool in_comment = false;
2702 bool beginning_of_comment = false;
2703
2704 int c = 0;
2705
2706 while ((c = text_yyinput ()) != EOF)
2707 {
2708 buf << static_cast<char> (c);
2709
2710 switch (c)
2711 {
2712 case ' ':
2713 case '\t':
2714 if (in_comment)
2715 {
2716 comment_buf += static_cast<char> (c);
2717 beginning_of_comment = false;
2718 }
2719 break;
2720
2721 case '%':
2722 case '#':
2723 if (trailing_comments_ok)
2724 {
2725 if (in_comment)
2726 {
2727 if (! beginning_of_comment)
2728 comment_buf += static_cast<char> (c);
2729 }
2730 else
2731 {
2732 maybe_gripe_matlab_incompatible_comment (c);
2733 in_comment = true;
2734 beginning_of_comment = true;
2735 }
2736 }
2737 else
2738 goto cleanup;
2739 break;
2740
2741 case '\n':
2742 if (in_comment)
2743 {
2744 comment_buf += static_cast<char> (c);
2745 octave_comment_buffer::append (comment_buf);
2746 }
2747 current_input_column = 0;
2748 promptflag--;
2749 gripe_matlab_incompatible_continuation ();
2750 return true;
2751
2752 default:
2753 if (in_comment)
2754 {
2755 comment_buf += static_cast<char> (c);
2756 beginning_of_comment = false;
2757 }
2758 else
2759 goto cleanup;
2760 break;
2761 }
2762 }
2763
2764 xunput (c, yytext);
2765 return false;
2766
2767 cleanup:
2768
2769 std::string s = buf.str ();
2770
2771 int len = s.length ();
2772 while (len--)
2773 xunput (s[len], yytext);
2774
2775 return false;
2776 }
2777
2778 // We have seen a '.' and need to see if it is the start of a
2779 // continuation. If so, this eats it, up to and including the new
2780 // line character.
2781
2782 bool
2783 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok)
2784 {
2785 char c1 = text_yyinput ();
2786 if (c1 == '.')
2787 {
2788 char c2 = text_yyinput ();
2789 if (c2 == '.' && have_continuation (trailing_comments_ok))
2790 return true;
2791 else
2792 {
2793 xunput (c2, yytext);
2794 xunput (c1, yytext);
2795 }
2796 }
2797 else
2798 xunput (c1, yytext);
2799
2800 return false;
2801 }
2802
2803 // See if we have a continuation line. If so, eat it and the leading
2804 // whitespace on the next line.
2805
2806 int
2807 lexical_feedback::eat_continuation (void)
2808 {
2809 int retval = lexical_feedback::NO_WHITESPACE;
2810
2811 int c = text_yyinput ();
2812
2813 if ((c == '.' && have_ellipsis_continuation ())
2814 || (c == '\\' && have_continuation ()))
2815 retval = eat_whitespace ();
2816 else
2817 xunput (c, yytext);
2818
2819 return retval;
2820 }
2821
2822 int
2823 lexical_feedback::handle_string (char delim)
2824 {
2825 std::ostringstream buf;
2826
2827 int bos_line = input_line_number;
2828 int bos_col = current_input_column;
2829
2830 int c;
2831 int escape_pending = 0;
2832
2833 while ((c = text_yyinput ()) != EOF)
2834 {
2835 current_input_column++;
2836
2837 if (c == '\\')
2838 {
2839 if (delim == '\'' || escape_pending)
2840 {
2841 buf << static_cast<char> (c);
2842 escape_pending = 0;
2843 }
2844 else
2845 {
2846 if (have_continuation (false))
2847 escape_pending = 0;
2848 else
2849 {
2850 buf << static_cast<char> (c);
2851 escape_pending = 1;
2852 }
2853 }
2854 continue;
2855 }
2856 else if (c == '.')
2857 {
2858 if (delim == '\'' || ! have_ellipsis_continuation (false))
2859 buf << static_cast<char> (c);
2860 }
2861 else if (c == '\n')
2862 {
2863 error ("unterminated string constant");
2864 break;
2865 }
2866 else if (c == delim)
2867 {
2868 if (escape_pending)
2869 buf << static_cast<char> (c);
2870 else
2871 {
2872 c = text_yyinput ();
2873 if (c == delim)
2874 {
2875 buf << static_cast<char> (c);
2876 }
2877 else
2878 {
2879 std::string s;
2880 xunput (c, yytext);
2881
2882 if (delim == '\'')
2883 s = buf.str ();
2884 else
2885 s = do_string_escapes (buf.str ());
2886
2887 quote_is_transpose = true;
2888 convert_spaces_to_comma = true;
2889
2890 yylval.tok_val = new token (s, bos_line, bos_col);
2891 token_stack.push (yylval.tok_val);
2892
2893 if (delim == '"')
2894 gripe_matlab_incompatible ("\" used as string delimiter");
2895 else if (delim == '\'')
2896 gripe_single_quote_string ();
2897
2898 looking_for_object_index = true;
2899 at_beginning_of_statement = false;
2900
2901 return delim == '"' ? DQ_STRING : SQ_STRING;
2902 }
2903 }
2904 }
2905 else
2906 {
2907 buf << static_cast<char> (c);
2908 }
2909
2910 escape_pending = 0;
2911 }
2912
2913 return LEXICAL_ERROR;
2914 }
2915
2916 bool
2917 lexical_feedback::next_token_is_assign_op (void)
2918 {
2919 bool retval = false;
2920
2921 int c0 = text_yyinput ();
2922
2923 switch (c0)
2924 {
2925 case '=':
2926 {
2927 int c1 = text_yyinput ();
2928 xunput (c1, yytext);
2929 if (c1 != '=')
2930 retval = true;
2931 }
2932 break;
2933
2934 case '+':
2935 case '-':
2936 case '*':
2937 case '/':
2938 case '\\':
2939 case '&':
2940 case '|':
2941 {
2942 int c1 = text_yyinput ();
2943 xunput (c1, yytext);
2944 if (c1 == '=')
2945 retval = true;
2946 }
2947 break;
2948
2949 case '.':
2950 {
2951 int c1 = text_yyinput ();
2952 if (match_any (c1, "+-*/\\"))
2953 {
2954 int c2 = text_yyinput ();
2955 xunput (c2, yytext);
2956 if (c2 == '=')
2957 retval = true;
2958 }
2959 xunput (c1, yytext);
2960 }
2961 break;
2962
2963 case '>':
2964 {
2965 int c1 = text_yyinput ();
2966 if (c1 == '>')
2967 {
2968 int c2 = text_yyinput ();
2969 xunput (c2, yytext);
2970 if (c2 == '=')
2971 retval = true;
2972 }
2973 xunput (c1, yytext);
2974 }
2975 break;
2976
2977 case '<':
2978 {
2979 int c1 = text_yyinput ();
2980 if (c1 == '<')
2981 {
2982 int c2 = text_yyinput ();
2983 xunput (c2, yytext);
2984 if (c2 == '=')
2985 retval = true;
2986 }
2987 xunput (c1, yytext);
2988 }
2989 break;
2990
2991 default:
2992 break;
2993 }
2994
2995 xunput (c0, yytext);
2996
2997 return retval;
2998 }
2999
3000 bool
3001 lexical_feedback::next_token_is_index_op (void)
3002 {
3003 int c = text_yyinput ();
3004 xunput (c, yytext);
3005 return c == '(' || c == '{';
3006 }
3007
3008 int
3009 lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type)
3010 {
3011 int retval = bracket_type;
3012
3013 if (! nesting_level.none ())
3014 {
3015 nesting_level.remove ();
3016
3017 if (bracket_type == ']')
3018 bracketflag--;
3019 else if (bracket_type == '}')
3020 braceflag--;
3021 else
3022 panic_impossible ();
3023 }
3024
3025 if (bracketflag == 0 && braceflag == 0)
3026 BEGIN (INITIAL);
3027
3028 if (bracket_type == ']'
3029 && next_token_is_assign_op ()
3030 && ! looking_at_return_list)
3031 {
3032 retval = CLOSE_BRACE;
3033 }
3034 else if ((bracketflag || braceflag)
3035 && convert_spaces_to_comma
3036 && (nesting_level.is_bracket ()
3037 || (nesting_level.is_brace ()
3038 && ! looking_at_object_index.front ())))
3039 {
3040 bool index_op = next_token_is_index_op ();
3041
3042 // Don't insert comma if we are looking at something like
3043 //
3044 // [x{i}{j}] or [x{i}(j)]
3045 //
3046 // but do if we are looking at
3047 //
3048 // [x{i} {j}] or [x{i} (j)]
3049
3050 if (spc_gobbled || ! (bracket_type == '}' && index_op))
3051 {
3052 bool bin_op = next_token_is_bin_op (spc_gobbled);
3053
3054 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
3055
3056 bool sep_op = next_token_is_sep_op ();
3057
3058 if (! (postfix_un_op || bin_op || sep_op))
3059 {
3060 maybe_warn_separator_insert (',');
3061
3062 xunput (',', yytext);
3063 return retval;
3064 }
3065 }
3066 }
3067
3068 quote_is_transpose = true;
3069 convert_spaces_to_comma = true;
3070
3071 return retval;
3072 }
3073
3074 void
3075 lexical_feedback::maybe_unput_comma (int spc_gobbled)
3076 {
3077 if (nesting_level.is_bracket ()
3078 || (nesting_level.is_brace ()
3079 && ! looking_at_object_index.front ()))
3080 {
3081 int bin_op = next_token_is_bin_op (spc_gobbled);
3082
3083 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
3084
3085 int c1 = text_yyinput ();
3086 int c2 = text_yyinput ();
3087
3088 xunput (c2, yytext);
3089 xunput (c1, yytext);
3090
3091 int sep_op = next_token_is_sep_op ();
3092
3093 int dot_op = (c1 == '.'
3094 && (isalpha (c2) || isspace (c2) || c2 == '_'));
3095
3096 if (postfix_un_op || bin_op || sep_op || dot_op)
3097 return;
3098
3099 int index_op = (c1 == '(' || c1 == '{');
3100
3101 // If there is no space before the indexing op, we don't insert
3102 // a comma.
3103
3104 if (index_op && ! spc_gobbled)
3105 return;
3106
3107 maybe_warn_separator_insert (',');
3108
3109 xunput (',', yytext);
3110 }
3111 }
3112
3113 bool
3114 lexical_feedback::next_token_can_follow_bin_op (void)
3115 {
3116 std::stack<char> buf;
3117
3118 int c = EOF;
3119
3120 // Skip whitespace in current statement on current line
3121 while (true)
3122 {
3123 c = text_yyinput ();
3124
3125 buf.push (c);
3126
3127 if (match_any (c, ",;\n") || (c != ' ' && c != '\t'))
3128 break;
3129 }
3130
3131 // Restore input.
3132 while (! buf.empty ())
3133 {
3134 xunput (buf.top (), yytext);
3135
3136 buf.pop ();
3137 }
3138
3139 return (isalnum (c) || match_any (c, "!\"'(-[_{~"));
3140 }
3141
// Return false for the names of the built-in constants listed below
// and true for every other name.
//
// Don't allow these names to be treated as commands to avoid
// surprises when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char * const never_commands[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  const size_t count = sizeof (never_commands) / sizeof (never_commands[0]);

  for (size_t i = 0; i < count; i++)
    {
      if (tok == never_commands[i])
        return false;
    }

  return true;
}
3154
3155 bool
3156 lexical_feedback::looks_like_command_arg (void)
3157 {
3158 bool retval = true;
3159
3160 int c0 = text_yyinput ();
3161
3162 switch (c0)
3163 {
3164 // = ==
3165 case '=':
3166 {
3167 int c1 = text_yyinput ();
3168
3169 if (c1 == '=')
3170 {
3171 int c2 = text_yyinput ();
3172
3173 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3174 && next_token_can_follow_bin_op ())
3175 retval = false;
3176
3177 xunput (c2, yytext);
3178 }
3179 else
3180 retval = false;
3181
3182 xunput (c1, yytext);
3183 }
3184 break;
3185
3186 case '(':
3187 case '{':
3188 // Indexing.
3189 retval = false;
3190 break;
3191
3192 case '\n':
3193 // EOL.
3194 break;
3195
3196 case '\'':
3197 case '"':
3198 // Beginning of a character string.
3199 break;
3200
3201 // + - ++ -- += -=
3202 case '+':
3203 case '-':
3204 {
3205 int c1 = text_yyinput ();
3206
3207 switch (c1)
3208 {
3209 case '\n':
3210 // EOL.
3211 case '+':
3212 case '-':
3213 // Unary ops, spacing doesn't matter.
3214 break;
3215
3216 case '\t':
3217 case ' ':
3218 {
3219 if (next_token_can_follow_bin_op ())
3220 retval = false;
3221 }
3222 break;
3223
3224 case '=':
3225 {
3226 int c2 = text_yyinput ();
3227
3228 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3229 && next_token_can_follow_bin_op ())
3230 retval = false;
3231
3232 xunput (c2, yytext);
3233 }
3234 break;
3235 }
3236
3237 xunput (c1, yytext);
3238 }
3239 break;
3240
3241 case ':':
3242 case '/':
3243 case '\\':
3244 case '^':
3245 {
3246 int c1 = text_yyinput ();
3247
3248 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3249 && next_token_can_follow_bin_op ())
3250 retval = false;
3251
3252 xunput (c1, yytext);
3253 }
3254 break;
3255
3256 // .+ .- ./ .\ .^ .* .**
3257 case '.':
3258 {
3259 int c1 = text_yyinput ();
3260
3261 if (match_any (c1, "+-/\\^*"))
3262 {
3263 int c2 = text_yyinput ();
3264
3265 if (c2 == '=')
3266 {
3267 int c3 = text_yyinput ();
3268
3269 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t')
3270 && next_token_can_follow_bin_op ())
3271 retval = false;
3272
3273 xunput (c3, yytext);
3274 }
3275 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3276 && next_token_can_follow_bin_op ())
3277 retval = false;
3278
3279 xunput (c2, yytext);
3280 }
3281 else if (! match_any (c1, ",;\n")
3282 && (! isdigit (c1) && c1 != ' ' && c1 != '\t'
3283 && c1 != '.'))
3284 {
3285 // Structure reference. FIXME -- is this a complete check?
3286
3287 retval = false;
3288 }
3289
3290 xunput (c1, yytext);
3291 }
3292 break;
3293
3294 // & && | || * **
3295 case '&':
3296 case '|':
3297 case '*':
3298 {
3299 int c1 = text_yyinput ();
3300
3301 if (c1 == c0)
3302 {
3303 int c2 = text_yyinput ();
3304
3305 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3306 && next_token_can_follow_bin_op ())
3307 retval = false;
3308
3309 xunput (c2, yytext);
3310 }
3311 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3312 && next_token_can_follow_bin_op ())
3313 retval = false;
3314
3315 xunput (c1, yytext);
3316 }
3317 break;
3318
3319 // < <= > >=
3320 case '<':
3321 case '>':
3322 {
3323 int c1 = text_yyinput ();
3324
3325 if (c1 == '=')
3326 {
3327 int c2 = text_yyinput ();
3328
3329 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3330 && next_token_can_follow_bin_op ())
3331 retval = false;
3332
3333 xunput (c2, yytext);
3334 }
3335 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3336 && next_token_can_follow_bin_op ())
3337 retval = false;
3338
3339 xunput (c1, yytext);
3340 }
3341 break;
3342
3343 // ~= !=
3344 case '~':
3345 case '!':
3346 {
3347 int c1 = text_yyinput ();
3348
3349 // ~ and ! can be unary ops, so require following =.
3350 if (c1 == '=')
3351 {
3352 int c2 = text_yyinput ();
3353
3354 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3355 && next_token_can_follow_bin_op ())
3356 retval = false;
3357
3358 xunput (c2, yytext);
3359 }
3360 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3361 && next_token_can_follow_bin_op ())
3362 retval = false;
3363
3364 xunput (c1, yytext);
3365 }
3366 break;
3367
3368 default:
3369 break;
3370 }
3371
3372 xunput (c0, yytext);
3373
3374 return retval;
3375 }
3376
3377 int
3378 lexical_feedback::handle_superclass_identifier (void)
3379 {
3380 eat_continuation ();
3381
3382 std::string pkg;
3383 std::string meth = strip_trailing_whitespace (yytext);
3384 size_t pos = meth.find ("@");
3385 std::string cls = meth.substr (pos).substr (1);
3386 meth = meth.substr (0, pos - 1);
3387
3388 pos = cls.find (".");
3389 if (pos != std::string::npos)
3390 {
3391 pkg = cls.substr (pos).substr (1);
3392 cls = cls.substr (0, pos - 1);
3393 }
3394
3395 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls)
3396 || is_keyword_token (pkg));
3397 if (kw_token)
3398 {
3399 error ("method, class and package names may not be keywords");
3400 return LEXICAL_ERROR;
3401 }
3402
3403 yylval.tok_val
3404 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)),
3405 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3406 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3407 input_line_number,
3408 current_input_column);
3409 token_stack.push (yylval.tok_val);
3410
3411 convert_spaces_to_comma = true;
3412 current_input_column += yyleng;
3413
3414 return SUPERCLASSREF;
3415 }
3416
3417 int
3418 lexical_feedback::handle_meta_identifier (void)
3419 {
3420 eat_continuation ();
3421
3422 std::string pkg;
3423 std::string cls = strip_trailing_whitespace (yytext).substr (1);
3424 size_t pos = cls.find (".");
3425
3426 if (pos != std::string::npos)
3427 {
3428 pkg = cls.substr (pos).substr (1);
3429 cls = cls.substr (0, pos - 1);
3430 }
3431
3432 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg);
3433 if (kw_token)
3434 {
3435 error ("class and package names may not be keywords");
3436 return LEXICAL_ERROR;
3437 }
3438
3439 yylval.tok_val
3440 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)),
3441 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3442 input_line_number,
3443 current_input_column);
3444
3445 token_stack.push (yylval.tok_val);
3446
3447 convert_spaces_to_comma = true;
3448 current_input_column += yyleng;
3449
3450 return METAQUERY;
3451 }
3452
3453 // Figure out exactly what kind of token to return when we have seen
3454 // an identifier. Handles keywords. Return -1 if the identifier
3455 // should be ignored.
3456
3457 int
3458 lexical_feedback::handle_identifier (void)
3459 {
3460 bool at_bos = at_beginning_of_statement;
3461
3462 std::string tok = strip_trailing_whitespace (yytext);
3463
3464 int c = yytext[yyleng-1];
3465
3466 bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
3467
3468 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
3469
3470 // If we are expecting a structure element, avoid recognizing
3471 // keywords and other special names and return STRUCT_ELT, which is
3472 // a string that is also a valid identifier. But first, we have to
3473 // decide whether to insert a comma.
3474
3475 if (looking_at_indirect_ref)
3476 {
3477 do_comma_insert_check ();
3478
3479 maybe_unput_comma (spc_gobbled);
3480
3481 yylval.tok_val = new token (tok, input_line_number,
3482 current_input_column);
3483
3484 token_stack.push (yylval.tok_val);
3485
3486 quote_is_transpose = true;
3487 convert_spaces_to_comma = true;
3488 looking_for_object_index = true;
3489
3490 current_input_column += yyleng;
3491
3492 return STRUCT_ELT;
3493 }
3494
3495 at_beginning_of_statement = false;
3496
3497 // The is_keyword_token may reset
3498 // at_beginning_of_statement. For example, if it sees
3499 // an else token, then the next token is at the beginning of a
3500 // statement.
3501
3502 int kw_token = is_keyword_token (tok);
3503
3504 // If we found a keyword token, then the beginning_of_statement flag
3505 // is already set. Otherwise, we won't be at the beginning of a
3506 // statement.
3507
3508 if (looking_at_function_handle)
3509 {
3510 if (kw_token)
3511 {
3512 error ("function handles may not refer to keywords");
3513
3514 return LEXICAL_ERROR;
3515 }
3516 else
3517 {
3518 yylval.tok_val = new token (tok, input_line_number,
3519 current_input_column);
3520
3521 token_stack.push (yylval.tok_val);
3522
3523 current_input_column += yyleng;
3524 quote_is_transpose = false;
3525 convert_spaces_to_comma = true;
3526 looking_for_object_index = true;
3527
3528 return FCN_HANDLE;
3529 }
3530 }
3531
3532 // If we have a regular keyword, return it.
3533 // Keywords can be followed by identifiers.
3534
3535 if (kw_token)
3536 {
3537 if (kw_token >= 0)
3538 {
3539 current_input_column += yyleng;
3540 quote_is_transpose = false;
3541 convert_spaces_to_comma = true;
3542 looking_for_object_index = false;
3543 }
3544
3545 return kw_token;
3546 }
3547
3548 // See if we have a plot keyword (title, using, with, or clear).
3549
3550 int c1 = text_yyinput ();
3551
3552 bool next_tok_is_eq = false;
3553 if (c1 == '=')
3554 {
3555 int c2 = text_yyinput ();
3556 xunput (c2, yytext);
3557
3558 if (c2 != '=')
3559 next_tok_is_eq = true;
3560 }
3561
3562 xunput (c1, yytext);
3563
3564 // Kluge alert.
3565 //
3566 // If we are looking at a text style function, set up to gobble its
3567 // arguments.
3568 //
3569 // If the following token is '=', or if we are parsing a function
3570 // return list or function parameter list, or if we are looking at
3571 // something like [ab,cd] = foo (), force the symbol to be inserted
3572 // as a variable in the current symbol table.
3573
3574 if (! is_variable (tok))
3575 {
3576 if (at_bos && spc_gobbled && can_be_command (tok)
3577 && looks_like_command_arg ())
3578 {
3579 BEGIN (COMMAND_START);
3580 }
3581 else if (next_tok_is_eq
3582 || looking_at_decl_list
3583 || looking_at_return_list
3584 || (looking_at_parameter_list
3585 && ! looking_at_initializer_expression))
3586 {
3587 symbol_table::force_variable (tok);
3588 }
3589 else if (looking_at_matrix_or_assign_lhs)
3590 {
3591 pending_local_variables.insert (tok);
3592 }
3593 }
3594
3595 // Find the token in the symbol table. Beware the magic
3596 // transformation of the end keyword...
3597
3598 if (tok == "end")
3599 tok = "__end__";
3600
3601 yylval.tok_val = new token (&(symbol_table::insert (tok)),
3602 input_line_number,
3603 current_input_column);
3604
3605 token_stack.push (yylval.tok_val);
3606
3607 // After seeing an identifer, it is ok to convert spaces to a comma
3608 // (if needed).
3609
3610 convert_spaces_to_comma = true;
3611
3612 if (! (next_tok_is_eq || YY_START == COMMAND_START))
3613 {
3614 quote_is_transpose = true;
3615
3616 do_comma_insert_check ();
3617
3618 maybe_unput_comma (spc_gobbled);
3619 }
3620
3621 current_input_column += yyleng;
3622
3623 if (tok != "__end__")
3624 looking_for_object_index = true;
3625
3626 return NAME;
3627 }
3628
3629 void
3630 lexical_feedback::maybe_warn_separator_insert (char sep)
3631 {
3632 std::string nm = curr_fcn_file_full_name;
3633
3634 if (nm.empty ())
3635 warning_with_id ("Octave:separator-insert",
3636 "potential auto-insertion of '%c' near line %d",
3637 sep, input_line_number);
3638 else
3639 warning_with_id ("Octave:separator-insert",
3640 "potential auto-insertion of '%c' near line %d of file %s",
3641 sep, input_line_number, nm.c_str ());
3642 }
3643
3644 void
3645 lexical_feedback::gripe_single_quote_string (void)
3646 {
3647 std::string nm = curr_fcn_file_full_name;
3648
3649 if (nm.empty ())
3650 warning_with_id ("Octave:single-quote-string",
3651 "single quote delimited string near line %d",
3652 input_line_number);
3653 else
3654 warning_with_id ("Octave:single-quote-string",
3655 "single quote delimited string near line %d of file %s",
3656 input_line_number, nm.c_str ());
3657 }
3658
3659 void
3660 lexical_feedback::gripe_matlab_incompatible (const std::string& msg)
3661 {
3662 std::string nm = curr_fcn_file_full_name;
3663
3664 if (nm.empty ())
3665 warning_with_id ("Octave:matlab-incompatible",
3666 "potential Matlab compatibility problem: %s",
3667 msg.c_str ());
3668 else
3669 warning_with_id ("Octave:matlab-incompatible",
3670 "potential Matlab compatibility problem: %s near line %d offile %s",
3671 msg.c_str (), input_line_number, nm.c_str ());
3672 }
3673
3674 void
3675 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c)
3676 {
3677 if (c == '#')
3678 gripe_matlab_incompatible ("# used as comment character");
3679 }
3680
3681 void
3682 lexical_feedback::gripe_matlab_incompatible_continuation (void)
3683 {
3684 gripe_matlab_incompatible ("\\ used as line continuation marker");
3685 }
3686
3687 void
3688 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op)
3689 {
3690 std::string t = op;
3691 int n = t.length ();
3692 if (t[n-1] == '\n')
3693 t.resize (n-1);
3694 gripe_matlab_incompatible (t + " used as operator");
3695 }