Mercurial > hg > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 16114:73a21ade0b6b
* lex.ll: Reorder function definitions.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Tue, 26 Feb 2013 02:52:48 -0500 |
parents | 7c5e5e97a3bc |
children | 67f71e2a6190 |
comparison
equal
deleted
inserted
replaced
16113:7c5e5e97a3bc | 16114:73a21ade0b6b |
---|---|
220 | 220 |
221 // Internal variable for lexer debugging state. | 221 // Internal variable for lexer debugging state. |
222 static bool lexer_debug_flag = false; | 222 static bool lexer_debug_flag = false; |
223 | 223 |
224 // Forward declarations for functions defined at the bottom of this | 224 // Forward declarations for functions defined at the bottom of this |
225 // file. | 225 // file that are needed inside the lexer actions. |
226 | 226 |
227 static bool match_any (char c, const char *s); | |
228 static std::string strip_trailing_whitespace (char *s); | 227 static std::string strip_trailing_whitespace (char *s); |
229 static int octave_read (char *buf, unsigned int max_size); | 228 static int octave_read (char *buf, unsigned int max_size); |
230 static void display_token (int tok); | 229 static void display_token (int tok); |
231 static void lexer_debug (const char *pattern, const char *text); | 230 static void lexer_debug (const char *pattern, const char *text); |
232 | 231 |
978 TOK_RETURN (END_OF_INPUT); | 977 TOK_RETURN (END_OF_INPUT); |
979 } | 978 } |
980 | 979 |
981 %% | 980 %% |
982 | 981 |
983 // GAG. | |
984 // | |
985 // If we're reading a matrix and the next character is '[', make sure | |
986 // that we insert a comma ahead of it. | |
987 | |
988 void | |
989 lexical_feedback::do_comma_insert_check (void) | |
990 { | |
991 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
992 | |
993 int c = text_yyinput (); | |
994 | |
995 xunput (c, yytext); | |
996 | |
997 if (spc_gobbled) | |
998 xunput (' ', yytext); | |
999 | |
1000 do_comma_insert = (! looking_at_object_index.front () | |
1001 && bracketflag && c == '['); | |
1002 } | |
1003 | |
1004 // Fix things up for errors or interrupts. The parser is never called | 982 // Fix things up for errors or interrupts. The parser is never called |
1005 // recursively, so it is always safe to reinitialize its state before | 983 // recursively, so it is always safe to reinitialize its state before |
1006 // doing any parsing. | 984 // doing any parsing. |
1007 | 985 |
1008 void | 986 void |
1177 std::cerr << "DEL"; | 1155 std::cerr << "DEL"; |
1178 break; | 1156 break; |
1179 } | 1157 } |
1180 } | 1158 } |
1181 | 1159 |
1182 lexical_feedback::~lexical_feedback (void) | |
1183 { | |
1184 // Clear out the stack of token info used to track line and | |
1185 // column numbers. | |
1186 | |
1187 while (! token_stack.empty ()) | |
1188 { | |
1189 delete token_stack.top (); | |
1190 token_stack.pop (); | |
1191 } | |
1192 } | |
1193 | |
1194 int | |
1195 lexical_feedback::text_yyinput (void) | |
1196 { | |
1197 int c = yyinput (); | |
1198 | |
1199 if (lexer_debug_flag) | |
1200 { | |
1201 std::cerr << "I: "; | |
1202 display_character (c); | |
1203 std::cerr << std::endl; | |
1204 } | |
1205 | |
1206 // Convert CRLF into just LF and single CR into LF. | |
1207 | |
1208 if (c == '\r') | |
1209 { | |
1210 c = yyinput (); | |
1211 | |
1212 if (lexer_debug_flag) | |
1213 { | |
1214 std::cerr << "I: "; | |
1215 display_character (c); | |
1216 std::cerr << std::endl; | |
1217 } | |
1218 | |
1219 if (c != '\n') | |
1220 { | |
1221 xunput (c, yytext); | |
1222 c = '\n'; | |
1223 } | |
1224 } | |
1225 | |
1226 if (c == '\n') | |
1227 input_line_number++; | |
1228 | |
1229 return c; | |
1230 } | |
1231 | |
1232 void | |
1233 lexical_feedback::xunput (char c, char *buf) | |
1234 { | |
1235 if (lexer_debug_flag) | |
1236 { | |
1237 std::cerr << "U: "; | |
1238 display_character (c); | |
1239 std::cerr << std::endl; | |
1240 } | |
1241 | |
1242 if (c == '\n') | |
1243 input_line_number--; | |
1244 | |
1245 yyunput (c, buf); | |
1246 } | |
1247 | |
1248 // If we read some newlines, we need to figure out what column we're | |
1249 // really looking at. | |
1250 | |
1251 void | |
1252 lexical_feedback::fixup_column_count (char *s) | |
1253 { | |
1254 char c; | |
1255 while ((c = *s++) != '\0') | |
1256 { | |
1257 if (c == '\n') | |
1258 { | |
1259 input_line_number++; | |
1260 current_input_column = 1; | |
1261 } | |
1262 else | |
1263 current_input_column++; | |
1264 } | |
1265 } | |
1266 | |
1267 // Include these so that we don't have to link to libfl.a. | 1160 // Include these so that we don't have to link to libfl.a. |
1268 | 1161 |
1269 int | 1162 int |
1270 yywrap (void) | 1163 yywrap (void) |
1271 { | 1164 { |
1338 delete_input_buffer (void *buf) | 1231 delete_input_buffer (void *buf) |
1339 { | 1232 { |
1340 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); | 1233 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); |
1341 } | 1234 } |
1342 | 1235 |
1343 bool | |
1344 lexical_feedback::inside_any_object_index (void) | |
1345 { | |
1346 bool retval = false; | |
1347 | |
1348 for (std::list<bool>::const_iterator i = looking_at_object_index.begin (); | |
1349 i != looking_at_object_index.end (); i++) | |
1350 { | |
1351 if (*i) | |
1352 { | |
1353 retval = true; | |
1354 break; | |
1355 } | |
1356 } | |
1357 | |
1358 return retval; | |
1359 } | |
1360 | |
1361 // Handle keywords. Return -1 if the keyword should be ignored. | |
1362 | |
1363 int | |
1364 lexical_feedback::is_keyword_token (const std::string& s) | |
1365 { | |
1366 int l = input_line_number; | |
1367 int c = current_input_column; | |
1368 | |
1369 int len = s.length (); | |
1370 | |
1371 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); | |
1372 | |
1373 if (kw) | |
1374 { | |
1375 yylval.tok_val = 0; | |
1376 | |
1377 switch (kw->kw_id) | |
1378 { | |
1379 case break_kw: | |
1380 case catch_kw: | |
1381 case continue_kw: | |
1382 case else_kw: | |
1383 case otherwise_kw: | |
1384 case return_kw: | |
1385 case unwind_protect_cleanup_kw: | |
1386 at_beginning_of_statement = true; | |
1387 break; | |
1388 | |
1389 case static_kw: | |
1390 if ((reading_fcn_file || reading_script_file | |
1391 || reading_classdef_file) | |
1392 && ! curr_fcn_file_full_name.empty ()) | |
1393 warning_with_id ("Octave:deprecated-keyword", | |
1394 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'", | |
1395 input_line_number, | |
1396 curr_fcn_file_full_name.c_str ()); | |
1397 else | |
1398 warning_with_id ("Octave:deprecated-keyword", | |
1399 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", | |
1400 input_line_number); | |
1401 // fall through ... | |
1402 | |
1403 case persistent_kw: | |
1404 break; | |
1405 | |
1406 case case_kw: | |
1407 case elseif_kw: | |
1408 case global_kw: | |
1409 case until_kw: | |
1410 break; | |
1411 | |
1412 case end_kw: | |
1413 if (inside_any_object_index () | |
1414 || (! reading_classdef_file | |
1415 && (defining_func | |
1416 && ! (looking_at_return_list | |
1417 || parsed_function_name.top ())))) | |
1418 return 0; | |
1419 | |
1420 yylval.tok_val = new token (token::simple_end, l, c); | |
1421 at_beginning_of_statement = true; | |
1422 break; | |
1423 | |
1424 case end_try_catch_kw: | |
1425 yylval.tok_val = new token (token::try_catch_end, l, c); | |
1426 at_beginning_of_statement = true; | |
1427 break; | |
1428 | |
1429 case end_unwind_protect_kw: | |
1430 yylval.tok_val = new token (token::unwind_protect_end, l, c); | |
1431 at_beginning_of_statement = true; | |
1432 break; | |
1433 | |
1434 case endfor_kw: | |
1435 yylval.tok_val = new token (token::for_end, l, c); | |
1436 at_beginning_of_statement = true; | |
1437 break; | |
1438 | |
1439 case endfunction_kw: | |
1440 yylval.tok_val = new token (token::function_end, l, c); | |
1441 at_beginning_of_statement = true; | |
1442 break; | |
1443 | |
1444 case endif_kw: | |
1445 yylval.tok_val = new token (token::if_end, l, c); | |
1446 at_beginning_of_statement = true; | |
1447 break; | |
1448 | |
1449 case endparfor_kw: | |
1450 yylval.tok_val = new token (token::parfor_end, l, c); | |
1451 at_beginning_of_statement = true; | |
1452 break; | |
1453 | |
1454 case endswitch_kw: | |
1455 yylval.tok_val = new token (token::switch_end, l, c); | |
1456 at_beginning_of_statement = true; | |
1457 break; | |
1458 | |
1459 case endwhile_kw: | |
1460 yylval.tok_val = new token (token::while_end, l, c); | |
1461 at_beginning_of_statement = true; | |
1462 break; | |
1463 | |
1464 case endclassdef_kw: | |
1465 yylval.tok_val = new token (token::classdef_end, l, c); | |
1466 at_beginning_of_statement = true; | |
1467 break; | |
1468 | |
1469 case endenumeration_kw: | |
1470 yylval.tok_val = new token (token::enumeration_end, l, c); | |
1471 at_beginning_of_statement = true; | |
1472 break; | |
1473 | |
1474 case endevents_kw: | |
1475 yylval.tok_val = new token (token::events_end, l, c); | |
1476 at_beginning_of_statement = true; | |
1477 break; | |
1478 | |
1479 case endmethods_kw: | |
1480 yylval.tok_val = new token (token::methods_end, l, c); | |
1481 at_beginning_of_statement = true; | |
1482 break; | |
1483 | |
1484 case endproperties_kw: | |
1485 yylval.tok_val = new token (token::properties_end, l, c); | |
1486 at_beginning_of_statement = true; | |
1487 break; | |
1488 | |
1489 | |
1490 case for_kw: | |
1491 case parfor_kw: | |
1492 case while_kw: | |
1493 promptflag--; | |
1494 looping++; | |
1495 break; | |
1496 | |
1497 case do_kw: | |
1498 at_beginning_of_statement = true; | |
1499 promptflag--; | |
1500 looping++; | |
1501 break; | |
1502 | |
1503 case try_kw: | |
1504 case unwind_protect_kw: | |
1505 at_beginning_of_statement = true; | |
1506 promptflag--; | |
1507 break; | |
1508 | |
1509 case if_kw: | |
1510 case switch_kw: | |
1511 promptflag--; | |
1512 break; | |
1513 | |
1514 case get_kw: | |
1515 case set_kw: | |
1516 // 'get' and 'set' are keywords in classdef method | |
1517 // declarations. | |
1518 if (! maybe_classdef_get_set_method) | |
1519 return 0; | |
1520 break; | |
1521 | |
1522 case enumeration_kw: | |
1523 case events_kw: | |
1524 case methods_kw: | |
1525 case properties_kw: | |
1526 // 'properties', 'methods' and 'events' are keywords for | |
1527 // classdef blocks. | |
1528 if (! parsing_classdef) | |
1529 return 0; | |
1530 // fall through ... | |
1531 | |
1532 case classdef_kw: | |
1533 // 'classdef' is always a keyword. | |
1534 promptflag--; | |
1535 break; | |
1536 | |
1537 case function_kw: | |
1538 promptflag--; | |
1539 | |
1540 defining_func++; | |
1541 parsed_function_name.push (false); | |
1542 | |
1543 if (! (reading_fcn_file || reading_script_file | |
1544 || reading_classdef_file)) | |
1545 input_line_number = 1; | |
1546 break; | |
1547 | |
1548 case magic_file_kw: | |
1549 { | |
1550 if ((reading_fcn_file || reading_script_file | |
1551 || reading_classdef_file) | |
1552 && ! curr_fcn_file_full_name.empty ()) | |
1553 yylval.tok_val = new token (curr_fcn_file_full_name, l, c); | |
1554 else | |
1555 yylval.tok_val = new token ("stdin", l, c); | |
1556 } | |
1557 break; | |
1558 | |
1559 case magic_line_kw: | |
1560 yylval.tok_val = new token (static_cast<double> (l), "", l, c); | |
1561 break; | |
1562 | |
1563 default: | |
1564 panic_impossible (); | |
1565 } | |
1566 | |
1567 if (! yylval.tok_val) | |
1568 yylval.tok_val = new token (l, c); | |
1569 | |
1570 token_stack.push (yylval.tok_val); | |
1571 | |
1572 return kw->tok; | |
1573 } | |
1574 | |
1575 return 0; | |
1576 } | |
1577 | |
1578 bool | |
1579 lexical_feedback::is_variable (const std::string& name) | |
1580 { | |
1581 return (symbol_table::is_variable (name) | |
1582 || (pending_local_variables.find (name) | |
1583 != pending_local_variables.end ())); | |
1584 } | |
1585 | |
1586 std::string | |
1587 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof) | |
1588 { | |
1589 std::string buf; | |
1590 | |
1591 bool at_bol = true; | |
1592 bool look_for_marker = false; | |
1593 | |
1594 bool warned_incompatible = false; | |
1595 | |
1596 int c = 0; | |
1597 | |
1598 while ((c = reader.getc ()) != EOF) | |
1599 { | |
1600 current_input_column++; | |
1601 | |
1602 if (look_for_marker) | |
1603 { | |
1604 at_bol = false; | |
1605 look_for_marker = false; | |
1606 | |
1607 if (c == '{' || c == '}') | |
1608 { | |
1609 std::string tmp_buf (1, static_cast<char> (c)); | |
1610 | |
1611 int type = c; | |
1612 | |
1613 bool done = false; | |
1614 | |
1615 while ((c = reader.getc ()) != EOF && ! done) | |
1616 { | |
1617 current_input_column++; | |
1618 | |
1619 switch (c) | |
1620 { | |
1621 case ' ': | |
1622 case '\t': | |
1623 tmp_buf += static_cast<char> (c); | |
1624 break; | |
1625 | |
1626 case '\n': | |
1627 { | |
1628 current_input_column = 0; | |
1629 at_bol = true; | |
1630 done = true; | |
1631 | |
1632 if (type == '{') | |
1633 { | |
1634 block_comment_nesting_level++; | |
1635 promptflag--; | |
1636 } | |
1637 else | |
1638 { | |
1639 block_comment_nesting_level--; | |
1640 promptflag++; | |
1641 | |
1642 if (block_comment_nesting_level == 0) | |
1643 { | |
1644 buf += grab_comment_block (reader, true, eof); | |
1645 | |
1646 return buf; | |
1647 } | |
1648 } | |
1649 } | |
1650 break; | |
1651 | |
1652 default: | |
1653 at_bol = false; | |
1654 tmp_buf += static_cast<char> (c); | |
1655 buf += tmp_buf; | |
1656 done = true; | |
1657 break; | |
1658 } | |
1659 } | |
1660 } | |
1661 } | |
1662 | |
1663 if (at_bol && (c == '%' || c == '#')) | |
1664 { | |
1665 if (c == '#' && ! warned_incompatible) | |
1666 { | |
1667 warned_incompatible = true; | |
1668 maybe_gripe_matlab_incompatible_comment (c); | |
1669 } | |
1670 | |
1671 at_bol = false; | |
1672 look_for_marker = true; | |
1673 } | |
1674 else | |
1675 { | |
1676 buf += static_cast<char> (c); | |
1677 | |
1678 if (c == '\n') | |
1679 { | |
1680 current_input_column = 0; | |
1681 at_bol = true; | |
1682 } | |
1683 } | |
1684 } | |
1685 | |
1686 if (c == EOF) | |
1687 eof = true; | |
1688 | |
1689 return buf; | |
1690 } | |
1691 | |
1692 std::string | |
1693 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol, | |
1694 bool& eof) | |
1695 { | |
1696 std::string buf; | |
1697 | |
1698 // TRUE means we are at the beginning of a comment block. | |
1699 bool begin_comment = false; | |
1700 | |
1701 // TRUE means we are currently reading a comment block. | |
1702 bool in_comment = false; | |
1703 | |
1704 bool warned_incompatible = false; | |
1705 | |
1706 int c = 0; | |
1707 | |
1708 while ((c = reader.getc ()) != EOF) | |
1709 { | |
1710 current_input_column++; | |
1711 | |
1712 if (begin_comment) | |
1713 { | |
1714 if (c == '%' || c == '#') | |
1715 { | |
1716 at_bol = false; | |
1717 continue; | |
1718 } | |
1719 else if (at_bol && c == '{') | |
1720 { | |
1721 std::string tmp_buf (1, static_cast<char> (c)); | |
1722 | |
1723 bool done = false; | |
1724 | |
1725 while ((c = reader.getc ()) != EOF && ! done) | |
1726 { | |
1727 current_input_column++; | |
1728 | |
1729 switch (c) | |
1730 { | |
1731 case ' ': | |
1732 case '\t': | |
1733 tmp_buf += static_cast<char> (c); | |
1734 break; | |
1735 | |
1736 case '\n': | |
1737 { | |
1738 current_input_column = 0; | |
1739 at_bol = true; | |
1740 done = true; | |
1741 | |
1742 block_comment_nesting_level++; | |
1743 promptflag--; | |
1744 | |
1745 buf += grab_block_comment (reader, eof); | |
1746 | |
1747 in_comment = false; | |
1748 | |
1749 if (eof) | |
1750 goto done; | |
1751 } | |
1752 break; | |
1753 | |
1754 default: | |
1755 at_bol = false; | |
1756 tmp_buf += static_cast<char> (c); | |
1757 buf += tmp_buf; | |
1758 done = true; | |
1759 break; | |
1760 } | |
1761 } | |
1762 } | |
1763 else | |
1764 { | |
1765 at_bol = false; | |
1766 begin_comment = false; | |
1767 } | |
1768 } | |
1769 | |
1770 if (in_comment) | |
1771 { | |
1772 buf += static_cast<char> (c); | |
1773 | |
1774 if (c == '\n') | |
1775 { | |
1776 at_bol = true; | |
1777 current_input_column = 0; | |
1778 in_comment = false; | |
1779 | |
1780 // FIXME -- bailing out here prevents things like | |
1781 // | |
1782 // octave> # comment | |
1783 // octave> x = 1 | |
1784 // | |
1785 // from failing at the command line, while still | |
1786 // allowing blocks of comments to be grabbed properly | |
1787 // for function doc strings. But only the first line of | |
1788 // a multi-line doc string will be picked up for | |
1789 // functions defined on the command line. We need a | |
1790 // better way of collecting these comments... | |
1791 if (! (reading_fcn_file || reading_script_file)) | |
1792 goto done; | |
1793 } | |
1794 } | |
1795 else | |
1796 { | |
1797 switch (c) | |
1798 { | |
1799 case ' ': | |
1800 case '\t': | |
1801 break; | |
1802 | |
1803 case '#': | |
1804 if (! warned_incompatible) | |
1805 { | |
1806 warned_incompatible = true; | |
1807 maybe_gripe_matlab_incompatible_comment (c); | |
1808 } | |
1809 // fall through... | |
1810 | |
1811 case '%': | |
1812 in_comment = true; | |
1813 begin_comment = true; | |
1814 break; | |
1815 | |
1816 default: | |
1817 current_input_column--; | |
1818 reader.ungetc (c); | |
1819 goto done; | |
1820 } | |
1821 } | |
1822 } | |
1823 | |
1824 done: | |
1825 | |
1826 if (c == EOF) | |
1827 eof = true; | |
1828 | |
1829 return buf; | |
1830 } | |
1831 | |
1832 class | 1236 class |
1833 flex_stream_reader : public stream_reader | 1237 flex_stream_reader : public stream_reader |
1834 { | 1238 { |
1835 public: | 1239 public: |
1836 flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { } | 1240 flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { } |
1846 | 1250 |
1847 flex_stream_reader& operator = (const flex_stream_reader&); | 1251 flex_stream_reader& operator = (const flex_stream_reader&); |
1848 | 1252 |
1849 char *buf; | 1253 char *buf; |
1850 }; | 1254 }; |
1851 | |
1852 int | |
1853 lexical_feedback::process_comment (bool start_in_block, bool& eof) | |
1854 { | |
1855 eof = false; | |
1856 | |
1857 std::string help_txt; | |
1858 | |
1859 if (! help_buf.empty ()) | |
1860 help_txt = help_buf.top (); | |
1861 | |
1862 flex_stream_reader flex_reader (yytext); | |
1863 | |
1864 // process_comment is only supposed to be called when we are not | |
1865 // initially looking at a block comment. | |
1866 | |
1867 std::string txt = start_in_block | |
1868 ? grab_block_comment (flex_reader, eof) | |
1869 : grab_comment_block (flex_reader, false, eof); | |
1870 | |
1871 if (lexer_debug_flag) | |
1872 std::cerr << "C: " << txt << std::endl; | |
1873 | |
1874 if (help_txt.empty () && nesting_level.none ()) | |
1875 { | |
1876 if (! help_buf.empty ()) | |
1877 help_buf.pop (); | |
1878 | |
1879 help_buf.push (txt); | |
1880 } | |
1881 | |
1882 octave_comment_buffer::append (txt); | |
1883 | |
1884 current_input_column = 1; | |
1885 quote_is_transpose = false; | |
1886 convert_spaces_to_comma = true; | |
1887 at_beginning_of_statement = true; | |
1888 | |
1889 if (YY_START == COMMAND_START) | |
1890 BEGIN (INITIAL); | |
1891 | |
1892 if (nesting_level.none ()) | |
1893 return '\n'; | |
1894 else if (nesting_level.is_bracket_or_brace ()) | |
1895 return ';'; | |
1896 else | |
1897 return 0; | |
1898 } | |
1899 | 1255 |
1900 // Return 1 if the given character matches any character in the given | 1256 // Return 1 if the given character matches any character in the given |
1901 // string. | 1257 // string. |
1902 | 1258 |
1903 static bool | 1259 static bool |
1924 looks_like_bin_op (bool spc_prev, int next_char) | 1280 looks_like_bin_op (bool spc_prev, int next_char) |
1925 { | 1281 { |
1926 bool spc_next = (next_char == ' ' || next_char == '\t'); | 1282 bool spc_next = (next_char == ' ' || next_char == '\t'); |
1927 | 1283 |
1928 return ((spc_prev && spc_next) || ! spc_prev); | 1284 return ((spc_prev && spc_next) || ! spc_prev); |
1929 } | |
1930 | |
1931 // Recognize separators. If the separator is a CRLF pair, it is | |
1932 // replaced by a single LF. | |
1933 | |
1934 bool | |
1935 lexical_feedback::next_token_is_sep_op (void) | |
1936 { | |
1937 bool retval = false; | |
1938 | |
1939 int c = text_yyinput (); | |
1940 | |
1941 retval = match_any (c, ",;\n]"); | |
1942 | |
1943 xunput (c, yytext); | |
1944 | |
1945 return retval; | |
1946 } | |
1947 | |
1948 // Try to determine if the next token should be treated as a postfix | |
1949 // unary operator. This is ugly, but it seems to do the right thing. | |
1950 | |
1951 bool | |
1952 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev) | |
1953 { | |
1954 bool un_op = false; | |
1955 | |
1956 int c0 = text_yyinput (); | |
1957 | |
1958 if (c0 == '\'' && ! spc_prev) | |
1959 { | |
1960 un_op = true; | |
1961 } | |
1962 else if (c0 == '.') | |
1963 { | |
1964 int c1 = text_yyinput (); | |
1965 un_op = (c1 == '\''); | |
1966 xunput (c1, yytext); | |
1967 } | |
1968 else if (c0 == '+') | |
1969 { | |
1970 int c1 = text_yyinput (); | |
1971 un_op = (c1 == '+'); | |
1972 xunput (c1, yytext); | |
1973 } | |
1974 else if (c0 == '-') | |
1975 { | |
1976 int c1 = text_yyinput (); | |
1977 un_op = (c1 == '-'); | |
1978 xunput (c1, yytext); | |
1979 } | |
1980 | |
1981 xunput (c0, yytext); | |
1982 | |
1983 return un_op; | |
1984 } | |
1985 | |
1986 // Try to determine if the next token should be treated as a binary | |
1987 // operator. | |
1988 // | |
1989 // This kluge exists because whitespace is not always ignored inside | |
1990 // the square brackets that are used to create matrix objects (though | |
1991 // spacing only really matters in the cases that can be interpreted | |
1992 // either as binary ops or prefix unary ops: currently just +, -). | |
1993 // | |
1994 // Note that a line continuation directly following a + or - operator | |
1995 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
1996 // parsed as a binary operator. | |
1997 | |
1998 bool | |
1999 lexical_feedback::next_token_is_bin_op (bool spc_prev) | |
2000 { | |
2001 bool bin_op = false; | |
2002 | |
2003 int c0 = text_yyinput (); | |
2004 | |
2005 switch (c0) | |
2006 { | |
2007 case '+': | |
2008 case '-': | |
2009 { | |
2010 int c1 = text_yyinput (); | |
2011 | |
2012 switch (c1) | |
2013 { | |
2014 case '+': | |
2015 case '-': | |
2016 // Unary ops, spacing doesn't matter. | |
2017 break; | |
2018 | |
2019 case '=': | |
2020 // Binary ops, spacing doesn't matter. | |
2021 bin_op = true; | |
2022 break; | |
2023 | |
2024 default: | |
2025 // Could be either, spacing matters. | |
2026 bin_op = looks_like_bin_op (spc_prev, c1); | |
2027 break; | |
2028 } | |
2029 | |
2030 xunput (c1, yytext); | |
2031 } | |
2032 break; | |
2033 | |
2034 case ':': | |
2035 case '/': | |
2036 case '\\': | |
2037 case '^': | |
2038 // Always a binary op (may also include /=, \=, and ^=). | |
2039 bin_op = true; | |
2040 break; | |
2041 | |
2042 // .+ .- ./ .\ .^ .* .** | |
2043 case '.': | |
2044 { | |
2045 int c1 = text_yyinput (); | |
2046 | |
2047 if (match_any (c1, "+-/\\^*")) | |
2048 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
2049 bin_op = true; | |
2050 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') | |
2051 // A structure element reference is a binary op. | |
2052 bin_op = true; | |
2053 | |
2054 xunput (c1, yytext); | |
2055 } | |
2056 break; | |
2057 | |
2058 // = == & && | || * ** | |
2059 case '=': | |
2060 case '&': | |
2061 case '|': | |
2062 case '*': | |
2063 // Always a binary op (may also include ==, &&, ||, **). | |
2064 bin_op = true; | |
2065 break; | |
2066 | |
2067 // < <= <> > >= | |
2068 case '<': | |
2069 case '>': | |
2070 // Always a binary op (may also include <=, <>, >=). | |
2071 bin_op = true; | |
2072 break; | |
2073 | |
2074 // ~= != | |
2075 case '~': | |
2076 case '!': | |
2077 { | |
2078 int c1 = text_yyinput (); | |
2079 | |
2080 // ~ and ! can be unary ops, so require following =. | |
2081 if (c1 == '=') | |
2082 bin_op = true; | |
2083 | |
2084 xunput (c1, yytext); | |
2085 } | |
2086 break; | |
2087 | |
2088 default: | |
2089 break; | |
2090 } | |
2091 | |
2092 xunput (c0, yytext); | |
2093 | |
2094 return bin_op; | |
2095 } | |
2096 | |
2097 // Used to delete trailing white space from tokens. | |
2098 | |
2099 static std::string | |
2100 strip_trailing_whitespace (char *s) | |
2101 { | |
2102 std::string retval = s; | |
2103 | |
2104 size_t pos = retval.find_first_of (" \t"); | |
2105 | |
2106 if (pos != std::string::npos) | |
2107 retval.resize (pos); | |
2108 | |
2109 return retval; | |
2110 } | |
2111 | |
2112 // FIXME -- we need to handle block comments here. | |
2113 | |
2114 void | |
2115 lexical_feedback::scan_for_comments (const char *text) | |
2116 { | |
2117 std::string comment_buf; | |
2118 | |
2119 bool in_comment = false; | |
2120 bool beginning_of_comment = false; | |
2121 | |
2122 int len = strlen (text); | |
2123 int i = 0; | |
2124 | |
2125 while (i < len) | |
2126 { | |
2127 char c = text[i++]; | |
2128 | |
2129 switch (c) | |
2130 { | |
2131 case '%': | |
2132 case '#': | |
2133 if (in_comment) | |
2134 { | |
2135 if (! beginning_of_comment) | |
2136 comment_buf += static_cast<char> (c); | |
2137 } | |
2138 else | |
2139 { | |
2140 maybe_gripe_matlab_incompatible_comment (c); | |
2141 in_comment = true; | |
2142 beginning_of_comment = true; | |
2143 } | |
2144 break; | |
2145 | |
2146 case '\n': | |
2147 if (in_comment) | |
2148 { | |
2149 comment_buf += static_cast<char> (c); | |
2150 octave_comment_buffer::append (comment_buf); | |
2151 comment_buf.resize (0); | |
2152 in_comment = false; | |
2153 beginning_of_comment = false; | |
2154 } | |
2155 break; | |
2156 | |
2157 default: | |
2158 if (in_comment) | |
2159 { | |
2160 comment_buf += static_cast<char> (c); | |
2161 beginning_of_comment = false; | |
2162 } | |
2163 break; | |
2164 } | |
2165 } | |
2166 | |
2167 if (! comment_buf.empty ()) | |
2168 octave_comment_buffer::append (comment_buf); | |
2169 } | |
2170 | |
2171 // Discard whitespace, including comments and continuations. | |
2172 | |
2173 // FIXME -- we need to handle block comments here. | |
2174 | |
2175 int | |
2176 lexical_feedback::eat_whitespace (void) | |
2177 { | |
2178 int retval = lexical_feedback::NO_WHITESPACE; | |
2179 | |
2180 std::string comment_buf; | |
2181 | |
2182 bool in_comment = false; | |
2183 bool beginning_of_comment = false; | |
2184 | |
2185 int c = 0; | |
2186 | |
2187 while ((c = text_yyinput ()) != EOF) | |
2188 { | |
2189 current_input_column++; | |
2190 | |
2191 switch (c) | |
2192 { | |
2193 case ' ': | |
2194 case '\t': | |
2195 if (in_comment) | |
2196 { | |
2197 comment_buf += static_cast<char> (c); | |
2198 beginning_of_comment = false; | |
2199 } | |
2200 retval |= lexical_feedback::SPACE_OR_TAB; | |
2201 break; | |
2202 | |
2203 case '\n': | |
2204 retval |= lexical_feedback::NEWLINE; | |
2205 if (in_comment) | |
2206 { | |
2207 comment_buf += static_cast<char> (c); | |
2208 octave_comment_buffer::append (comment_buf); | |
2209 comment_buf.resize (0); | |
2210 in_comment = false; | |
2211 beginning_of_comment = false; | |
2212 } | |
2213 current_input_column = 0; | |
2214 break; | |
2215 | |
2216 case '#': | |
2217 case '%': | |
2218 if (in_comment) | |
2219 { | |
2220 if (! beginning_of_comment) | |
2221 comment_buf += static_cast<char> (c); | |
2222 } | |
2223 else | |
2224 { | |
2225 maybe_gripe_matlab_incompatible_comment (c); | |
2226 in_comment = true; | |
2227 beginning_of_comment = true; | |
2228 } | |
2229 break; | |
2230 | |
2231 case '.': | |
2232 if (in_comment) | |
2233 { | |
2234 comment_buf += static_cast<char> (c); | |
2235 beginning_of_comment = false; | |
2236 break; | |
2237 } | |
2238 else | |
2239 { | |
2240 if (have_ellipsis_continuation ()) | |
2241 break; | |
2242 else | |
2243 goto done; | |
2244 } | |
2245 | |
2246 case '\\': | |
2247 if (in_comment) | |
2248 { | |
2249 comment_buf += static_cast<char> (c); | |
2250 beginning_of_comment = false; | |
2251 break; | |
2252 } | |
2253 else | |
2254 { | |
2255 if (have_continuation ()) | |
2256 break; | |
2257 else | |
2258 goto done; | |
2259 } | |
2260 | |
2261 default: | |
2262 if (in_comment) | |
2263 { | |
2264 comment_buf += static_cast<char> (c); | |
2265 beginning_of_comment = false; | |
2266 break; | |
2267 } | |
2268 else | |
2269 goto done; | |
2270 } | |
2271 } | |
2272 | |
2273 if (! comment_buf.empty ()) | |
2274 octave_comment_buffer::append (comment_buf); | |
2275 | |
2276 done: | |
2277 xunput (c, yytext); | |
2278 current_input_column--; | |
2279 return retval; | |
2280 } | |
2281 | |
2282 static inline bool | |
2283 looks_like_hex (const char *s, int len) | |
2284 { | |
2285 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')); | |
2286 } | |
2287 | |
2288 void | |
2289 lexical_feedback::handle_number (void) | |
2290 { | |
2291 double value = 0.0; | |
2292 int nread = 0; | |
2293 | |
2294 if (looks_like_hex (yytext, strlen (yytext))) | |
2295 { | |
2296 unsigned long ival; | |
2297 | |
2298 nread = sscanf (yytext, "%lx", &ival); | |
2299 | |
2300 value = static_cast<double> (ival); | |
2301 } | |
2302 else | |
2303 { | |
2304 char *tmp = strsave (yytext); | |
2305 | |
2306 char *idx = strpbrk (tmp, "Dd"); | |
2307 | |
2308 if (idx) | |
2309 *idx = 'e'; | |
2310 | |
2311 nread = sscanf (tmp, "%lf", &value); | |
2312 | |
2313 delete [] tmp; | |
2314 } | |
2315 | |
2316 // If yytext doesn't contain a valid number, we are in deep doo doo. | |
2317 | |
2318 assert (nread == 1); | |
2319 | |
2320 quote_is_transpose = true; | |
2321 convert_spaces_to_comma = true; | |
2322 looking_for_object_index = false; | |
2323 at_beginning_of_statement = false; | |
2324 | |
2325 yylval.tok_val = new token (value, yytext, input_line_number, | |
2326 current_input_column); | |
2327 | |
2328 token_stack.push (yylval.tok_val); | |
2329 | |
2330 current_input_column += yyleng; | |
2331 | |
2332 do_comma_insert_check (); | |
2333 } | |
2334 | |
2335 // We have seen a backslash and need to find out if it should be | |
2336 // treated as a continuation character. If so, this eats it, up to | |
2337 // and including the new line character. | |
2338 // | |
2339 // Match whitespace only, followed by a comment character or newline. | |
2340 // Once a comment character is found, discard all input until newline. | |
2341 // If non-whitespace characters are found before comment | |
2342 // characters, return 0. Otherwise, return 1. | |
2343 | |
2344 // FIXME -- we need to handle block comments here. | |
2345 | |
2346 bool | |
2347 lexical_feedback::have_continuation (bool trailing_comments_ok) | |
2348 { | |
2349 std::ostringstream buf; | |
2350 | |
2351 std::string comment_buf; | |
2352 | |
2353 bool in_comment = false; | |
2354 bool beginning_of_comment = false; | |
2355 | |
2356 int c = 0; | |
2357 | |
2358 while ((c = text_yyinput ()) != EOF) | |
2359 { | |
2360 buf << static_cast<char> (c); | |
2361 | |
2362 switch (c) | |
2363 { | |
2364 case ' ': | |
2365 case '\t': | |
2366 if (in_comment) | |
2367 { | |
2368 comment_buf += static_cast<char> (c); | |
2369 beginning_of_comment = false; | |
2370 } | |
2371 break; | |
2372 | |
2373 case '%': | |
2374 case '#': | |
2375 if (trailing_comments_ok) | |
2376 { | |
2377 if (in_comment) | |
2378 { | |
2379 if (! beginning_of_comment) | |
2380 comment_buf += static_cast<char> (c); | |
2381 } | |
2382 else | |
2383 { | |
2384 maybe_gripe_matlab_incompatible_comment (c); | |
2385 in_comment = true; | |
2386 beginning_of_comment = true; | |
2387 } | |
2388 } | |
2389 else | |
2390 goto cleanup; | |
2391 break; | |
2392 | |
2393 case '\n': | |
2394 if (in_comment) | |
2395 { | |
2396 comment_buf += static_cast<char> (c); | |
2397 octave_comment_buffer::append (comment_buf); | |
2398 } | |
2399 current_input_column = 0; | |
2400 promptflag--; | |
2401 gripe_matlab_incompatible_continuation (); | |
2402 return true; | |
2403 | |
2404 default: | |
2405 if (in_comment) | |
2406 { | |
2407 comment_buf += static_cast<char> (c); | |
2408 beginning_of_comment = false; | |
2409 } | |
2410 else | |
2411 goto cleanup; | |
2412 break; | |
2413 } | |
2414 } | |
2415 | |
2416 xunput (c, yytext); | |
2417 return false; | |
2418 | |
2419 cleanup: | |
2420 | |
2421 std::string s = buf.str (); | |
2422 | |
2423 int len = s.length (); | |
2424 while (len--) | |
2425 xunput (s[len], yytext); | |
2426 | |
2427 return false; | |
2428 } | |
2429 | |
2430 // We have seen a '.' and need to see if it is the start of a | |
2431 // continuation. If so, this eats it, up to and including the new | |
2432 // line character. | |
2433 | |
2434 bool | |
2435 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok) | |
2436 { | |
2437 char c1 = text_yyinput (); | |
2438 if (c1 == '.') | |
2439 { | |
2440 char c2 = text_yyinput (); | |
2441 if (c2 == '.' && have_continuation (trailing_comments_ok)) | |
2442 return true; | |
2443 else | |
2444 { | |
2445 xunput (c2, yytext); | |
2446 xunput (c1, yytext); | |
2447 } | |
2448 } | |
2449 else | |
2450 xunput (c1, yytext); | |
2451 | |
2452 return false; | |
2453 } | |
2454 | |
2455 // See if we have a continuation line. If so, eat it and the leading | |
2456 // whitespace on the next line. | |
2457 | |
2458 int | |
2459 lexical_feedback::eat_continuation (void) | |
2460 { | |
2461 int retval = lexical_feedback::NO_WHITESPACE; | |
2462 | |
2463 int c = text_yyinput (); | |
2464 | |
2465 if ((c == '.' && have_ellipsis_continuation ()) | |
2466 || (c == '\\' && have_continuation ())) | |
2467 retval = eat_whitespace (); | |
2468 else | |
2469 xunput (c, yytext); | |
2470 | |
2471 return retval; | |
2472 } | |
2473 | |
2474 int | |
2475 lexical_feedback::handle_string (char delim) | |
2476 { | |
2477 std::ostringstream buf; | |
2478 | |
2479 int bos_line = input_line_number; | |
2480 int bos_col = current_input_column; | |
2481 | |
2482 int c; | |
2483 int escape_pending = 0; | |
2484 | |
2485 while ((c = text_yyinput ()) != EOF) | |
2486 { | |
2487 current_input_column++; | |
2488 | |
2489 if (c == '\\') | |
2490 { | |
2491 if (delim == '\'' || escape_pending) | |
2492 { | |
2493 buf << static_cast<char> (c); | |
2494 escape_pending = 0; | |
2495 } | |
2496 else | |
2497 { | |
2498 if (have_continuation (false)) | |
2499 escape_pending = 0; | |
2500 else | |
2501 { | |
2502 buf << static_cast<char> (c); | |
2503 escape_pending = 1; | |
2504 } | |
2505 } | |
2506 continue; | |
2507 } | |
2508 else if (c == '.') | |
2509 { | |
2510 if (delim == '\'' || ! have_ellipsis_continuation (false)) | |
2511 buf << static_cast<char> (c); | |
2512 } | |
2513 else if (c == '\n') | |
2514 { | |
2515 error ("unterminated string constant"); | |
2516 break; | |
2517 } | |
2518 else if (c == delim) | |
2519 { | |
2520 if (escape_pending) | |
2521 buf << static_cast<char> (c); | |
2522 else | |
2523 { | |
2524 c = text_yyinput (); | |
2525 if (c == delim) | |
2526 { | |
2527 buf << static_cast<char> (c); | |
2528 } | |
2529 else | |
2530 { | |
2531 std::string s; | |
2532 xunput (c, yytext); | |
2533 | |
2534 if (delim == '\'') | |
2535 s = buf.str (); | |
2536 else | |
2537 s = do_string_escapes (buf.str ()); | |
2538 | |
2539 quote_is_transpose = true; | |
2540 convert_spaces_to_comma = true; | |
2541 | |
2542 yylval.tok_val = new token (s, bos_line, bos_col); | |
2543 token_stack.push (yylval.tok_val); | |
2544 | |
2545 if (delim == '"') | |
2546 gripe_matlab_incompatible ("\" used as string delimiter"); | |
2547 else if (delim == '\'') | |
2548 gripe_single_quote_string (); | |
2549 | |
2550 looking_for_object_index = true; | |
2551 at_beginning_of_statement = false; | |
2552 | |
2553 return delim == '"' ? DQ_STRING : SQ_STRING; | |
2554 } | |
2555 } | |
2556 } | |
2557 else | |
2558 { | |
2559 buf << static_cast<char> (c); | |
2560 } | |
2561 | |
2562 escape_pending = 0; | |
2563 } | |
2564 | |
2565 return LEXICAL_ERROR; | |
2566 } | |
2567 | |
2568 bool | |
2569 lexical_feedback::next_token_is_assign_op (void) | |
2570 { | |
2571 bool retval = false; | |
2572 | |
2573 int c0 = text_yyinput (); | |
2574 | |
2575 switch (c0) | |
2576 { | |
2577 case '=': | |
2578 { | |
2579 int c1 = text_yyinput (); | |
2580 xunput (c1, yytext); | |
2581 if (c1 != '=') | |
2582 retval = true; | |
2583 } | |
2584 break; | |
2585 | |
2586 case '+': | |
2587 case '-': | |
2588 case '*': | |
2589 case '/': | |
2590 case '\\': | |
2591 case '&': | |
2592 case '|': | |
2593 { | |
2594 int c1 = text_yyinput (); | |
2595 xunput (c1, yytext); | |
2596 if (c1 == '=') | |
2597 retval = true; | |
2598 } | |
2599 break; | |
2600 | |
2601 case '.': | |
2602 { | |
2603 int c1 = text_yyinput (); | |
2604 if (match_any (c1, "+-*/\\")) | |
2605 { | |
2606 int c2 = text_yyinput (); | |
2607 xunput (c2, yytext); | |
2608 if (c2 == '=') | |
2609 retval = true; | |
2610 } | |
2611 xunput (c1, yytext); | |
2612 } | |
2613 break; | |
2614 | |
2615 case '>': | |
2616 { | |
2617 int c1 = text_yyinput (); | |
2618 if (c1 == '>') | |
2619 { | |
2620 int c2 = text_yyinput (); | |
2621 xunput (c2, yytext); | |
2622 if (c2 == '=') | |
2623 retval = true; | |
2624 } | |
2625 xunput (c1, yytext); | |
2626 } | |
2627 break; | |
2628 | |
2629 case '<': | |
2630 { | |
2631 int c1 = text_yyinput (); | |
2632 if (c1 == '<') | |
2633 { | |
2634 int c2 = text_yyinput (); | |
2635 xunput (c2, yytext); | |
2636 if (c2 == '=') | |
2637 retval = true; | |
2638 } | |
2639 xunput (c1, yytext); | |
2640 } | |
2641 break; | |
2642 | |
2643 default: | |
2644 break; | |
2645 } | |
2646 | |
2647 xunput (c0, yytext); | |
2648 | |
2649 return retval; | |
2650 } | |
2651 | |
2652 bool | |
2653 lexical_feedback::next_token_is_index_op (void) | |
2654 { | |
2655 int c = text_yyinput (); | |
2656 xunput (c, yytext); | |
2657 return c == '(' || c == '{'; | |
2658 } | |
2659 | |
2660 int | |
2661 lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type) | |
2662 { | |
2663 int retval = bracket_type; | |
2664 | |
2665 if (! nesting_level.none ()) | |
2666 { | |
2667 nesting_level.remove (); | |
2668 | |
2669 if (bracket_type == ']') | |
2670 bracketflag--; | |
2671 else if (bracket_type == '}') | |
2672 braceflag--; | |
2673 else | |
2674 panic_impossible (); | |
2675 } | |
2676 | |
2677 if (bracketflag == 0 && braceflag == 0) | |
2678 BEGIN (INITIAL); | |
2679 | |
2680 if (bracket_type == ']' | |
2681 && next_token_is_assign_op () | |
2682 && ! looking_at_return_list) | |
2683 { | |
2684 retval = CLOSE_BRACE; | |
2685 } | |
2686 else if ((bracketflag || braceflag) | |
2687 && convert_spaces_to_comma | |
2688 && (nesting_level.is_bracket () | |
2689 || (nesting_level.is_brace () | |
2690 && ! looking_at_object_index.front ()))) | |
2691 { | |
2692 bool index_op = next_token_is_index_op (); | |
2693 | |
2694 // Don't insert comma if we are looking at something like | |
2695 // | |
2696 // [x{i}{j}] or [x{i}(j)] | |
2697 // | |
2698 // but do if we are looking at | |
2699 // | |
2700 // [x{i} {j}] or [x{i} (j)] | |
2701 | |
2702 if (spc_gobbled || ! (bracket_type == '}' && index_op)) | |
2703 { | |
2704 bool bin_op = next_token_is_bin_op (spc_gobbled); | |
2705 | |
2706 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2707 | |
2708 bool sep_op = next_token_is_sep_op (); | |
2709 | |
2710 if (! (postfix_un_op || bin_op || sep_op)) | |
2711 { | |
2712 maybe_warn_separator_insert (','); | |
2713 | |
2714 xunput (',', yytext); | |
2715 return retval; | |
2716 } | |
2717 } | |
2718 } | |
2719 | |
2720 quote_is_transpose = true; | |
2721 convert_spaces_to_comma = true; | |
2722 | |
2723 return retval; | |
2724 } | |
2725 | |
2726 void | |
2727 lexical_feedback::maybe_unput_comma (int spc_gobbled) | |
2728 { | |
2729 if (nesting_level.is_bracket () | |
2730 || (nesting_level.is_brace () | |
2731 && ! looking_at_object_index.front ())) | |
2732 { | |
2733 int bin_op = next_token_is_bin_op (spc_gobbled); | |
2734 | |
2735 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2736 | |
2737 int c1 = text_yyinput (); | |
2738 int c2 = text_yyinput (); | |
2739 | |
2740 xunput (c2, yytext); | |
2741 xunput (c1, yytext); | |
2742 | |
2743 int sep_op = next_token_is_sep_op (); | |
2744 | |
2745 int dot_op = (c1 == '.' | |
2746 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
2747 | |
2748 if (postfix_un_op || bin_op || sep_op || dot_op) | |
2749 return; | |
2750 | |
2751 int index_op = (c1 == '(' || c1 == '{'); | |
2752 | |
2753 // If there is no space before the indexing op, we don't insert | |
2754 // a comma. | |
2755 | |
2756 if (index_op && ! spc_gobbled) | |
2757 return; | |
2758 | |
2759 maybe_warn_separator_insert (','); | |
2760 | |
2761 xunput (',', yytext); | |
2762 } | |
2763 } | |
2764 | |
2765 bool | |
2766 lexical_feedback::next_token_can_follow_bin_op (void) | |
2767 { | |
2768 std::stack<char> buf; | |
2769 | |
2770 int c = EOF; | |
2771 | |
2772 // Skip whitespace in current statement on current line | |
2773 while (true) | |
2774 { | |
2775 c = text_yyinput (); | |
2776 | |
2777 buf.push (c); | |
2778 | |
2779 if (match_any (c, ",;\n") || (c != ' ' && c != '\t')) | |
2780 break; | |
2781 } | |
2782 | |
2783 // Restore input. | |
2784 while (! buf.empty ()) | |
2785 { | |
2786 xunput (buf.top (), yytext); | |
2787 | |
2788 buf.pop (); | |
2789 } | |
2790 | |
2791 return (isalnum (c) || match_any (c, "!\"'(-[_{~")); | |
2792 } | |
2793 | |
// Return false for the names of the built-in constants listed below,
// true for every other name.  Refusing to treat these as commands
// avoids surprises when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char *const never_commands[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  const size_t count = sizeof (never_commands) / sizeof (never_commands[0]);

  for (size_t k = 0; k < count; k++)
    {
      if (tok == never_commands[k])
        return false;
    }

  return true;
}
2806 | |
2807 bool | |
2808 lexical_feedback::looks_like_command_arg (void) | |
2809 { | |
2810 bool retval = true; | |
2811 | |
2812 int c0 = text_yyinput (); | |
2813 | |
2814 switch (c0) | |
2815 { | |
2816 // = == | |
2817 case '=': | |
2818 { | |
2819 int c1 = text_yyinput (); | |
2820 | |
2821 if (c1 == '=') | |
2822 { | |
2823 int c2 = text_yyinput (); | |
2824 | |
2825 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2826 && next_token_can_follow_bin_op ()) | |
2827 retval = false; | |
2828 | |
2829 xunput (c2, yytext); | |
2830 } | |
2831 else | |
2832 retval = false; | |
2833 | |
2834 xunput (c1, yytext); | |
2835 } | |
2836 break; | |
2837 | |
2838 case '(': | |
2839 case '{': | |
2840 // Indexing. | |
2841 retval = false; | |
2842 break; | |
2843 | |
2844 case '\n': | |
2845 // EOL. | |
2846 break; | |
2847 | |
2848 case '\'': | |
2849 case '"': | |
2850 // Beginning of a character string. | |
2851 break; | |
2852 | |
2853 // + - ++ -- += -= | |
2854 case '+': | |
2855 case '-': | |
2856 { | |
2857 int c1 = text_yyinput (); | |
2858 | |
2859 switch (c1) | |
2860 { | |
2861 case '\n': | |
2862 // EOL. | |
2863 case '+': | |
2864 case '-': | |
2865 // Unary ops, spacing doesn't matter. | |
2866 break; | |
2867 | |
2868 case '\t': | |
2869 case ' ': | |
2870 { | |
2871 if (next_token_can_follow_bin_op ()) | |
2872 retval = false; | |
2873 } | |
2874 break; | |
2875 | |
2876 case '=': | |
2877 { | |
2878 int c2 = text_yyinput (); | |
2879 | |
2880 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2881 && next_token_can_follow_bin_op ()) | |
2882 retval = false; | |
2883 | |
2884 xunput (c2, yytext); | |
2885 } | |
2886 break; | |
2887 } | |
2888 | |
2889 xunput (c1, yytext); | |
2890 } | |
2891 break; | |
2892 | |
2893 case ':': | |
2894 case '/': | |
2895 case '\\': | |
2896 case '^': | |
2897 { | |
2898 int c1 = text_yyinput (); | |
2899 | |
2900 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2901 && next_token_can_follow_bin_op ()) | |
2902 retval = false; | |
2903 | |
2904 xunput (c1, yytext); | |
2905 } | |
2906 break; | |
2907 | |
2908 // .+ .- ./ .\ .^ .* .** | |
2909 case '.': | |
2910 { | |
2911 int c1 = text_yyinput (); | |
2912 | |
2913 if (match_any (c1, "+-/\\^*")) | |
2914 { | |
2915 int c2 = text_yyinput (); | |
2916 | |
2917 if (c2 == '=') | |
2918 { | |
2919 int c3 = text_yyinput (); | |
2920 | |
2921 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t') | |
2922 && next_token_can_follow_bin_op ()) | |
2923 retval = false; | |
2924 | |
2925 xunput (c3, yytext); | |
2926 } | |
2927 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2928 && next_token_can_follow_bin_op ()) | |
2929 retval = false; | |
2930 | |
2931 xunput (c2, yytext); | |
2932 } | |
2933 else if (! match_any (c1, ",;\n") | |
2934 && (! isdigit (c1) && c1 != ' ' && c1 != '\t' | |
2935 && c1 != '.')) | |
2936 { | |
2937 // Structure reference. FIXME -- is this a complete check? | |
2938 | |
2939 retval = false; | |
2940 } | |
2941 | |
2942 xunput (c1, yytext); | |
2943 } | |
2944 break; | |
2945 | |
2946 // & && | || * ** | |
2947 case '&': | |
2948 case '|': | |
2949 case '*': | |
2950 { | |
2951 int c1 = text_yyinput (); | |
2952 | |
2953 if (c1 == c0) | |
2954 { | |
2955 int c2 = text_yyinput (); | |
2956 | |
2957 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2958 && next_token_can_follow_bin_op ()) | |
2959 retval = false; | |
2960 | |
2961 xunput (c2, yytext); | |
2962 } | |
2963 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2964 && next_token_can_follow_bin_op ()) | |
2965 retval = false; | |
2966 | |
2967 xunput (c1, yytext); | |
2968 } | |
2969 break; | |
2970 | |
2971 // < <= > >= | |
2972 case '<': | |
2973 case '>': | |
2974 { | |
2975 int c1 = text_yyinput (); | |
2976 | |
2977 if (c1 == '=') | |
2978 { | |
2979 int c2 = text_yyinput (); | |
2980 | |
2981 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2982 && next_token_can_follow_bin_op ()) | |
2983 retval = false; | |
2984 | |
2985 xunput (c2, yytext); | |
2986 } | |
2987 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2988 && next_token_can_follow_bin_op ()) | |
2989 retval = false; | |
2990 | |
2991 xunput (c1, yytext); | |
2992 } | |
2993 break; | |
2994 | |
2995 // ~= != | |
2996 case '~': | |
2997 case '!': | |
2998 { | |
2999 int c1 = text_yyinput (); | |
3000 | |
3001 // ~ and ! can be unary ops, so require following =. | |
3002 if (c1 == '=') | |
3003 { | |
3004 int c2 = text_yyinput (); | |
3005 | |
3006 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3007 && next_token_can_follow_bin_op ()) | |
3008 retval = false; | |
3009 | |
3010 xunput (c2, yytext); | |
3011 } | |
3012 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3013 && next_token_can_follow_bin_op ()) | |
3014 retval = false; | |
3015 | |
3016 xunput (c1, yytext); | |
3017 } | |
3018 break; | |
3019 | |
3020 default: | |
3021 break; | |
3022 } | |
3023 | |
3024 xunput (c0, yytext); | |
3025 | |
3026 return retval; | |
3027 } | |
3028 | |
3029 int | |
3030 lexical_feedback::handle_superclass_identifier (void) | |
3031 { | |
3032 eat_continuation (); | |
3033 | |
3034 std::string pkg; | |
3035 std::string meth = strip_trailing_whitespace (yytext); | |
3036 size_t pos = meth.find ("@"); | |
3037 std::string cls = meth.substr (pos).substr (1); | |
3038 meth = meth.substr (0, pos - 1); | |
3039 | |
3040 pos = cls.find ("."); | |
3041 if (pos != std::string::npos) | |
3042 { | |
3043 pkg = cls.substr (pos).substr (1); | |
3044 cls = cls.substr (0, pos - 1); | |
3045 } | |
3046 | |
3047 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls) | |
3048 || is_keyword_token (pkg)); | |
3049 if (kw_token) | |
3050 { | |
3051 error ("method, class and package names may not be keywords"); | |
3052 return LEXICAL_ERROR; | |
3053 } | |
3054 | |
3055 yylval.tok_val | |
3056 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), | |
3057 cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3058 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3059 input_line_number, | |
3060 current_input_column); | |
3061 token_stack.push (yylval.tok_val); | |
3062 | |
3063 convert_spaces_to_comma = true; | |
3064 current_input_column += yyleng; | |
3065 | |
3066 return SUPERCLASSREF; | |
3067 } | |
3068 | |
3069 int | |
3070 lexical_feedback::handle_meta_identifier (void) | |
3071 { | |
3072 eat_continuation (); | |
3073 | |
3074 std::string pkg; | |
3075 std::string cls = strip_trailing_whitespace (yytext).substr (1); | |
3076 size_t pos = cls.find ("."); | |
3077 | |
3078 if (pos != std::string::npos) | |
3079 { | |
3080 pkg = cls.substr (pos).substr (1); | |
3081 cls = cls.substr (0, pos - 1); | |
3082 } | |
3083 | |
3084 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg); | |
3085 if (kw_token) | |
3086 { | |
3087 error ("class and package names may not be keywords"); | |
3088 return LEXICAL_ERROR; | |
3089 } | |
3090 | |
3091 yylval.tok_val | |
3092 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3093 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3094 input_line_number, | |
3095 current_input_column); | |
3096 | |
3097 token_stack.push (yylval.tok_val); | |
3098 | |
3099 convert_spaces_to_comma = true; | |
3100 current_input_column += yyleng; | |
3101 | |
3102 return METAQUERY; | |
3103 } | |
3104 | |
3105 // Figure out exactly what kind of token to return when we have seen | |
3106 // an identifier. Handles keywords. Return -1 if the identifier | |
3107 // should be ignored. | |
3108 | |
3109 int | |
3110 lexical_feedback::handle_identifier (void) | |
3111 { | |
3112 bool at_bos = at_beginning_of_statement; | |
3113 | |
3114 std::string tok = strip_trailing_whitespace (yytext); | |
3115 | |
3116 int c = yytext[yyleng-1]; | |
3117 | |
3118 bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
3119 | |
3120 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
3121 | |
3122 // If we are expecting a structure element, avoid recognizing | |
3123 // keywords and other special names and return STRUCT_ELT, which is | |
3124 // a string that is also a valid identifier. But first, we have to | |
3125 // decide whether to insert a comma. | |
3126 | |
3127 if (looking_at_indirect_ref) | |
3128 { | |
3129 do_comma_insert_check (); | |
3130 | |
3131 maybe_unput_comma (spc_gobbled); | |
3132 | |
3133 yylval.tok_val = new token (tok, input_line_number, | |
3134 current_input_column); | |
3135 | |
3136 token_stack.push (yylval.tok_val); | |
3137 | |
3138 quote_is_transpose = true; | |
3139 convert_spaces_to_comma = true; | |
3140 looking_for_object_index = true; | |
3141 | |
3142 current_input_column += yyleng; | |
3143 | |
3144 return STRUCT_ELT; | |
3145 } | |
3146 | |
3147 at_beginning_of_statement = false; | |
3148 | |
3149 // The is_keyword_token may reset | |
3150 // at_beginning_of_statement. For example, if it sees | |
3151 // an else token, then the next token is at the beginning of a | |
3152 // statement. | |
3153 | |
3154 int kw_token = is_keyword_token (tok); | |
3155 | |
3156 // If we found a keyword token, then the beginning_of_statement flag | |
3157 // is already set. Otherwise, we won't be at the beginning of a | |
3158 // statement. | |
3159 | |
3160 if (looking_at_function_handle) | |
3161 { | |
3162 if (kw_token) | |
3163 { | |
3164 error ("function handles may not refer to keywords"); | |
3165 | |
3166 return LEXICAL_ERROR; | |
3167 } | |
3168 else | |
3169 { | |
3170 yylval.tok_val = new token (tok, input_line_number, | |
3171 current_input_column); | |
3172 | |
3173 token_stack.push (yylval.tok_val); | |
3174 | |
3175 current_input_column += yyleng; | |
3176 quote_is_transpose = false; | |
3177 convert_spaces_to_comma = true; | |
3178 looking_for_object_index = true; | |
3179 | |
3180 return FCN_HANDLE; | |
3181 } | |
3182 } | |
3183 | |
3184 // If we have a regular keyword, return it. | |
3185 // Keywords can be followed by identifiers. | |
3186 | |
3187 if (kw_token) | |
3188 { | |
3189 if (kw_token >= 0) | |
3190 { | |
3191 current_input_column += yyleng; | |
3192 quote_is_transpose = false; | |
3193 convert_spaces_to_comma = true; | |
3194 looking_for_object_index = false; | |
3195 } | |
3196 | |
3197 return kw_token; | |
3198 } | |
3199 | |
3200 // See if we have a plot keyword (title, using, with, or clear). | |
3201 | |
3202 int c1 = text_yyinput (); | |
3203 | |
3204 bool next_tok_is_eq = false; | |
3205 if (c1 == '=') | |
3206 { | |
3207 int c2 = text_yyinput (); | |
3208 xunput (c2, yytext); | |
3209 | |
3210 if (c2 != '=') | |
3211 next_tok_is_eq = true; | |
3212 } | |
3213 | |
3214 xunput (c1, yytext); | |
3215 | |
3216 // Kluge alert. | |
3217 // | |
3218 // If we are looking at a text style function, set up to gobble its | |
3219 // arguments. | |
3220 // | |
3221 // If the following token is '=', or if we are parsing a function | |
3222 // return list or function parameter list, or if we are looking at | |
3223 // something like [ab,cd] = foo (), force the symbol to be inserted | |
3224 // as a variable in the current symbol table. | |
3225 | |
3226 if (! is_variable (tok)) | |
3227 { | |
3228 if (at_bos && spc_gobbled && can_be_command (tok) | |
3229 && looks_like_command_arg ()) | |
3230 { | |
3231 BEGIN (COMMAND_START); | |
3232 } | |
3233 else if (next_tok_is_eq | |
3234 || looking_at_decl_list | |
3235 || looking_at_return_list | |
3236 || (looking_at_parameter_list | |
3237 && ! looking_at_initializer_expression)) | |
3238 { | |
3239 symbol_table::force_variable (tok); | |
3240 } | |
3241 else if (looking_at_matrix_or_assign_lhs) | |
3242 { | |
3243 pending_local_variables.insert (tok); | |
3244 } | |
3245 } | |
3246 | |
3247 // Find the token in the symbol table. Beware the magic | |
3248 // transformation of the end keyword... | |
3249 | |
3250 if (tok == "end") | |
3251 tok = "__end__"; | |
3252 | |
3253 yylval.tok_val = new token (&(symbol_table::insert (tok)), | |
3254 input_line_number, | |
3255 current_input_column); | |
3256 | |
3257 token_stack.push (yylval.tok_val); | |
3258 | |
3259 // After seeing an identifer, it is ok to convert spaces to a comma | |
3260 // (if needed). | |
3261 | |
3262 convert_spaces_to_comma = true; | |
3263 | |
3264 if (! (next_tok_is_eq || YY_START == COMMAND_START)) | |
3265 { | |
3266 quote_is_transpose = true; | |
3267 | |
3268 do_comma_insert_check (); | |
3269 | |
3270 maybe_unput_comma (spc_gobbled); | |
3271 } | |
3272 | |
3273 current_input_column += yyleng; | |
3274 | |
3275 if (tok != "__end__") | |
3276 looking_for_object_index = true; | |
3277 | |
3278 return NAME; | |
3279 } | 1285 } |
3280 | 1286 |
3281 bool | 1287 bool |
3282 is_keyword (const std::string& s) | 1288 is_keyword (const std::string& s) |
3283 { | 1289 { |
3356 | 1362 |
3357 void | 1363 void |
3358 prep_lexer_for_function_file (void) | 1364 prep_lexer_for_function_file (void) |
3359 { | 1365 { |
3360 BEGIN (FUNCTION_FILE_BEGIN); | 1366 BEGIN (FUNCTION_FILE_BEGIN); |
1367 } | |
1368 | |
// Return a copy of S cut off at the first space or tab.  Used to
// delete trailing white space from tokens.

static std::string
strip_trailing_whitespace (char *s)
{
  const std::string text (s);

  const size_t first_blank = text.find_first_of (" \t");

  if (first_blank == std::string::npos)
    return text;

  return text.substr (0, first_blank);
}
3362 | 1383 |
3363 static int | 1384 static int |
3364 octave_read (char *buf, unsigned max_size) | 1385 octave_read (char *buf, unsigned max_size) |
3365 { | 1386 { |
3420 if (! eof) | 1441 if (! eof) |
3421 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); | 1442 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); |
3422 } | 1443 } |
3423 | 1444 |
3424 return status; | 1445 return status; |
3425 } | |
3426 | |
3427 void | |
3428 lexical_feedback::maybe_warn_separator_insert (char sep) | |
3429 { | |
3430 std::string nm = curr_fcn_file_full_name; | |
3431 | |
3432 if (nm.empty ()) | |
3433 warning_with_id ("Octave:separator-insert", | |
3434 "potential auto-insertion of '%c' near line %d", | |
3435 sep, input_line_number); | |
3436 else | |
3437 warning_with_id ("Octave:separator-insert", | |
3438 "potential auto-insertion of '%c' near line %d of file %s", | |
3439 sep, input_line_number, nm.c_str ()); | |
3440 } | |
3441 | |
3442 void | |
3443 lexical_feedback::gripe_single_quote_string (void) | |
3444 { | |
3445 std::string nm = curr_fcn_file_full_name; | |
3446 | |
3447 if (nm.empty ()) | |
3448 warning_with_id ("Octave:single-quote-string", | |
3449 "single quote delimited string near line %d", | |
3450 input_line_number); | |
3451 else | |
3452 warning_with_id ("Octave:single-quote-string", | |
3453 "single quote delimited string near line %d of file %s", | |
3454 input_line_number, nm.c_str ()); | |
3455 } | |
3456 | |
3457 void | |
3458 lexical_feedback::gripe_matlab_incompatible (const std::string& msg) | |
3459 { | |
3460 std::string nm = curr_fcn_file_full_name; | |
3461 | |
3462 if (nm.empty ()) | |
3463 warning_with_id ("Octave:matlab-incompatible", | |
3464 "potential Matlab compatibility problem: %s", | |
3465 msg.c_str ()); | |
3466 else | |
3467 warning_with_id ("Octave:matlab-incompatible", | |
3468 "potential Matlab compatibility problem: %s near line %d offile %s", | |
3469 msg.c_str (), input_line_number, nm.c_str ()); | |
3470 } | |
3471 | |
3472 void | |
3473 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c) | |
3474 { | |
3475 if (c == '#') | |
3476 gripe_matlab_incompatible ("# used as comment character"); | |
3477 } | |
3478 | |
3479 void | |
3480 lexical_feedback::gripe_matlab_incompatible_continuation (void) | |
3481 { | |
3482 gripe_matlab_incompatible ("\\ used as line continuation marker"); | |
3483 } | |
3484 | |
3485 void | |
3486 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op) | |
3487 { | |
3488 std::string t = op; | |
3489 int n = t.length (); | |
3490 if (t[n-1] == '\n') | |
3491 t.resize (n-1); | |
3492 gripe_matlab_incompatible (t + " used as operator"); | |
3493 } | 1446 } |
3494 | 1447 |
3495 static void | 1448 static void |
3496 display_token (int tok) | 1449 display_token (int tok) |
3497 { | 1450 { |
3692 retval = set_internal_variable (lexer_debug_flag, args, nargout, | 1645 retval = set_internal_variable (lexer_debug_flag, args, nargout, |
3693 "__lexer_debug_flag__"); | 1646 "__lexer_debug_flag__"); |
3694 | 1647 |
3695 return retval; | 1648 return retval; |
3696 } | 1649 } |
1650 | |
1651 lexical_feedback::~lexical_feedback (void) | |
1652 { | |
1653 // Clear out the stack of token info used to track line and | |
1654 // column numbers. | |
1655 | |
1656 while (! token_stack.empty ()) | |
1657 { | |
1658 delete token_stack.top (); | |
1659 token_stack.pop (); | |
1660 } | |
1661 } | |
1662 | |
1663 // GAG. | |
1664 // | |
1665 // If we're reading a matrix and the next character is '[', make sure | |
1666 // that we insert a comma ahead of it. | |
1667 | |
1668 void | |
1669 lexical_feedback::do_comma_insert_check (void) | |
1670 { | |
1671 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
1672 | |
1673 int c = text_yyinput (); | |
1674 | |
1675 xunput (c, yytext); | |
1676 | |
1677 if (spc_gobbled) | |
1678 xunput (' ', yytext); | |
1679 | |
1680 do_comma_insert = (! looking_at_object_index.front () | |
1681 && bracketflag && c == '['); | |
1682 } | |
1683 | |
1684 int | |
1685 lexical_feedback::text_yyinput (void) | |
1686 { | |
1687 int c = yyinput (); | |
1688 | |
1689 if (lexer_debug_flag) | |
1690 { | |
1691 std::cerr << "I: "; | |
1692 display_character (c); | |
1693 std::cerr << std::endl; | |
1694 } | |
1695 | |
1696 // Convert CRLF into just LF and single CR into LF. | |
1697 | |
1698 if (c == '\r') | |
1699 { | |
1700 c = yyinput (); | |
1701 | |
1702 if (lexer_debug_flag) | |
1703 { | |
1704 std::cerr << "I: "; | |
1705 display_character (c); | |
1706 std::cerr << std::endl; | |
1707 } | |
1708 | |
1709 if (c != '\n') | |
1710 { | |
1711 xunput (c, yytext); | |
1712 c = '\n'; | |
1713 } | |
1714 } | |
1715 | |
1716 if (c == '\n') | |
1717 input_line_number++; | |
1718 | |
1719 return c; | |
1720 } | |
1721 | |
1722 void | |
1723 lexical_feedback::xunput (char c, char *buf) | |
1724 { | |
1725 if (lexer_debug_flag) | |
1726 { | |
1727 std::cerr << "U: "; | |
1728 display_character (c); | |
1729 std::cerr << std::endl; | |
1730 } | |
1731 | |
1732 if (c == '\n') | |
1733 input_line_number--; | |
1734 | |
1735 yyunput (c, buf); | |
1736 } | |
1737 | |
1738 // If we read some newlines, we need figure out what column we're | |
1739 // really looking at. | |
1740 | |
1741 void | |
1742 lexical_feedback::fixup_column_count (char *s) | |
1743 { | |
1744 char c; | |
1745 while ((c = *s++) != '\0') | |
1746 { | |
1747 if (c == '\n') | |
1748 { | |
1749 input_line_number++; | |
1750 current_input_column = 1; | |
1751 } | |
1752 else | |
1753 current_input_column++; | |
1754 } | |
1755 } | |
1756 | |
1757 bool | |
1758 lexical_feedback::inside_any_object_index (void) | |
1759 { | |
1760 bool retval = false; | |
1761 | |
1762 for (std::list<bool>::const_iterator i = looking_at_object_index.begin (); | |
1763 i != looking_at_object_index.end (); i++) | |
1764 { | |
1765 if (*i) | |
1766 { | |
1767 retval = true; | |
1768 break; | |
1769 } | |
1770 } | |
1771 | |
1772 return retval; | |
1773 } | |
1774 | |
1775 // Handle keywords. Return -1 if the keyword should be ignored. | |
1776 | |
1777 int | |
1778 lexical_feedback::is_keyword_token (const std::string& s) | |
1779 { | |
1780 int l = input_line_number; | |
1781 int c = current_input_column; | |
1782 | |
1783 int len = s.length (); | |
1784 | |
1785 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); | |
1786 | |
1787 if (kw) | |
1788 { | |
1789 yylval.tok_val = 0; | |
1790 | |
1791 switch (kw->kw_id) | |
1792 { | |
1793 case break_kw: | |
1794 case catch_kw: | |
1795 case continue_kw: | |
1796 case else_kw: | |
1797 case otherwise_kw: | |
1798 case return_kw: | |
1799 case unwind_protect_cleanup_kw: | |
1800 at_beginning_of_statement = true; | |
1801 break; | |
1802 | |
1803 case static_kw: | |
1804 if ((reading_fcn_file || reading_script_file | |
1805 || reading_classdef_file) | |
1806 && ! curr_fcn_file_full_name.empty ()) | |
1807 warning_with_id ("Octave:deprecated-keyword", | |
1808 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'", | |
1809 input_line_number, | |
1810 curr_fcn_file_full_name.c_str ()); | |
1811 else | |
1812 warning_with_id ("Octave:deprecated-keyword", | |
1813 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", | |
1814 input_line_number); | |
1815 // fall through ... | |
1816 | |
1817 case persistent_kw: | |
1818 break; | |
1819 | |
1820 case case_kw: | |
1821 case elseif_kw: | |
1822 case global_kw: | |
1823 case until_kw: | |
1824 break; | |
1825 | |
1826 case end_kw: | |
1827 if (inside_any_object_index () | |
1828 || (! reading_classdef_file | |
1829 && (defining_func | |
1830 && ! (looking_at_return_list | |
1831 || parsed_function_name.top ())))) | |
1832 return 0; | |
1833 | |
1834 yylval.tok_val = new token (token::simple_end, l, c); | |
1835 at_beginning_of_statement = true; | |
1836 break; | |
1837 | |
1838 case end_try_catch_kw: | |
1839 yylval.tok_val = new token (token::try_catch_end, l, c); | |
1840 at_beginning_of_statement = true; | |
1841 break; | |
1842 | |
1843 case end_unwind_protect_kw: | |
1844 yylval.tok_val = new token (token::unwind_protect_end, l, c); | |
1845 at_beginning_of_statement = true; | |
1846 break; | |
1847 | |
1848 case endfor_kw: | |
1849 yylval.tok_val = new token (token::for_end, l, c); | |
1850 at_beginning_of_statement = true; | |
1851 break; | |
1852 | |
1853 case endfunction_kw: | |
1854 yylval.tok_val = new token (token::function_end, l, c); | |
1855 at_beginning_of_statement = true; | |
1856 break; | |
1857 | |
1858 case endif_kw: | |
1859 yylval.tok_val = new token (token::if_end, l, c); | |
1860 at_beginning_of_statement = true; | |
1861 break; | |
1862 | |
1863 case endparfor_kw: | |
1864 yylval.tok_val = new token (token::parfor_end, l, c); | |
1865 at_beginning_of_statement = true; | |
1866 break; | |
1867 | |
1868 case endswitch_kw: | |
1869 yylval.tok_val = new token (token::switch_end, l, c); | |
1870 at_beginning_of_statement = true; | |
1871 break; | |
1872 | |
1873 case endwhile_kw: | |
1874 yylval.tok_val = new token (token::while_end, l, c); | |
1875 at_beginning_of_statement = true; | |
1876 break; | |
1877 | |
1878 case endclassdef_kw: | |
1879 yylval.tok_val = new token (token::classdef_end, l, c); | |
1880 at_beginning_of_statement = true; | |
1881 break; | |
1882 | |
1883 case endenumeration_kw: | |
1884 yylval.tok_val = new token (token::enumeration_end, l, c); | |
1885 at_beginning_of_statement = true; | |
1886 break; | |
1887 | |
1888 case endevents_kw: | |
1889 yylval.tok_val = new token (token::events_end, l, c); | |
1890 at_beginning_of_statement = true; | |
1891 break; | |
1892 | |
1893 case endmethods_kw: | |
1894 yylval.tok_val = new token (token::methods_end, l, c); | |
1895 at_beginning_of_statement = true; | |
1896 break; | |
1897 | |
1898 case endproperties_kw: | |
1899 yylval.tok_val = new token (token::properties_end, l, c); | |
1900 at_beginning_of_statement = true; | |
1901 break; | |
1902 | |
1903 | |
1904 case for_kw: | |
1905 case parfor_kw: | |
1906 case while_kw: | |
1907 promptflag--; | |
1908 looping++; | |
1909 break; | |
1910 | |
1911 case do_kw: | |
1912 at_beginning_of_statement = true; | |
1913 promptflag--; | |
1914 looping++; | |
1915 break; | |
1916 | |
1917 case try_kw: | |
1918 case unwind_protect_kw: | |
1919 at_beginning_of_statement = true; | |
1920 promptflag--; | |
1921 break; | |
1922 | |
1923 case if_kw: | |
1924 case switch_kw: | |
1925 promptflag--; | |
1926 break; | |
1927 | |
1928 case get_kw: | |
1929 case set_kw: | |
1930 // 'get' and 'set' are keywords in classdef method | |
1931 // declarations. | |
1932 if (! maybe_classdef_get_set_method) | |
1933 return 0; | |
1934 break; | |
1935 | |
1936 case enumeration_kw: | |
1937 case events_kw: | |
1938 case methods_kw: | |
1939 case properties_kw: | |
1940 // 'properties', 'methods' and 'events' are keywords for | |
1941 // classdef blocks. | |
1942 if (! parsing_classdef) | |
1943 return 0; | |
1944 // fall through ... | |
1945 | |
1946 case classdef_kw: | |
1947 // 'classdef' is always a keyword. | |
1948 promptflag--; | |
1949 break; | |
1950 | |
1951 case function_kw: | |
1952 promptflag--; | |
1953 | |
1954 defining_func++; | |
1955 parsed_function_name.push (false); | |
1956 | |
1957 if (! (reading_fcn_file || reading_script_file | |
1958 || reading_classdef_file)) | |
1959 input_line_number = 1; | |
1960 break; | |
1961 | |
1962 case magic_file_kw: | |
1963 { | |
1964 if ((reading_fcn_file || reading_script_file | |
1965 || reading_classdef_file) | |
1966 && ! curr_fcn_file_full_name.empty ()) | |
1967 yylval.tok_val = new token (curr_fcn_file_full_name, l, c); | |
1968 else | |
1969 yylval.tok_val = new token ("stdin", l, c); | |
1970 } | |
1971 break; | |
1972 | |
1973 case magic_line_kw: | |
1974 yylval.tok_val = new token (static_cast<double> (l), "", l, c); | |
1975 break; | |
1976 | |
1977 default: | |
1978 panic_impossible (); | |
1979 } | |
1980 | |
1981 if (! yylval.tok_val) | |
1982 yylval.tok_val = new token (l, c); | |
1983 | |
1984 token_stack.push (yylval.tok_val); | |
1985 | |
1986 return kw->tok; | |
1987 } | |
1988 | |
1989 return 0; | |
1990 } | |
1991 | |
// Return true if NAME is reported as a variable by
// symbol_table::is_variable, or if NAME is present in
// pending_local_variables.
1992 bool | |
1993 lexical_feedback::is_variable (const std::string& name) | |
1994 { | |
1995 return (symbol_table::is_variable (name) | |
1996 || (pending_local_variables.find (name) | |
1997 != pending_local_variables.end ())); | |
1998 } | |
1999 | |
// Read the text of a block comment from READER and return it.
//
// A '%' or '#' at the beginning of a line that is followed by '{' or
// '}' and then only spaces or tabs up to the newline is treated as a
// marker: '{' increments block_comment_nesting_level and '}'
// decrements it.  When the level reaches zero, any comment text that
// follows is collected with grab_comment_block and the accumulated
// text is returned.  Otherwise reading continues until READER
// returns EOF.
//
// EOF is set to true if the last value read here was EOF; it is never
// set to false in this function.
2000 std::string | |
2001 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof) | |
2002 { | |
2003 std::string buf; | |
2004 | |
     // AT_BOL: only whitespace-free start of line has been seen so far.
     // LOOK_FOR_MARKER: the previous character was a comment character
     // at the beginning of a line, so this one may be '{' or '}'.
2005 bool at_bol = true; | |
2006 bool look_for_marker = false; | |
2007 | |
     // Warn about '#' comment characters at most once per call.
2008 bool warned_incompatible = false; | |
2009 | |
2010 int c = 0; | |
2011 | |
2012 while ((c = reader.getc ()) != EOF) | |
2013 { | |
2014 current_input_column++; | |
2015 | |
2016 if (look_for_marker) | |
2017 { | |
2018 at_bol = false; | |
2019 look_for_marker = false; | |
2020 | |
2021 if (c == '{' || c == '}') | |
2022 { | |
     // TMP_BUF holds the candidate marker text; it is only copied to
     // BUF if the line turns out not to be a marker line.
2023 std::string tmp_buf (1, static_cast<char> (c)); | |
2024 | |
2025 int type = c; | |
2026 | |
2027 bool done = false; | |
2028 | |
     // NOTE(review): the condition reads the next character before it
     // tests DONE, so when the loop stops because DONE is true one
     // more character has already been read into C (without
     // incrementing current_input_column).  That character is then
     // handled by the code following this if-block.  If the read
     // returned EOF, static_cast<char> (EOF) appears to be appended to
     // BUF below -- confirm whether that is intended.
2029 while ((c = reader.getc ()) != EOF && ! done) | |
2030 { | |
2031 current_input_column++; | |
2032 | |
2033 switch (c) | |
2034 { | |
2035 case ' ': | |
2036 case '\t': | |
2037 tmp_buf += static_cast<char> (c); | |
2038 break; | |
2039 | |
2040 case '\n': | |
2041 { | |
     // Only blanks followed the brace: this is a marker line, and its
     // text is not added to BUF.
2042 current_input_column = 0; | |
2043 at_bol = true; | |
2044 done = true; | |
2045 | |
2046 if (type == '{') | |
2047 { | |
2048 block_comment_nesting_level++; | |
2049 promptflag--; | |
2050 } | |
2051 else | |
2052 { | |
2053 block_comment_nesting_level--; | |
2054 promptflag++; | |
2055 | |
2056 if (block_comment_nesting_level == 0) | |
2057 { | |
     // Outermost block closed: pick up any comments that follow and
     // return everything collected.
2058 buf += grab_comment_block (reader, true, eof); | |
2059 | |
2060 return buf; | |
2061 } | |
2062 } | |
2063 } | |
2064 break; | |
2065 | |
2066 default: | |
     // Something other than blanks followed the brace, so this is
     // ordinary comment text.  The brace and what followed are kept;
     // the leading comment character was never added to BUF.
2067 at_bol = false; | |
2068 tmp_buf += static_cast<char> (c); | |
2069 buf += tmp_buf; | |
2070 done = true; | |
2071 break; | |
2072 } | |
2073 } | |
2074 } | |
2075 } | |
2076 | |
2077 if (at_bol && (c == '%' || c == '#')) | |
2078 { | |
2079 if (c == '#' && ! warned_incompatible) | |
2080 { | |
2081 warned_incompatible = true; | |
2082 maybe_gripe_matlab_incompatible_comment (c); | |
2083 } | |
2084 | |
     // Do not store the comment character; look at the next character
     // to see whether this line is a block marker.
2085 at_bol = false; | |
2086 look_for_marker = true; | |
2087 } | |
2088 else | |
2089 { | |
2090 buf += static_cast<char> (c); | |
2091 | |
2092 if (c == '\n') | |
2093 { | |
2094 current_input_column = 0; | |
2095 at_bol = true; | |
2096 } | |
2097 } | |
2098 } | |
2099 | |
2100 if (c == EOF) | |
2101 eof = true; | |
2102 | |
2103 return buf; | |
2104 } | |
2105 | |
// Read a run of line comments from READER and return their text.
//
// AT_BOL tells whether READER is positioned at the beginning of a
// line on entry.  Outside of a comment, spaces and tabs are skipped
// and the first character that is not a space, tab, '#' or '%' is
// pushed back with reader.ungetc, which ends the scan.  If the
// character after the comment character is '{' at the beginning of a
// line and only blanks follow up to the newline, the block comment is
// read with grab_block_comment.
//
// EOF is set to true if the last value read here was EOF; it is never
// set to false in this function.
2106 std::string | |
2107 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol, | |
2108 bool& eof) | |
2109 { | |
2110 std::string buf; | |
2111 | |
2112 // TRUE means we are at the beginning of a comment block. | |
2113 bool begin_comment = false; | |
2114 | |
2115 // TRUE means we are currently reading a comment block. | |
2116 bool in_comment = false; | |
2117 | |
     // Warn about '#' comment characters at most once per call.
2118 bool warned_incompatible = false; | |
2119 | |
2120 int c = 0; | |
2121 | |
2122 while ((c = reader.getc ()) != EOF) | |
2123 { | |
2124 current_input_column++; | |
2125 | |
2126 if (begin_comment) | |
2127 { | |
2128 if (c == '%' || c == '#') | |
2129 { | |
     // Additional comment characters right after the first one are
     // dropped.
2130 at_bol = false; | |
2131 continue; | |
2132 } | |
2133 else if (at_bol && c == '{') | |
2134 { | |
     // TMP_BUF holds the candidate marker text; it is only copied to
     // BUF if the line turns out not to be a marker line.
2135 std::string tmp_buf (1, static_cast<char> (c)); | |
2136 | |
2137 bool done = false; | |
2138 | |
     // NOTE(review): the condition reads the next character before it
     // tests DONE, so when the loop stops because DONE is true one
     // more character has already been read into C (without
     // incrementing current_input_column); it is handled by the code
     // after this if-block.  If that read returned EOF while
     // IN_COMMENT is still true, static_cast<char> (EOF) appears to be
     // appended to BUF below -- confirm whether that is intended.
2139 while ((c = reader.getc ()) != EOF && ! done) | |
2140 { | |
2141 current_input_column++; | |
2142 | |
2143 switch (c) | |
2144 { | |
2145 case ' ': | |
2146 case '\t': | |
2147 tmp_buf += static_cast<char> (c); | |
2148 break; | |
2149 | |
2150 case '\n': | |
2151 { | |
     // Only blanks followed the '{': a block comment starts here.
2152 current_input_column = 0; | |
2153 at_bol = true; | |
2154 done = true; | |
2155 | |
2156 block_comment_nesting_level++; | |
2157 promptflag--; | |
2158 | |
2159 buf += grab_block_comment (reader, eof); | |
2160 | |
2161 in_comment = false; | |
2162 | |
2163 if (eof) | |
2164 goto done; | |
2165 } | |
2166 break; | |
2167 | |
2168 default: | |
     // Not a marker line: keep the '{' and what followed as ordinary
     // comment text.
2169 at_bol = false; | |
2170 tmp_buf += static_cast<char> (c); | |
2171 buf += tmp_buf; | |
2172 done = true; | |
2173 break; | |
2174 } | |
2175 } | |
2176 } | |
2177 else | |
2178 { | |
2179 at_bol = false; | |
2180 begin_comment = false; | |
2181 } | |
2182 } | |
2183 | |
2184 if (in_comment) | |
2185 { | |
2186 buf += static_cast<char> (c); | |
2187 | |
2188 if (c == '\n') | |
2189 { | |
2190 at_bol = true; | |
2191 current_input_column = 0; | |
2192 in_comment = false; | |
2193 | |
2194 // FIXME -- bailing out here prevents things like | |
2195 // | |
2196 // octave> # comment | |
2197 // octave> x = 1 | |
2198 // | |
2199 // from failing at the command line, while still | |
2200 // allowing blocks of comments to be grabbed properly | |
2201 // for function doc strings. But only the first line of | |
2202 // a mult-line doc string will be picked up for | |
2203 // functions defined on the command line. We need a | |
2204 // better way of collecting these comments... | |
2205 if (! (reading_fcn_file || reading_script_file)) | |
2206 goto done; | |
2207 } | |
2208 } | |
2209 else | |
2210 { | |
2211 switch (c) | |
2212 { | |
2213 case ' ': | |
2214 case '\t': | |
     // Blanks outside of a comment are skipped and not stored.
2215 break; | |
2216 | |
2217 case '#': | |
2218 if (! warned_incompatible) | |
2219 { | |
2220 warned_incompatible = true; | |
2221 maybe_gripe_matlab_incompatible_comment (c); | |
2222 } | |
2223 // fall through... | |
2224 | |
2225 case '%': | |
2226 in_comment = true; | |
2227 begin_comment = true; | |
2228 break; | |
2229 | |
2230 default: | |
     // First character that does not belong to a comment: give it back
     // to the reader and stop.
2231 current_input_column--; | |
2232 reader.ungetc (c); | |
2233 goto done; | |
2234 } | |
2235 } | |
2236 } | |
2237 | |
2238 done: | |
2239 | |
2240 if (c == EOF) | |
2241 eof = true; | |
2242 | |
2243 return buf; | |
2244 } | |
2245 | |
// Collect comment text starting at the current input position, record
// it, and reset the lexer state for the start of a new statement.
//
// START_IN_BLOCK selects grab_block_comment (true) or
// grab_comment_block (false) to read the text.  EOF is reset to false
// on entry and may be set to true by the function that reads the
// text.
//
// Returns '\n' when nesting_level.none () is true, ';' when
// nesting_level.is_bracket_or_brace () is true, and 0 otherwise.
2246 int | |
2247 lexical_feedback::process_comment (bool start_in_block, bool& eof) | |
2248 { | |
2249 eof = false; | |
2250 | |
2251 std::string help_txt; | |
2252 | |
2253 if (! help_buf.empty ()) | |
2254 help_txt = help_buf.top (); | |
2255 | |
2256 flex_stream_reader flex_reader (yytext); | |
2257 | |
2258 // process_comment is only supposed to be called when we are not | |
2259 // initially looking at a block comment. | |
     // NOTE(review): the statement above does not match the
     // START_IN_BLOCK parameter, which selects grab_block_comment
     // below -- confirm which is current.
2260 | |
2261 std::string txt = start_in_block | |
2262 ? grab_block_comment (flex_reader, eof) | |
2263 : grab_comment_block (flex_reader, false, eof); | |
2264 | |
2265 if (lexer_debug_flag) | |
2266 std::cerr << "C: " << txt << std::endl; | |
2267 | |
     // If no help text has been stored yet (HELP_BUF is empty or its
     // top entry is an empty string) and we are not nested, this
     // comment replaces the top entry of HELP_BUF.
2268 if (help_txt.empty () && nesting_level.none ()) | |
2269 { | |
2270 if (! help_buf.empty ()) | |
2271 help_buf.pop (); | |
2272 | |
2273 help_buf.push (txt); | |
2274 } | |
2275 | |
2276 octave_comment_buffer::append (txt); | |
2277 | |
2278 current_input_column = 1; | |
2279 quote_is_transpose = false; | |
2280 convert_spaces_to_comma = true; | |
2281 at_beginning_of_statement = true; | |
2282 | |
2283 if (YY_START == COMMAND_START) | |
2284 BEGIN (INITIAL); | |
2285 | |
2286 if (nesting_level.none ()) | |
2287 return '\n'; | |
2288 else if (nesting_level.is_bracket_or_brace ()) | |
2289 return ';'; | |
2290 else | |
2291 return 0; | |
2292 } | |
2293 | |
2294 // Recognize separators. If the separator is a CRLF pair, it is | |
2295 // replaced by a single LF. | |
2296 | |
// Peek at the next input character and return true if it is one of
// ',', ';', newline or ']'.  The character is pushed back with xunput
// before returning.
2297 bool | |
2298 lexical_feedback::next_token_is_sep_op (void) | |
2299 { | |
2300 bool retval = false; | |
2301 | |
2302 int c = text_yyinput (); | |
2303 | |
2304 retval = match_any (c, ",;\n]"); | |
2305 | |
2306 xunput (c, yytext); | |
2307 | |
2308 return retval; | |
2309 } | |
2310 | |
2311 // Try to determine if the next token should be treated as a postfix | |
2312 // unary operator. This is ugly, but it seems to do the right thing. | |
2313 | |
// Peek at up to two input characters and return true if they look
// like a postfix unary operator: a single quote when SPC_PREV is
// false, or one of the two-character sequences .' ++ -- (SPC_PREV is
// not consulted for those).  Everything read is pushed back with
// xunput, last character first.
2314 bool | |
2315 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev) | |
2316 { | |
2317 bool un_op = false; | |
2318 | |
2319 int c0 = text_yyinput (); | |
2320 | |
2321 if (c0 == '\'' && ! spc_prev) | |
2322 { | |
2323 un_op = true; | |
2324 } | |
2325 else if (c0 == '.') | |
2326 { | |
2327 int c1 = text_yyinput (); | |
2328 un_op = (c1 == '\''); | |
2329 xunput (c1, yytext); | |
2330 } | |
2331 else if (c0 == '+') | |
2332 { | |
2333 int c1 = text_yyinput (); | |
2334 un_op = (c1 == '+'); | |
2335 xunput (c1, yytext); | |
2336 } | |
2337 else if (c0 == '-') | |
2338 { | |
2339 int c1 = text_yyinput (); | |
2340 un_op = (c1 == '-'); | |
2341 xunput (c1, yytext); | |
2342 } | |
2343 | |
2344 xunput (c0, yytext); | |
2345 | |
2346 return un_op; | |
2347 } | |
2348 | |
2349 // Try to determine if the next token should be treated as a binary | |
2350 // operator. | |
2351 // | |
2352 // This kluge exists because whitespace is not always ignored inside | |
2353 // the square brackets that are used to create matrix objects (though | |
2354 // spacing only really matters in the cases that can be interpreted | |
2355 // either as binary ops or prefix unary ops: currently just +, -). | |
2356 // | |
2357 // Note that a line continuation directly following a + or - operator | |
2358 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
2359 // parsed as a binary operator. | |
2360 | |
// Peek at up to two input characters and return true if they should
// be treated as the start of a binary operator.  SPC_PREV is only
// used for a '+' or '-' that is not followed by '+', '-' or '=';
// that case is decided by looks_like_bin_op.  Everything read is
// pushed back with xunput, last character first.
2361 bool | |
2362 lexical_feedback::next_token_is_bin_op (bool spc_prev) | |
2363 { | |
2364 bool bin_op = false; | |
2365 | |
2366 int c0 = text_yyinput (); | |
2367 | |
2368 switch (c0) | |
2369 { | |
2370 case '+': | |
2371 case '-': | |
2372 { | |
2373 int c1 = text_yyinput (); | |
2374 | |
2375 switch (c1) | |
2376 { | |
2377 case '+': | |
2378 case '-': | |
2379 // Unary ops, spacing doesn't matter. | |
2380 break; | |
2381 | |
2382 case '=': | |
2383 // Binary ops, spacing doesn't matter. | |
2384 bin_op = true; | |
2385 break; | |
2386 | |
2387 default: | |
2388 // Could be either, spacing matters. | |
2389 bin_op = looks_like_bin_op (spc_prev, c1); | |
2390 break; | |
2391 } | |
2392 | |
2393 xunput (c1, yytext); | |
2394 } | |
2395 break; | |
2396 | |
2397 case ':': | |
2398 case '/': | |
2399 case '\\': | |
2400 case '^': | |
2401 // Always a binary op (may also include /=, \=, and ^=). | |
2402 bin_op = true; | |
2403 break; | |
2404 | |
2405 // .+ .- ./ .\ .^ .* .** | |
2406 case '.': | |
2407 { | |
2408 int c1 = text_yyinput (); | |
2409 | |
2410 if (match_any (c1, "+-/\\^*")) | |
2411 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
2412 bin_op = true; | |
     // A '.' followed by a digit, a blank or another '.' is left as
     // "not a binary op" here.
2413 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') | |
2414 // A structure element reference is a binary op. | |
2415 bin_op = true; | |
2416 | |
2417 xunput (c1, yytext); | |
2418 } | |
2419 break; | |
2420 | |
2421 // = == & && | || * ** | |
2422 case '=': | |
2423 case '&': | |
2424 case '|': | |
2425 case '*': | |
2426 // Always a binary op (may also include ==, &&, ||, **). | |
2427 bin_op = true; | |
2428 break; | |
2429 | |
2430 // < <= <> > >= | |
2431 case '<': | |
2432 case '>': | |
2433 // Always a binary op (may also include <=, <>, >=). | |
2434 bin_op = true; | |
2435 break; | |
2436 | |
2437 // ~= != | |
2438 case '~': | |
2439 case '!': | |
2440 { | |
2441 int c1 = text_yyinput (); | |
2442 | |
2443 // ~ and ! can be unary ops, so require following =. | |
2444 if (c1 == '=') | |
2445 bin_op = true; | |
2446 | |
2447 xunput (c1, yytext); | |
2448 } | |
2449 break; | |
2450 | |
2451 default: | |
2452 break; | |
2453 } | |
2454 | |
2455 xunput (c0, yytext); | |
2456 | |
2457 return bin_op; | |
2458 } | |
2459 | |
2460 // FIXME -- we need to handle block comments here. | |
2461 | |
// Scan the NUL-terminated string TEXT for line comments and hand each
// one to octave_comment_buffer::append.
//
// A comment starts at the first '%' or '#' seen outside of a comment
// and runs through the next newline, which is stored with it.  The
// opening comment character is not stored, and neither are comment
// characters that immediately follow it.  Text left in the buffer
// when TEXT ends without a newline is appended as well.
2462 void | |
2463 lexical_feedback::scan_for_comments (const char *text) | |
2464 { | |
2465 std::string comment_buf; | |
2466 | |
     // BEGINNING_OF_COMMENT stays true until the first character that
     // is not '%' or '#' has been seen in the current comment.
2467 bool in_comment = false; | |
2468 bool beginning_of_comment = false; | |
2469 | |
2470 int len = strlen (text); | |
2471 int i = 0; | |
2472 | |
2473 while (i < len) | |
2474 { | |
2475 char c = text[i++]; | |
2476 | |
2477 switch (c) | |
2478 { | |
2479 case '%': | |
2480 case '#': | |
2481 if (in_comment) | |
2482 { | |
2483 if (! beginning_of_comment) | |
2484 comment_buf += static_cast<char> (c); | |
2485 } | |
2486 else | |
2487 { | |
2488 maybe_gripe_matlab_incompatible_comment (c); | |
2489 in_comment = true; | |
2490 beginning_of_comment = true; | |
2491 } | |
2492 break; | |
2493 | |
2494 case '\n': | |
2495 if (in_comment) | |
2496 { | |
     // End of this comment: store it and start over with an empty
     // buffer.
2497 comment_buf += static_cast<char> (c); | |
2498 octave_comment_buffer::append (comment_buf); | |
2499 comment_buf.resize (0); | |
2500 in_comment = false; | |
2501 beginning_of_comment = false; | |
2502 } | |
2503 break; | |
2504 | |
2505 default: | |
2506 if (in_comment) | |
2507 { | |
2508 comment_buf += static_cast<char> (c); | |
2509 beginning_of_comment = false; | |
2510 } | |
2511 break; | |
2512 } | |
2513 } | |
2514 | |
2515 if (! comment_buf.empty ()) | |
2516 octave_comment_buffer::append (comment_buf); | |
2517 } | |
2518 | |
2519 // Discard whitespace, including comments and continuations. | |
2520 | |
2521 // FIXME -- we need to handle block comments here. | |
2522 | |
// Consume spaces, tabs, newlines, line comments and continuation
// lines from the input.
//
// Returns lexical_feedback::NO_WHITESPACE with
// lexical_feedback::SPACE_OR_TAB and/or lexical_feedback::NEWLINE
// OR-ed in for each kind of character that was seen.  Spaces and tabs
// inside a comment also set SPACE_OR_TAB.  Comment text is passed to
// octave_comment_buffer::append.  The character that stops the scan
// is pushed back with xunput.
2523 int | |
2524 lexical_feedback::eat_whitespace (void) | |
2525 { | |
2526 int retval = lexical_feedback::NO_WHITESPACE; | |
2527 | |
2528 std::string comment_buf; | |
2529 | |
     // BEGINNING_OF_COMMENT stays true until the first character that
     // is not '%' or '#' has been seen in the current comment.
2530 bool in_comment = false; | |
2531 bool beginning_of_comment = false; | |
2532 | |
2533 int c = 0; | |
2534 | |
2535 while ((c = text_yyinput ()) != EOF) | |
2536 { | |
2537 current_input_column++; | |
2538 | |
2539 switch (c) | |
2540 { | |
2541 case ' ': | |
2542 case '\t': | |
2543 if (in_comment) | |
2544 { | |
2545 comment_buf += static_cast<char> (c); | |
2546 beginning_of_comment = false; | |
2547 } | |
2548 retval |= lexical_feedback::SPACE_OR_TAB; | |
2549 break; | |
2550 | |
2551 case '\n': | |
2552 retval |= lexical_feedback::NEWLINE; | |
2553 if (in_comment) | |
2554 { | |
2555 comment_buf += static_cast<char> (c); | |
2556 octave_comment_buffer::append (comment_buf); | |
2557 comment_buf.resize (0); | |
2558 in_comment = false; | |
2559 beginning_of_comment = false; | |
2560 } | |
2561 current_input_column = 0; | |
2562 break; | |
2563 | |
2564 case '#': | |
2565 case '%': | |
2566 if (in_comment) | |
2567 { | |
2568 if (! beginning_of_comment) | |
2569 comment_buf += static_cast<char> (c); | |
2570 } | |
2571 else | |
2572 { | |
2573 maybe_gripe_matlab_incompatible_comment (c); | |
2574 in_comment = true; | |
2575 beginning_of_comment = true; | |
2576 } | |
2577 break; | |
2578 | |
2579 case '.': | |
2580 if (in_comment) | |
2581 { | |
2582 comment_buf += static_cast<char> (c); | |
2583 beginning_of_comment = false; | |
2584 break; | |
2585 } | |
2586 else | |
2587 { | |
     // Outside of a comment a '.' is only whitespace if it starts a
     // continuation that have_ellipsis_continuation accepts.
2588 if (have_ellipsis_continuation ()) | |
2589 break; | |
2590 else | |
2591 goto done; | |
2592 } | |
2593 | |
2594 case '\\': | |
2595 if (in_comment) | |
2596 { | |
2597 comment_buf += static_cast<char> (c); | |
2598 beginning_of_comment = false; | |
2599 break; | |
2600 } | |
2601 else | |
2602 { | |
     // Likewise for a backslash and have_continuation.
2603 if (have_continuation ()) | |
2604 break; | |
2605 else | |
2606 goto done; | |
2607 } | |
2608 | |
2609 default: | |
2610 if (in_comment) | |
2611 { | |
2612 comment_buf += static_cast<char> (c); | |
2613 beginning_of_comment = false; | |
2614 break; | |
2615 } | |
2616 else | |
2617 goto done; | |
2618 } | |
2619 } | |
2620 | |
     // Reached only when the loop ended at EOF: store a comment that
     // was not terminated by a newline.
2621 if (! comment_buf.empty ()) | |
2622 octave_comment_buffer::append (comment_buf); | |
2623 | |
2624 done: | |
     // NOTE(review): when the loop ended at EOF, C is EOF here and is
     // still passed to xunput, and current_input_column is decremented
     // -- confirm that xunput is meant to receive EOF.
2625 xunput (c, yytext); | |
2626 current_input_column--; | |
2627 return retval; | |
2628 } | |
2629 | |
// Return true if LEN is greater than 2 and S begins with "0x" or
// "0X".  Only the first two characters of S are examined.
2630 static inline bool | |
2631 looks_like_hex (const char *s, int len) | |
2632 { | |
2633 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')); | |
2634 } | |
2635 | |
// Convert the text in yytext to a double, create a token for it, and
// update the lexer state.
//
// Text that looks_like_hex accepts is read with the "%lx" sscanf
// conversion.  Any other text is copied, the first 'D' or 'd' in the
// copy (if any) is replaced by 'e', and the copy is read with "%lf".
// The conversion is required to succeed (see the assert).
2636 void | |
2637 lexical_feedback::handle_number (void) | |
2638 { | |
2639 double value = 0.0; | |
2640 int nread = 0; | |
2641 | |
2642 if (looks_like_hex (yytext, strlen (yytext))) | |
2643 { | |
2644 unsigned long ival; | |
2645 | |
2646 nread = sscanf (yytext, "%lx", &ival); | |
2647 | |
2648 value = static_cast<double> (ival); | |
2649 } | |
2650 else | |
2651 { | |
     // Work on a copy so that yytext itself is not modified; the copy
     // is released with delete [] below.
2652 char *tmp = strsave (yytext); | |
2653 | |
2654 char *idx = strpbrk (tmp, "Dd"); | |
2655 | |
2656 if (idx) | |
2657 *idx = 'e'; | |
2658 | |
2659 nread = sscanf (tmp, "%lf", &value); | |
2660 | |
2661 delete [] tmp; | |
2662 } | |
2663 | |
2664 // If yytext doesn't contain a valid number, we are in deep doo doo. | |
2665 | |
2666 assert (nread == 1); | |
2667 | |
2668 quote_is_transpose = true; | |
2669 convert_spaces_to_comma = true; | |
2670 looking_for_object_index = false; | |
2671 at_beginning_of_statement = false; | |
2672 | |
     // The token keeps both the numeric value and the original text,
     // and records the column where the number starts.
2673 yylval.tok_val = new token (value, yytext, input_line_number, | |
2674 current_input_column); | |
2675 | |
2676 token_stack.push (yylval.tok_val); | |
2677 | |
2678 current_input_column += yyleng; | |
2679 | |
2680 do_comma_insert_check (); | |
2681 } | |
2682 | |
2683 // We have seen a backslash and need to find out if it should be | |
2684 // treated as a continuation character. If so, this eats it, up to | |
2685 // and including the new line character. | |
2686 // | |
2687 // Match whitespace only, followed by a comment character or newline. | |
2688 // Once a comment character is found, discard all input until newline. | |
2689 // If non-whitespace characters are found before comment | |
2690 // characters, return 0. Otherwise, return 1. | |
2691 | |
2692 // FIXME -- we need to handle block comments here. | |
2693 | |
// Return true if the rest of the current line consists only of
// spaces and tabs, optionally followed (when TRAILING_COMMENTS_OK is
// true) by a comment, up to and including a newline.  In that case
// the text is consumed, the comment (if any) is passed to
// octave_comment_buffer::append, current_input_column is reset to 0,
// promptflag is decremented and gripe_matlab_incompatible_continuation
// is called.
//
// Return false otherwise.  When the scan is stopped by a character
// that cannot be part of a continuation, every character read here is
// pushed back with xunput.
2694 bool | |
2695 lexical_feedback::have_continuation (bool trailing_comments_ok) | |
2696 { | |
     // BUF records every character read so that the input can be
     // restored at the cleanup label.
2697 std::ostringstream buf; | |
2698 | |
2699 std::string comment_buf; | |
2700 | |
2701 bool in_comment = false; | |
2702 bool beginning_of_comment = false; | |
2703 | |
2704 int c = 0; | |
2705 | |
2706 while ((c = text_yyinput ()) != EOF) | |
2707 { | |
2708 buf << static_cast<char> (c); | |
2709 | |
2710 switch (c) | |
2711 { | |
2712 case ' ': | |
2713 case '\t': | |
2714 if (in_comment) | |
2715 { | |
2716 comment_buf += static_cast<char> (c); | |
2717 beginning_of_comment = false; | |
2718 } | |
2719 break; | |
2720 | |
2721 case '%': | |
2722 case '#': | |
2723 if (trailing_comments_ok) | |
2724 { | |
2725 if (in_comment) | |
2726 { | |
2727 if (! beginning_of_comment) | |
2728 comment_buf += static_cast<char> (c); | |
2729 } | |
2730 else | |
2731 { | |
2732 maybe_gripe_matlab_incompatible_comment (c); | |
2733 in_comment = true; | |
2734 beginning_of_comment = true; | |
2735 } | |
2736 } | |
2737 else | |
2738 goto cleanup; | |
2739 break; | |
2740 | |
2741 case '\n': | |
     // Found the end of the line: this is a continuation.
2742 if (in_comment) | |
2743 { | |
2744 comment_buf += static_cast<char> (c); | |
2745 octave_comment_buffer::append (comment_buf); | |
2746 } | |
2747 current_input_column = 0; | |
2748 promptflag--; | |
2749 gripe_matlab_incompatible_continuation (); | |
2750 return true; | |
2751 | |
2752 default: | |
2753 if (in_comment) | |
2754 { | |
2755 comment_buf += static_cast<char> (c); | |
2756 beginning_of_comment = false; | |
2757 } | |
2758 else | |
2759 goto cleanup; | |
2760 break; | |
2761 } | |
2762 } | |
2763 | |
     // NOTE(review): this point is reached at EOF.  Only the EOF value
     // is pushed back; the characters already read (held in BUF) are
     // not restored -- confirm whether that is intended.
2764 xunput (c, yytext); | |
2765 return false; | |
2766 | |
2767 cleanup: | |
2768 | |
     // Push back everything that was read, last character first, so
     // that the input is left as it was on entry.
2769 std::string s = buf.str (); | |
2770 | |
2771 int len = s.length (); | |
2772 while (len--) | |
2773 xunput (s[len], yytext); | |
2774 | |
2775 return false; | |
2776 } | |
2777 | |
2778 // We have seen a '.' and need to see if it is the start of a | |
2779 // continuation. If so, this eats it, up to and including the new | |
2780 // line character. | |
2781 | |
// Called after one '.' has been read.  Return true if the next two
// characters are also '.' and have_continuation accepts what follows
// them; in that case the text has been consumed.  Otherwise push back
// the characters read here with xunput (last one first) and return
// false.
//
// TRAILING_COMMENTS_OK is passed through to have_continuation.
2782 bool | |
2783 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok) | |
2784 { | |
     // Keep the values returned by text_yyinput in int, as the other
     // callers visible in this file do, so that they are not narrowed
     // before being compared and handed back to xunput.
2785 int c1 = text_yyinput (); | |
2786 if (c1 == '.') | |
2787 { | |
2788 int c2 = text_yyinput (); | |
     // have_continuation is only tried once the full "..." has been
     // seen.
2789 if (c2 == '.' && have_continuation (trailing_comments_ok)) | |
2790 return true; | |
2791 else | |
2792 { | |
2793 xunput (c2, yytext); | |
2794 xunput (c1, yytext); | |
2795 } | |
2796 } | |
2797 else | |
2798 xunput (c1, yytext); | |
2799 | |
2800 return false; | |
2801 } | |
2802 | |
2803 // See if we have a continuation line. If so, eat it and the leading | |
2804 // whitespace on the next line. | |
2805 | |
// Read one character.  If it is a '.' accepted by
// have_ellipsis_continuation or a backslash accepted by
// have_continuation, return the result of eat_whitespace.  Otherwise
// push the character back with xunput and return
// lexical_feedback::NO_WHITESPACE.
2806 int | |
2807 lexical_feedback::eat_continuation (void) | |
2808 { | |
2809 int retval = lexical_feedback::NO_WHITESPACE; | |
2810 | |
2811 int c = text_yyinput (); | |
2812 | |
2813 if ((c == '.' && have_ellipsis_continuation ()) | |
2814 || (c == '\\' && have_continuation ())) | |
2815 retval = eat_whitespace (); | |
2816 else | |
2817 xunput (c, yytext); | |
2818 | |
2819 return retval; | |
2820 } | |
2821 | |
// Read a character string whose opening delimiter DELIM has already
// been consumed.
//
// On success a token holding the string contents is stored in
// yylval.tok_val and pushed on token_stack, and DQ_STRING is returned
// when DELIM is a double quote, SQ_STRING otherwise.  Two delimiters
// in a row inside the string stand for one delimiter character.  When
// DELIM is a single quote, backslashes and dots are copied literally;
// otherwise continuations are recognized and do_string_escapes is
// applied to the collected text.
//
// A newline inside the string reports "unterminated string constant"
// and, like reaching EOF, results in LEXICAL_ERROR.
2822 int | |
2823 lexical_feedback::handle_string (char delim) | |
2824 { | |
2825 std::ostringstream buf; | |
2826 | |
     // Remember where the string began for the token position.
2827 int bos_line = input_line_number; | |
2828 int bos_col = current_input_column; | |
2829 | |
2830 int c; | |
     // Nonzero while the previous character was an unescaped backslash
     // (only ever set when DELIM is not a single quote).
2831 int escape_pending = 0; | |
2832 | |
2833 while ((c = text_yyinput ()) != EOF) | |
2834 { | |
2835 current_input_column++; | |
2836 | |
2837 if (c == '\\') | |
2838 { | |
2839 if (delim == '\'' || escape_pending) | |
2840 { | |
2841 buf << static_cast<char> (c); | |
2842 escape_pending = 0; | |
2843 } | |
2844 else | |
2845 { | |
     // A backslash that have_continuation accepts is dropped from the
     // string along with the rest of the line.
2846 if (have_continuation (false)) | |
2847 escape_pending = 0; | |
2848 else | |
2849 { | |
2850 buf << static_cast<char> (c); | |
2851 escape_pending = 1; | |
2852 } | |
2853 } | |
     // Skip the reset of ESCAPE_PENDING at the bottom of the loop.
2854 continue; | |
2855 } | |
2856 else if (c == '.') | |
2857 { | |
2858 if (delim == '\'' || ! have_ellipsis_continuation (false)) | |
2859 buf << static_cast<char> (c); | |
2860 } | |
2861 else if (c == '\n') | |
2862 { | |
2863 error ("unterminated string constant"); | |
2864 break; | |
2865 } | |
2866 else if (c == delim) | |
2867 { | |
2868 if (escape_pending) | |
2869 buf << static_cast<char> (c); | |
2870 else | |
2871 { | |
     // Look one character ahead to tell a doubled delimiter from the
     // end of the string.
2872 c = text_yyinput (); | |
2873 if (c == delim) | |
2874 { | |
2875 buf << static_cast<char> (c); | |
2876 } | |
2877 else | |
2878 { | |
2879 std::string s; | |
2880 xunput (c, yytext); | |
2881 | |
2882 if (delim == '\'') | |
2883 s = buf.str (); | |
2884 else | |
2885 s = do_string_escapes (buf.str ()); | |
2886 | |
2887 quote_is_transpose = true; | |
2888 convert_spaces_to_comma = true; | |
2889 | |
2890 yylval.tok_val = new token (s, bos_line, bos_col); | |
2891 token_stack.push (yylval.tok_val); | |
2892 | |
2893 if (delim == '"') | |
2894 gripe_matlab_incompatible ("\" used as string delimiter"); | |
2895 else if (delim == '\'') | |
2896 gripe_single_quote_string (); | |
2897 | |
2898 looking_for_object_index = true; | |
2899 at_beginning_of_statement = false; | |
2900 | |
2901 return delim == '"' ? DQ_STRING : SQ_STRING; | |
2902 } | |
2903 } | |
2904 } | |
2905 else | |
2906 { | |
2907 buf << static_cast<char> (c); | |
2908 } | |
2909 | |
2910 escape_pending = 0; | |
2911 } | |
2912 | |
2913 return LEXICAL_ERROR; | |
2914 } | |
2915 | |
// Peek at up to three input characters and return true if they begin
// an assignment operator:
//
//   =  (when not followed by another '=')
//   +=  -=  *=  /=  \=  &=  |=
//   .+=  .-=  .*=  ./=  .\=
//   >>=  <<=
//
// Everything read is pushed back with xunput, last character first.
2916 bool | |
2917 lexical_feedback::next_token_is_assign_op (void) | |
2918 { | |
2919 bool retval = false; | |
2920 | |
2921 int c0 = text_yyinput (); | |
2922 | |
2923 switch (c0) | |
2924 { | |
2925 case '=': | |
2926 { | |
2927 int c1 = text_yyinput (); | |
2928 xunput (c1, yytext); | |
     // "==" is not an assignment.
2929 if (c1 != '=') | |
2930 retval = true; | |
2931 } | |
2932 break; | |
2933 | |
2934 case '+': | |
2935 case '-': | |
2936 case '*': | |
2937 case '/': | |
2938 case '\\': | |
2939 case '&': | |
2940 case '|': | |
2941 { | |
2942 int c1 = text_yyinput (); | |
2943 xunput (c1, yytext); | |
2944 if (c1 == '=') | |
2945 retval = true; | |
2946 } | |
2947 break; | |
2948 | |
2949 case '.': | |
2950 { | |
2951 int c1 = text_yyinput (); | |
2952 if (match_any (c1, "+-*/\\")) | |
2953 { | |
2954 int c2 = text_yyinput (); | |
2955 xunput (c2, yytext); | |
2956 if (c2 == '=') | |
2957 retval = true; | |
2958 } | |
2959 xunput (c1, yytext); | |
2960 } | |
2961 break; | |
2962 | |
2963 case '>': | |
2964 { | |
2965 int c1 = text_yyinput (); | |
2966 if (c1 == '>') | |
2967 { | |
2968 int c2 = text_yyinput (); | |
2969 xunput (c2, yytext); | |
2970 if (c2 == '=') | |
2971 retval = true; | |
2972 } | |
2973 xunput (c1, yytext); | |
2974 } | |
2975 break; | |
2976 | |
2977 case '<': | |
2978 { | |
2979 int c1 = text_yyinput (); | |
2980 if (c1 == '<') | |
2981 { | |
2982 int c2 = text_yyinput (); | |
2983 xunput (c2, yytext); | |
2984 if (c2 == '=') | |
2985 retval = true; | |
2986 } | |
2987 xunput (c1, yytext); | |
2988 } | |
2989 break; | |
2990 | |
2991 default: | |
2992 break; | |
2993 } | |
2994 | |
2995 xunput (c0, yytext); | |
2996 | |
2997 return retval; | |
2998 } | |
2999 | |
// Peek at the next input character and return true if it is '(' or
// '{'.  The character is pushed back with xunput.
3000 bool | |
3001 lexical_feedback::next_token_is_index_op (void) | |
3002 { | |
3003 int c = text_yyinput (); | |
3004 xunput (c, yytext); | |
3005 return c == '(' || c == '{'; | |
3006 } | |
3007 | |
// Update the lexer state for a closing ']' or '}' (BRACKET_TYPE) and
// return the token value to report.
//
// SPC_GOBBLED tells whether whitespace was consumed after the
// bracket.  The return value is BRACKET_TYPE, except that CLOSE_BRACE
// is returned for a ']' that is followed by an assignment operator
// when looking_at_return_list is false.  While still inside an
// enclosing bracket or brace, a ',' may be pushed onto the input so
// that the next element is parsed as a separate one.
3008 int | |
3009 lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type) | |
3010 { | |
3011 int retval = bracket_type; | |
3012 | |
3013 if (! nesting_level.none ()) | |
3014 { | |
3015 nesting_level.remove (); | |
3016 | |
3017 if (bracket_type == ']') | |
3018 bracketflag--; | |
3019 else if (bracket_type == '}') | |
3020 braceflag--; | |
3021 else | |
3022 panic_impossible (); | |
3023 } | |
3024 | |
     // No open brackets or braces remain: return to the INITIAL start
     // state.
3025 if (bracketflag == 0 && braceflag == 0) | |
3026 BEGIN (INITIAL); | |
3027 | |
3028 if (bracket_type == ']' | |
3029 && next_token_is_assign_op () | |
3030 && ! looking_at_return_list) | |
3031 { | |
3032 retval = CLOSE_BRACE; | |
3033 } | |
3034 else if ((bracketflag || braceflag) | |
3035 && convert_spaces_to_comma | |
3036 && (nesting_level.is_bracket () | |
3037 || (nesting_level.is_brace () | |
3038 && ! looking_at_object_index.front ()))) | |
3039 { | |
3040 bool index_op = next_token_is_index_op (); | |
3041 | |
3042 // Don't insert comma if we are looking at something like | |
3043 // | |
3044 // [x{i}{j}] or [x{i}(j)] | |
3045 // | |
3046 // but do if we are looking at | |
3047 // | |
3048 // [x{i} {j}] or [x{i} (j)] | |
3049 | |
3050 if (spc_gobbled || ! (bracket_type == '}' && index_op)) | |
3051 { | |
3052 bool bin_op = next_token_is_bin_op (spc_gobbled); | |
3053 | |
3054 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
3055 | |
3056 bool sep_op = next_token_is_sep_op (); | |
3057 | |
3058 if (! (postfix_un_op || bin_op || sep_op)) | |
3059 { | |
3060 maybe_warn_separator_insert (','); | |
3061 | |
     // Note that this path returns without setting quote_is_transpose
     // and convert_spaces_to_comma below.
3062 xunput (',', yytext); | |
3063 return retval; | |
3064 } | |
3065 } | |
3066 } | |
3067 | |
3068 quote_is_transpose = true; | |
3069 convert_spaces_to_comma = true; | |
3070 | |
3071 return retval; | |
3072 } | |
3073 | |
// When inside a bracket, or inside a brace that is not part of an
// object index, push a ',' onto the input unless what follows is a
// binary operator, a postfix unary operator, a separator, a '.'
// followed by a letter, whitespace or '_', or an index operator with
// no whitespace before it.
//
// SPC_GOBBLED tells whether whitespace was consumed before the
// current position.
3074 void | |
3075 lexical_feedback::maybe_unput_comma (int spc_gobbled) | |
3076 { | |
3077 if (nesting_level.is_bracket () | |
3078 || (nesting_level.is_brace () | |
3079 && ! looking_at_object_index.front ())) | |
3080 { | |
3081 int bin_op = next_token_is_bin_op (spc_gobbled); | |
3082 | |
3083 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
3084 | |
     // Peek at the next two characters and put them back right away.
3085 int c1 = text_yyinput (); | |
3086 int c2 = text_yyinput (); | |
3087 | |
3088 xunput (c2, yytext); | |
3089 xunput (c1, yytext); | |
3090 | |
3091 int sep_op = next_token_is_sep_op (); | |
3092 | |
3093 int dot_op = (c1 == '.' | |
3094 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
3095 | |
3096 if (postfix_un_op || bin_op || sep_op || dot_op) | |
3097 return; | |
3098 | |
3099 int index_op = (c1 == '(' || c1 == '{'); | |
3100 | |
3101 // If there is no space before the indexing op, we don't insert | |
3102 // a comma. | |
3103 | |
3104 if (index_op && ! spc_gobbled) | |
3105 return; | |
3106 | |
3107 maybe_warn_separator_insert (','); | |
3108 | |
3109 xunput (',', yytext); | |
3110 } | |
3111 } | |
3112 | |
// Skip spaces and tabs, look at the first character that follows, and
// return true if it is alphanumeric or one of ! " ' ( - [ _ { ~.
// Everything read is pushed back with xunput, last character first,
// so the input is left as it was on entry.
3113 bool | |
3114 lexical_feedback::next_token_can_follow_bin_op (void) | |
3115 { | |
     // Store the values returned by text_yyinput as int, the type they
     // are read into, so that they are not narrowed while they wait to
     // be pushed back.
3116 std::stack<int> buf; | |
3117 | |
3118 int c = EOF; | |
3119 | |
3120 // Skip whitespace in current statement on current line | |
3121 while (true) | |
3122 { | |
3123 c = text_yyinput (); | |
3124 | |
3125 buf.push (c); | |
3126 | |
     // The loop stops at the first character that is not a space or a
     // tab; the match_any test is already covered by the second
     // condition.
3127 if (match_any (c, ",;\n") || (c != ' ' && c != '\t')) | |
3128 break; | |
3129 } | |
3130 | |
3131 // Restore input. | |
3132 while (! buf.empty ()) | |
3133 { | |
3134 xunput (buf.top (), yytext); | |
3135 | |
3136 buf.pop (); | |
3137 } | |
3138 | |
3139 return (isalnum (c) || match_any (c, "!\"'(-[_{~")); | |
3140 } | |
3141 | |
// Return false for the names listed below and true for every other
// name.  These names are never treated as commands, to avoid surprises
// when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char * const excluded_names[] =
    {
      "e",
      "I", "i",
      "J", "j",
      "Inf", "inf",
      "NaN", "nan"
    };

  static const size_t n_excluded
    = sizeof (excluded_names) / sizeof (excluded_names[0]);

  for (size_t k = 0; k < n_excluded; k++)
    {
      if (tok == excluded_names[k])
        return false;
    }

  return true;
}
3154 | |
// Decide whether the text following the current token looks like the
// argument of a command-syntax call.  Characters are read with
// text_yyinput and each one is pushed back with xunput before
// returning, so the input is left as it was found.
//
// The result starts out true and is set to false when the lookahead is
// one of the following:
//
//   * '=' that is not followed by a second '=';
//   * an opening '(' or '{';
//   * one of the operator spellings listed in the case comments below,
//     followed by a space or tab and then a token for which
//     next_token_can_follow_bin_op returns true;
//   * '.' followed by a character that is none of: one of "+-/\^*", a
//     digit, a space, a tab, '.', ',', ';' or newline.
//
// NOTE(review): match_any is defined outside this view; presumably it
// tests whether the character occurs in the given string.  If so, the
// "! match_any (cN, ",;\n")" part of each blank test below is implied by
// "(cN == ' ' || cN == '\t')" -- confirm before simplifying.
3155 bool | |
3156 lexical_feedback::looks_like_command_arg (void) | |
3157 { | |
3158 bool retval = true; | |
3159 | |
3160 int c0 = text_yyinput (); | |
3161 | |
3162 switch (c0) | |
3163 { | |
3164 // = == | |
3165 case '=': | |
3166 { | |
3167 int c1 = text_yyinput (); | |
3168 | |
3169 if (c1 == '=') | |
3170 { | |
3171 int c2 = text_yyinput (); | |
3172 | |
3173 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3174 && next_token_can_follow_bin_op ()) | |
3175 retval = false; | |
3176 | |
3177 xunput (c2, yytext); | |
3178 } | |
3179 else | |
// '=' not followed by a second '='.
3180 retval = false; | |
3181 | |
3182 xunput (c1, yytext); | |
3183 } | |
3184 break; | |
3185 | |
3186 case '(': | |
3187 case '{': | |
3188 // Indexing. | |
3189 retval = false; | |
3190 break; | |
3191 | |
3192 case '\n': | |
3193 // EOL. | |
3194 break; | |
3195 | |
3196 case '\'': | |
3197 case '"': | |
3198 // Beginning of a character string. | |
3199 break; | |
3200 | |
3201 // + - ++ -- += -= | |
3202 case '+': | |
3203 case '-': | |
3204 { | |
3205 int c1 = text_yyinput (); | |
3206 | |
3207 switch (c1) | |
3208 { | |
3209 case '\n': | |
3210 // EOL. | |
3211 case '+': | |
3212 case '-': | |
3213 // Unary ops, spacing doesn't matter. | |
3214 break; | |
3215 | |
3216 case '\t': | |
3217 case ' ': | |
3218 { | |
3219 if (next_token_can_follow_bin_op ()) | |
3220 retval = false; | |
3221 } | |
3222 break; | |
3223 | |
3224 case '=': | |
3225 { | |
3226 int c2 = text_yyinput (); | |
3227 | |
3228 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3229 && next_token_can_follow_bin_op ()) | |
3230 retval = false; | |
3231 | |
3232 xunput (c2, yytext); | |
3233 } | |
3234 break; | |
3235 } | |
3236 | |
3237 xunput (c1, yytext); | |
3238 } | |
3239 break; | |
3240 | |
// Single-character operators : / \ ^ followed by a blank.
3241 case ':': | |
3242 case '/': | |
3243 case '\\': | |
3244 case '^': | |
3245 { | |
3246 int c1 = text_yyinput (); | |
3247 | |
3248 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3249 && next_token_can_follow_bin_op ()) | |
3250 retval = false; | |
3251 | |
3252 xunput (c1, yytext); | |
3253 } | |
3254 break; | |
3255 | |
3256 // .+ .- ./ .\ .^ .* .** | |
3257 case '.': | |
3258 { | |
3259 int c1 = text_yyinput (); | |
3260 | |
3261 if (match_any (c1, "+-/\\^*")) | |
3262 { | |
3263 int c2 = text_yyinput (); | |
3264 | |
// Dot operator followed by '=': the blank test applies to the
// character after the '='.
3265 if (c2 == '=') | |
3266 { | |
3267 int c3 = text_yyinput (); | |
3268 | |
3269 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t') | |
3270 && next_token_can_follow_bin_op ()) | |
3271 retval = false; | |
3272 | |
3273 xunput (c3, yytext); | |
3274 } | |
3275 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3276 && next_token_can_follow_bin_op ()) | |
3277 retval = false; | |
3278 | |
3279 xunput (c2, yytext); | |
3280 } | |
3281 else if (! match_any (c1, ",;\n") | |
3282 && (! isdigit (c1) && c1 != ' ' && c1 != '\t' | |
3283 && c1 != '.')) | |
3284 { | |
3285 // Structure reference. FIXME -- is this a complete check? | |
3286 | |
3287 retval = false; | |
3288 } | |
3289 | |
3290 xunput (c1, yytext); | |
3291 } | |
3292 break; | |
3293 | |
3294 // & && | || * ** | |
3295 case '&': | |
3296 case '|': | |
3297 case '*': | |
3298 { | |
3299 int c1 = text_yyinput (); | |
3300 | |
// Doubled operator character (same as c0): test the character
// after the pair.
3301 if (c1 == c0) | |
3302 { | |
3303 int c2 = text_yyinput (); | |
3304 | |
3305 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3306 && next_token_can_follow_bin_op ()) | |
3307 retval = false; | |
3308 | |
3309 xunput (c2, yytext); | |
3310 } | |
3311 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3312 && next_token_can_follow_bin_op ()) | |
3313 retval = false; | |
3314 | |
3315 xunput (c1, yytext); | |
3316 } | |
3317 break; | |
3318 | |
3319 // < <= > >= | |
3320 case '<': | |
3321 case '>': | |
3322 { | |
3323 int c1 = text_yyinput (); | |
3324 | |
3325 if (c1 == '=') | |
3326 { | |
3327 int c2 = text_yyinput (); | |
3328 | |
3329 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3330 && next_token_can_follow_bin_op ()) | |
3331 retval = false; | |
3332 | |
3333 xunput (c2, yytext); | |
3334 } | |
3335 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3336 && next_token_can_follow_bin_op ()) | |
3337 retval = false; | |
3338 | |
3339 xunput (c1, yytext); | |
3340 } | |
3341 break; | |
3342 | |
3343 // ~= != | |
3344 case '~': | |
3345 case '!': | |
3346 { | |
3347 int c1 = text_yyinput (); | |
3348 | |
3349 // ~ and ! can be unary ops, so require following =. | |
// NOTE(review): the else branch below still sets retval to false
// for a bare '~' or '!' followed by a blank and a suitable token,
// which looks at odds with the comment above -- confirm intent.
3350 if (c1 == '=') | |
3351 { | |
3352 int c2 = text_yyinput (); | |
3353 | |
3354 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3355 && next_token_can_follow_bin_op ()) | |
3356 retval = false; | |
3357 | |
3358 xunput (c2, yytext); | |
3359 } | |
3360 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3361 && next_token_can_follow_bin_op ()) | |
3362 retval = false; | |
3363 | |
3364 xunput (c1, yytext); | |
3365 } | |
3366 break; | |
3367 | |
3368 default: | |
3369 break; | |
3370 } | |
3371 | |
// The first character read is pushed back last, after any characters
// read inside the switch have been restored.
3372 xunput (c0, yytext); | |
3373 | |
3374 return retval; | |
3375 } | |
3376 | |
3377 int | |
3378 lexical_feedback::handle_superclass_identifier (void) | |
3379 { | |
3380 eat_continuation (); | |
3381 | |
3382 std::string pkg; | |
3383 std::string meth = strip_trailing_whitespace (yytext); | |
3384 size_t pos = meth.find ("@"); | |
3385 std::string cls = meth.substr (pos).substr (1); | |
3386 meth = meth.substr (0, pos - 1); | |
3387 | |
3388 pos = cls.find ("."); | |
3389 if (pos != std::string::npos) | |
3390 { | |
3391 pkg = cls.substr (pos).substr (1); | |
3392 cls = cls.substr (0, pos - 1); | |
3393 } | |
3394 | |
3395 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls) | |
3396 || is_keyword_token (pkg)); | |
3397 if (kw_token) | |
3398 { | |
3399 error ("method, class and package names may not be keywords"); | |
3400 return LEXICAL_ERROR; | |
3401 } | |
3402 | |
3403 yylval.tok_val | |
3404 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), | |
3405 cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3406 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3407 input_line_number, | |
3408 current_input_column); | |
3409 token_stack.push (yylval.tok_val); | |
3410 | |
3411 convert_spaces_to_comma = true; | |
3412 current_input_column += yyleng; | |
3413 | |
3414 return SUPERCLASSREF; | |
3415 } | |
3416 | |
3417 int | |
3418 lexical_feedback::handle_meta_identifier (void) | |
3419 { | |
3420 eat_continuation (); | |
3421 | |
3422 std::string pkg; | |
3423 std::string cls = strip_trailing_whitespace (yytext).substr (1); | |
3424 size_t pos = cls.find ("."); | |
3425 | |
3426 if (pos != std::string::npos) | |
3427 { | |
3428 pkg = cls.substr (pos).substr (1); | |
3429 cls = cls.substr (0, pos - 1); | |
3430 } | |
3431 | |
3432 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg); | |
3433 if (kw_token) | |
3434 { | |
3435 error ("class and package names may not be keywords"); | |
3436 return LEXICAL_ERROR; | |
3437 } | |
3438 | |
3439 yylval.tok_val | |
3440 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3441 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3442 input_line_number, | |
3443 current_input_column); | |
3444 | |
3445 token_stack.push (yylval.tok_val); | |
3446 | |
3447 convert_spaces_to_comma = true; | |
3448 current_input_column += yyleng; | |
3449 | |
3450 return METAQUERY; | |
3451 } | |
3452 | |
3453 // Figure out exactly what kind of token to return when we have seen | |
3454 // an identifier. Handles keywords. Return -1 if the identifier | |
3455 // should be ignored. | |
//
// Possible return values, in the order they are tried below:
// STRUCT_ELT (when looking_at_indirect_ref is set), LEXICAL_ERROR or
// FCN_HANDLE (when looking_at_function_handle is set), the value
// returned by is_keyword_token when it is nonzero, and otherwise NAME.
3456 | |
3457 int | |
3458 lexical_feedback::handle_identifier (void) | |
3459 { | |
// Remember the flag now; it is cleared further down, before the
// command-syntax check that needs the original value.
3460 bool at_bos = at_beginning_of_statement; | |
3461 | |
3462 std::string tok = strip_trailing_whitespace (yytext); | |
3463 | |
// Last character of the matched text; used below to tell whether the
// match itself ended in a space or tab.
3464 int c = yytext[yyleng-1]; | |
3465 | |
3466 bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
3467 | |
3468 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
3469 | |
3470 // If we are expecting a structure element, avoid recognizing | |
3471 // keywords and other special names and return STRUCT_ELT, which is | |
3472 // a string that is also a valid identifier. But first, we have to | |
3473 // decide whether to insert a comma. | |
3474 | |
3475 if (looking_at_indirect_ref) | |
3476 { | |
3477 do_comma_insert_check (); | |
3478 | |
3479 maybe_unput_comma (spc_gobbled); | |
3480 | |
3481 yylval.tok_val = new token (tok, input_line_number, | |
3482 current_input_column); | |
3483 | |
3484 token_stack.push (yylval.tok_val); | |
3485 | |
3486 quote_is_transpose = true; | |
3487 convert_spaces_to_comma = true; | |
3488 looking_for_object_index = true; | |
3489 | |
3490 current_input_column += yyleng; | |
3491 | |
3492 return STRUCT_ELT; | |
3493 } | |
3494 | |
3495 at_beginning_of_statement = false; | |
3496 | |
3497 // The is_keyword_token may reset | |
3498 // at_beginning_of_statement. For example, if it sees | |
3499 // an else token, then the next token is at the beginning of a | |
3500 // statement. | |
3501 | |
3502 int kw_token = is_keyword_token (tok); | |
3503 | |
3504 // If we found a keyword token, then the beginning_of_statement flag | |
3505 // is already set. Otherwise, we won't be at the beginning of a | |
3506 // statement. | |
3507 | |
3508 if (looking_at_function_handle) | |
3509 { | |
3510 if (kw_token) | |
3511 { | |
3512 error ("function handles may not refer to keywords"); | |
3513 | |
3514 return LEXICAL_ERROR; | |
3515 } | |
3516 else | |
3517 { | |
3518 yylval.tok_val = new token (tok, input_line_number, | |
3519 current_input_column); | |
3520 | |
3521 token_stack.push (yylval.tok_val); | |
3522 | |
3523 current_input_column += yyleng; | |
3524 quote_is_transpose = false; | |
3525 convert_spaces_to_comma = true; | |
3526 looking_for_object_index = true; | |
3527 | |
3528 return FCN_HANDLE; | |
3529 } | |
3530 } | |
3531 | |
3532 // If we have a regular keyword, return it. | |
3533 // Keywords can be followed by identifiers. | |
3534 | |
3535 if (kw_token) | |
3536 { | |
// A negative kw_token is returned as is, without advancing the
// column or touching the lexer flags.
3537 if (kw_token >= 0) | |
3538 { | |
3539 current_input_column += yyleng; | |
3540 quote_is_transpose = false; | |
3541 convert_spaces_to_comma = true; | |
3542 looking_for_object_index = false; | |
3543 } | |
3544 | |
3545 return kw_token; | |
3546 } | |
3547 | |
3548 // Peek ahead to see whether the identifier is followed by a single '=' | |
// (that is, an '=' whose next character is not another '=').  The
// characters read here are pushed back afterwards.
3549 | |
3550 int c1 = text_yyinput (); | |
3551 | |
3552 bool next_tok_is_eq = false; | |
3553 if (c1 == '=') | |
3554 { | |
3555 int c2 = text_yyinput (); | |
3556 xunput (c2, yytext); | |
3557 | |
3558 if (c2 != '=') | |
3559 next_tok_is_eq = true; | |
3560 } | |
3561 | |
3562 xunput (c1, yytext); | |
3563 | |
3564 // Kluge alert. | |
3565 // | |
3566 // If we are looking at a text style function, set up to gobble its | |
3567 // arguments. | |
3568 // | |
3569 // If the following token is '=', or if we are parsing a function | |
3570 // return list or function parameter list, or if we are looking at | |
3571 // something like [ab,cd] = foo (), force the symbol to be inserted | |
3572 // as a variable in the current symbol table. | |
3573 | |
3574 if (! is_variable (tok)) | |
3575 { | |
3576 if (at_bos && spc_gobbled && can_be_command (tok) | |
3577 && looks_like_command_arg ()) | |
3578 { | |
3579 BEGIN (COMMAND_START); | |
3580 } | |
3581 else if (next_tok_is_eq | |
3582 || looking_at_decl_list | |
3583 || looking_at_return_list | |
3584 || (looking_at_parameter_list | |
3585 && ! looking_at_initializer_expression)) | |
3586 { | |
3587 symbol_table::force_variable (tok); | |
3588 } | |
3589 else if (looking_at_matrix_or_assign_lhs) | |
3590 { | |
3591 pending_local_variables.insert (tok); | |
3592 } | |
3593 } | |
3594 | |
3595 // Find the token in the symbol table. Beware the magic | |
3596 // transformation of the end keyword... | |
3597 | |
3598 if (tok == "end") | |
3599 tok = "__end__"; | |
3600 | |
3601 yylval.tok_val = new token (&(symbol_table::insert (tok)), | |
3602 input_line_number, | |
3603 current_input_column); | |
3604 | |
3605 token_stack.push (yylval.tok_val); | |
3606 | |
3607 // After seeing an identifier, it is ok to convert spaces to a comma | |
3608 // (if needed). | |
3609 | |
3610 convert_spaces_to_comma = true; | |
3611 | |
// The comma-insertion checks are skipped when a single '=' follows or
// when the start condition is COMMAND_START.
3612 if (! (next_tok_is_eq || YY_START == COMMAND_START)) | |
3613 { | |
3614 quote_is_transpose = true; | |
3615 | |
3616 do_comma_insert_check (); | |
3617 | |
3618 maybe_unput_comma (spc_gobbled); | |
3619 } | |
3620 | |
3621 current_input_column += yyleng; | |
3622 | |
// "end" was renamed to "__end__" above; for that name
// looking_for_object_index is left unchanged.
3623 if (tok != "__end__") | |
3624 looking_for_object_index = true; | |
3625 | |
3626 return NAME; | |
3627 } | |
3628 | |
3629 void | |
3630 lexical_feedback::maybe_warn_separator_insert (char sep) | |
3631 { | |
3632 std::string nm = curr_fcn_file_full_name; | |
3633 | |
3634 if (nm.empty ()) | |
3635 warning_with_id ("Octave:separator-insert", | |
3636 "potential auto-insertion of '%c' near line %d", | |
3637 sep, input_line_number); | |
3638 else | |
3639 warning_with_id ("Octave:separator-insert", | |
3640 "potential auto-insertion of '%c' near line %d of file %s", | |
3641 sep, input_line_number, nm.c_str ()); | |
3642 } | |
3643 | |
3644 void | |
3645 lexical_feedback::gripe_single_quote_string (void) | |
3646 { | |
3647 std::string nm = curr_fcn_file_full_name; | |
3648 | |
3649 if (nm.empty ()) | |
3650 warning_with_id ("Octave:single-quote-string", | |
3651 "single quote delimited string near line %d", | |
3652 input_line_number); | |
3653 else | |
3654 warning_with_id ("Octave:single-quote-string", | |
3655 "single quote delimited string near line %d of file %s", | |
3656 input_line_number, nm.c_str ()); | |
3657 } | |
3658 | |
3659 void | |
3660 lexical_feedback::gripe_matlab_incompatible (const std::string& msg) | |
3661 { | |
3662 std::string nm = curr_fcn_file_full_name; | |
3663 | |
3664 if (nm.empty ()) | |
3665 warning_with_id ("Octave:matlab-incompatible", | |
3666 "potential Matlab compatibility problem: %s", | |
3667 msg.c_str ()); | |
3668 else | |
3669 warning_with_id ("Octave:matlab-incompatible", | |
3670 "potential Matlab compatibility problem: %s near line %d offile %s", | |
3671 msg.c_str (), input_line_number, nm.c_str ()); | |
3672 } | |
3673 | |
3674 void | |
3675 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c) | |
3676 { | |
3677 if (c == '#') | |
3678 gripe_matlab_incompatible ("# used as comment character"); | |
3679 } | |
3680 | |
3681 void | |
3682 lexical_feedback::gripe_matlab_incompatible_continuation (void) | |
3683 { | |
3684 gripe_matlab_incompatible ("\\ used as line continuation marker"); | |
3685 } | |
3686 | |
3687 void | |
3688 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op) | |
3689 { | |
3690 std::string t = op; | |
3691 int n = t.length (); | |
3692 if (t[n-1] == '\n') | |
3693 t.resize (n-1); | |
3694 gripe_matlab_incompatible (t + " used as operator"); | |
3695 } |