Mercurial > hg > octave-nkf
comparison src/lex.l @ 3246:a41cc560087a
[project @ 1999-06-19 06:46:20 by jwe]
author | jwe |
---|---|
date | Sat, 19 Jun 1999 06:46:35 +0000 |
parents | 041ea33fbbf4 |
children | 4964d5391acc |
comparison
equal
deleted
inserted
replaced
3245:2270329efd14 | 3246:a41cc560087a |
---|---|
168 static int is_keyword (const string& s); | 168 static int is_keyword (const string& s); |
169 static string plot_style_token (const string& s); | 169 static string plot_style_token (const string& s); |
170 static symbol_record *lookup_identifier (const string& s); | 170 static symbol_record *lookup_identifier (const string& s); |
171 static void grab_help_text (void); | 171 static void grab_help_text (void); |
172 static bool match_any (char c, const char *s); | 172 static bool match_any (char c, const char *s); |
173 static bool next_token_is_bin_op (int spc_prev, char *yytext); | 173 static bool next_token_is_bin_op (bool spc_prev); |
174 static bool next_token_is_postfix_unary_op (int spc_prev, char *yytext); | 174 static bool next_token_is_postfix_unary_op (bool spc_prev); |
175 static string strip_trailing_whitespace (char *s); | 175 static string strip_trailing_whitespace (char *s); |
176 static void handle_number (char *yytext); | 176 static void handle_number (void); |
177 static int handle_string (char delim, int text_style = 0); | 177 static int handle_string (char delim, int text_style = 0); |
178 static int handle_close_brace (int spc_gobbled); | 178 static int handle_close_brace (int spc_gobbled); |
179 static int handle_identifier (const string& tok, int spc_gobbled); | 179 static int handle_identifier (const string& tok, int spc_gobbled); |
180 static bool have_continuation (bool trailing_comments_ok = true); | 180 static bool have_continuation (bool trailing_comments_ok = true); |
181 static bool have_ellipsis_continuation (bool trailing_comments_ok = true); | 181 static bool have_ellipsis_continuation (bool trailing_comments_ok = true); |
234 } | 234 } |
235 } | 235 } |
236 | 236 |
237 <TEXT_FCN>[\"\'] { | 237 <TEXT_FCN>[\"\'] { |
238 current_input_column++; | 238 current_input_column++; |
239 return handle_string (yytext[0], 1); | 239 return handle_string (yytext[0], true); |
240 } | 240 } |
241 | 241 |
242 <TEXT_FCN>[^ \t\n\;\,\"\'][^ \t\n\;\,]*{S}* { | 242 <TEXT_FCN>[^ \t\n\;\,\"\'][^ \t\n\;\,]*{S}* { |
243 string tok = strip_trailing_whitespace (yytext); | 243 string tok = strip_trailing_whitespace (yytext); |
244 TOK_PUSH_AND_RETURN (tok, TEXT); | 244 TOK_PUSH_AND_RETURN (tok, TEXT); |
292 <MATRIX>{S}+ { | 292 <MATRIX>{S}+ { |
293 current_input_column += yyleng; | 293 current_input_column += yyleng; |
294 if (Vwhitespace_in_literal_matrix != 2) | 294 if (Vwhitespace_in_literal_matrix != 2) |
295 { | 295 { |
296 int tmp = eat_continuation (); | 296 int tmp = eat_continuation (); |
297 int bin_op = next_token_is_bin_op (1, yytext); | 297 int bin_op = next_token_is_bin_op (true); |
298 int postfix_un_op = next_token_is_postfix_unary_op (1, yytext); | 298 int postfix_un_op = next_token_is_postfix_unary_op (true); |
299 | 299 |
300 if (! (postfix_un_op || bin_op) | 300 if (! (postfix_un_op || bin_op) |
301 && nesting_level.is_brace () | 301 && nesting_level.is_brace () |
302 && lexer_flags.convert_spaces_to_comma) | 302 && lexer_flags.convert_spaces_to_comma) |
303 { | 303 { |
395 %{ | 395 %{ |
396 // Imaginary numbers. | 396 // Imaginary numbers. |
397 %} | 397 %} |
398 | 398 |
399 {NUMBER}{Im} { | 399 {NUMBER}{Im} { |
400 handle_number (yytext); | 400 handle_number (); |
401 return IMAG_NUM; | 401 return IMAG_NUM; |
402 } | 402 } |
403 | 403 |
404 %{ | 404 %{ |
405 // Real numbers. Don't grab the `.' part of a dot operator as part of | 405 // Real numbers. Don't grab the `.' part of a dot operator as part of |
406 // the constant. | 406 // the constant. |
407 %} | 407 %} |
408 | 408 |
409 {D}+/\.[\*/\\^'] | | 409 {D}+/\.[\*/\\^'] | |
410 {NUMBER} { | 410 {NUMBER} { |
411 handle_number (yytext); | 411 handle_number (); |
412 return NUM; | 412 return NUM; |
413 } | 413 } |
414 | 414 |
415 %{ | 415 %{ |
416 // Eat whitespace. Whitespace inside matrix constants is handled by | 416 // Eat whitespace. Whitespace inside matrix constants is handled by |
666 { | 666 { |
667 int spc_gobbled = eat_continuation (); | 667 int spc_gobbled = eat_continuation (); |
668 | 668 |
669 int c = yyinput (); | 669 int c = yyinput (); |
670 | 670 |
671 yyunput (c, yytext); | 671 unput (c); |
672 | 672 |
673 if (spc_gobbled) | 673 if (spc_gobbled) |
674 yyunput (' ', yytext); | 674 unput (' '); |
675 | 675 |
676 lexer_flags.do_comma_insert = (lexer_flags.braceflag && c == '['); | 676 lexer_flags.do_comma_insert = (lexer_flags.braceflag && c == '['); |
677 } | 677 } |
678 | 678 |
679 // Fix things up for errors or interrupts. The parser is never called | 679 // Fix things up for errors or interrupts. The parser is never called |
1170 } | 1170 } |
1171 | 1171 |
1172 done: | 1172 done: |
1173 | 1173 |
1174 if (c) | 1174 if (c) |
1175 yyunput (c, yytext); | 1175 unput (c); |
1176 } | 1176 } |
1177 | 1177 |
1178 // Return 1 if the given character matches any character in the given | 1178 // Return 1 if the given character matches any character in the given |
1179 // string. | 1179 // string. |
1180 | 1180 |
1195 // operator. For example, | 1195 // operator. For example, |
1196 // | 1196 // |
1197 // [ 1 + 2 ] or [ 1+ 2] or [ 1+2 ] ==> binary | 1197 // [ 1 + 2 ] or [ 1+ 2] or [ 1+2 ] ==> binary |
1198 | 1198 |
1199 static bool | 1199 static bool |
1200 looks_like_bin_op (int spc_prev, int spc_next) | 1200 looks_like_bin_op (bool spc_prev, int next_char) |
1201 { | 1201 { |
1202 bool spc_next = (next_char == ' ' || next_char == '\t'); | |
1203 | |
1202 return ((spc_prev && spc_next) || ! spc_prev); | 1204 return ((spc_prev && spc_next) || ! spc_prev); |
1203 } | 1205 } |
1204 | 1206 |
1205 // Try to determine if the next token should be treated as a postfix | 1207 // Try to determine if the next token should be treated as a postfix |
1206 // unary operator. This is ugly, but it seems to do the right thing. | 1208 // unary operator. This is ugly, but it seems to do the right thing. |
1207 | 1209 |
1208 static bool | 1210 static bool |
1209 next_token_is_postfix_unary_op (int spc_prev, char *yytext) | 1211 next_token_is_postfix_unary_op (bool spc_prev) |
1210 { | 1212 { |
1211 bool un_op = false; | 1213 bool un_op = false; |
1212 | 1214 |
1213 int c0 = yyinput (); | 1215 int c0 = yyinput (); |
1214 int c1 = yyinput (); | 1216 |
1215 | 1217 if (c0 == '\'' && ! spc_prev) |
1216 yyunput (c1, yytext); | 1218 { |
1217 yyunput (c0, yytext); | 1219 un_op = true; |
1218 | 1220 } |
1219 int transpose = (c0 == '.' && c1 == '\''); | 1221 else if (c0 == '.') |
1220 int hermitian = (c0 == '\''); | 1222 { |
1221 | 1223 int c1 = yyinput (); |
1222 un_op = (transpose || (hermitian && ! spc_prev)); | 1224 un_op = (c1 == '\''); |
1225 unput (c1); | |
1226 } | |
1227 | |
1228 unput (c0); | |
1223 | 1229 |
1224 return un_op; | 1230 return un_op; |
1225 } | 1231 } |
1226 | 1232 |
1227 // Try to determine if the next token should be treated as a binary | 1233 // Try to determine if the next token should be treated as a binary |
1228 // operator. This is even uglier, but it also seems to do the right | 1234 // operator. |
1229 // thing. Note that it is only necessary to check the spacing for `+' | 1235 // |
1230 // and `-', since those are the only tokens that can appear as unary | 1236 // This kluge exists because whitespace is not always ignored inside |
1231 // ops too. | 1237 // the square brackets that are used to create matrix objects. |
1232 // | 1238 // |
1233 // Note that this never returns true for `.', even though it can be a | 1239 // Line continuations directly after the operator will cause this |
1234 // binary operator (the structure reference thing). The only time | 1240 // function to return FALSE. |
1235 // this appears to matter is for things like | |
1236 // | |
1237 // [ a . b ] | |
1238 // | |
1239 // which probably doesn't occur that often, can be worked around by | |
1240 // eliminating the whitespace, putting the expression in parentheses, | |
1241 // or using `whitespace_in_literal_matrix = "ignored"', so I think it | |
1242 // is an acceptable change. It would be quite a bit harder to `fix' | |
1243 // this. (Well, maybe not. the best fix would be to do away with the | |
1244 // specialness of whitespace inside of `[ ... ]'). | |
1245 // | |
1246 // However, we still do check for `.+', `.*', etc. | |
1247 | 1241 |
1248 static bool | 1242 static bool |
1249 next_token_is_bin_op (int spc_prev, char *yytext) | 1243 next_token_is_bin_op (bool spc_prev) |
1250 { | 1244 { |
1251 bool bin_op = false; | 1245 bool bin_op = false; |
1252 | 1246 |
1253 int c0 = yyinput (); | 1247 int c0 = yyinput (); |
1254 | 1248 |
1255 switch (c0) | 1249 switch (c0) |
1256 { | 1250 { |
1251 case ':': | |
1257 case '+': | 1252 case '+': |
1258 case '-': | 1253 case '-': |
1254 case '/': | |
1255 case '\\': | |
1256 case '^': | |
1259 { | 1257 { |
1260 int c1 = yyinput (); | 1258 int c1 = yyinput (); |
1261 yyunput (c1, yytext); | 1259 bin_op = looks_like_bin_op (spc_prev, c1); |
1262 int spc_next = (c1 == ' ' || c1 == '\t'); | 1260 unput (c1); |
1263 bin_op = looks_like_bin_op (spc_prev, spc_next); | |
1264 } | 1261 } |
1265 break; | 1262 break; |
1266 | 1263 |
1264 // .+ .- ./ .\ .^ .* .** | |
1267 case '.': | 1265 case '.': |
1268 { | 1266 { |
1269 int c1 = yyinput (); | 1267 int c1 = yyinput (); |
1270 yyunput (c1, yytext); | 1268 |
1271 bin_op = match_any (c1, "+-*/\\^"); | 1269 if (match_any (c1, "+-/\\^")) |
1270 { | |
1271 int c2 = yyinput (); | |
1272 bin_op = looks_like_bin_op (spc_prev, c2); | |
1273 unput (c2); | |
1274 } | |
1275 else if (c1 == '*') | |
1276 { | |
1277 int c2 = yyinput (); | |
1278 | |
1279 if (c2 == '*') | |
1280 { | |
1281 int c3 = yyinput (); | |
1282 bin_op = looks_like_bin_op (spc_prev, c3); | |
1283 unput (c3); | |
1284 } | |
1285 else | |
1286 bin_op = looks_like_bin_op (spc_prev, c2); | |
1287 | |
1288 unput (c2); | |
1289 } | |
1290 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t') | |
1291 { | |
1292 bin_op = true; | |
1293 } | |
1294 | |
1295 unput (c1); | |
1272 } | 1296 } |
1273 break; | 1297 break; |
1274 | 1298 |
1275 case '/': | 1299 // = == & && | || * ** |
1276 case ':': | 1300 case '=': |
1277 case '\\': | |
1278 case '^': | |
1279 case '&': | 1301 case '&': |
1302 case '|': | |
1280 case '*': | 1303 case '*': |
1281 case '|': | 1304 { |
1305 int c1 = yyinput (); | |
1306 | |
1307 if (c1 == c0) | |
1308 { | |
1309 int c2 = yyinput (); | |
1310 bin_op = looks_like_bin_op (spc_prev, c2); | |
1311 unput (c2); | |
1312 } | |
1313 else | |
1314 bin_op = looks_like_bin_op (spc_prev, c1); | |
1315 | |
1316 unput (c1); | |
1317 } | |
1318 break; | |
1319 | |
1320 // <= >= <> ~= != < > | |
1282 case '<': | 1321 case '<': |
1283 case '>': | 1322 case '>': |
1284 case '~': | 1323 case '~': |
1285 case '!': | 1324 case '!': |
1286 case '=': | 1325 { |
1287 bin_op = true; | 1326 int c1 = yyinput (); |
1327 | |
1328 if ((c1 == '=') || (c1 == '<' && c1 == '>')) | |
1329 { | |
1330 int c2 = yyinput (); | |
1331 bin_op = looks_like_bin_op (spc_prev, c2); | |
1332 unput (c2); | |
1333 } | |
1334 else if (c1 != '~' && c1 != '!') | |
1335 bin_op = looks_like_bin_op (spc_prev, c1); | |
1336 | |
1337 unput (c1); | |
1338 } | |
1288 break; | 1339 break; |
1289 | 1340 |
1290 default: | 1341 default: |
1291 break; | 1342 break; |
1292 } | 1343 } |
1293 | 1344 |
1294 yyunput (c0, yytext); | 1345 unput (c0); |
1295 | 1346 |
1296 return bin_op; | 1347 return bin_op; |
1297 } | 1348 } |
1298 | 1349 |
1299 // Used to delete trailing white space from tokens. | 1350 // Used to delete trailing white space from tokens. |
1376 goto done; | 1427 goto done; |
1377 } | 1428 } |
1378 } | 1429 } |
1379 | 1430 |
1380 done: | 1431 done: |
1381 yyunput (c, yytext); | 1432 unput (c); |
1382 current_input_column--; | 1433 current_input_column--; |
1383 return retval; | 1434 return retval; |
1384 } | 1435 } |
1385 | 1436 |
1386 static inline bool | 1437 static inline bool |
1388 { | 1439 { |
1389 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')); | 1440 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')); |
1390 } | 1441 } |
1391 | 1442 |
1392 static void | 1443 static void |
1393 handle_number (char *yytext) | 1444 handle_number (void) |
1394 { | 1445 { |
1395 char *tmp = strsave (yytext); | 1446 char *tmp = strsave (yytext); |
1396 | 1447 |
1397 char *idx = strpbrk (tmp, "Dd"); | 1448 char *idx = strpbrk (tmp, "Dd"); |
1398 | 1449 |
1478 goto cleanup; | 1529 goto cleanup; |
1479 break; | 1530 break; |
1480 } | 1531 } |
1481 } | 1532 } |
1482 | 1533 |
1483 yyunput (c, yytext); | 1534 unput (c); |
1484 return false; | 1535 return false; |
1485 | 1536 |
1486 cleanup: | 1537 cleanup: |
1487 buf << ends; | 1538 buf << ends; |
1488 char *s = buf.str (); | 1539 char *s = buf.str (); |
1489 if (s) | 1540 if (s) |
1490 { | 1541 { |
1491 int len = strlen (s); | 1542 int len = strlen (s); |
1492 while (len--) | 1543 while (len--) |
1493 yyunput (s[len], yytext); | 1544 unput (s[len]); |
1494 } | 1545 } |
1495 delete [] s; | 1546 delete [] s; |
1496 | 1547 |
1497 return false; | 1548 return false; |
1498 } | 1549 } |
1510 char c2 = yyinput (); | 1561 char c2 = yyinput (); |
1511 if (c2 == '.' && have_continuation (trailing_comments_ok)) | 1562 if (c2 == '.' && have_continuation (trailing_comments_ok)) |
1512 return true; | 1563 return true; |
1513 else | 1564 else |
1514 { | 1565 { |
1515 yyunput (c2, yytext); | 1566 unput (c2); |
1516 yyunput (c1, yytext); | 1567 unput (c1); |
1517 } | 1568 } |
1518 } | 1569 } |
1519 else | 1570 else |
1520 yyunput (c1, yytext); | 1571 unput (c1); |
1521 | 1572 |
1522 return false; | 1573 return false; |
1523 } | 1574 } |
1524 | 1575 |
1525 // See if we have a continuation line. If so, eat it and the leading | 1576 // See if we have a continuation line. If so, eat it and the leading |
1534 int c = yyinput (); | 1585 int c = yyinput (); |
1535 if ((c == '.' && have_ellipsis_continuation ()) | 1586 if ((c == '.' && have_ellipsis_continuation ()) |
1536 || (c == '\\' && have_continuation ())) | 1587 || (c == '\\' && have_continuation ())) |
1537 retval = eat_whitespace (); | 1588 retval = eat_whitespace (); |
1538 else | 1589 else |
1539 yyunput (c, yytext); | 1590 unput (c); |
1540 | 1591 |
1541 return retval; | 1592 return retval; |
1542 } | 1593 } |
1543 | 1594 |
1544 static int | 1595 static int |
1591 c = yyinput (); | 1642 c = yyinput (); |
1592 if (c == delim) | 1643 if (c == delim) |
1593 buf << (char) c; | 1644 buf << (char) c; |
1594 else | 1645 else |
1595 { | 1646 { |
1596 yyunput (c, yytext); | 1647 unput (c); |
1597 buf << ends; | 1648 buf << ends; |
1598 char *t = buf.str (); | 1649 char *t = buf.str (); |
1599 string s = do_string_escapes (t); | 1650 string s = do_string_escapes (t); |
1600 delete [] t; | 1651 delete [] t; |
1601 | 1652 |
1734 int c1 = yyinput (); | 1785 int c1 = yyinput (); |
1735 unput (c1); | 1786 unput (c1); |
1736 | 1787 |
1737 if (lexer_flags.braceflag && Vwhitespace_in_literal_matrix != 2) | 1788 if (lexer_flags.braceflag && Vwhitespace_in_literal_matrix != 2) |
1738 { | 1789 { |
1739 int bin_op = next_token_is_bin_op (spc_gobbled, yytext); | 1790 int bin_op = next_token_is_bin_op (spc_gobbled); |
1740 int postfix_un_op = next_token_is_postfix_unary_op | 1791 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); |
1741 (spc_gobbled, yytext); | |
1742 | 1792 |
1743 int other_op = match_any (c1, ",;\n]"); | 1793 int other_op = match_any (c1, ",;\n]"); |
1744 | 1794 |
1745 if (! (postfix_un_op || bin_op || other_op) | 1795 if (! (postfix_un_op || bin_op || other_op) |
1746 && nesting_level.is_brace () | 1796 && nesting_level.is_brace () |
1762 static void | 1812 static void |
1763 maybe_unput_comma (int spc_gobbled) | 1813 maybe_unput_comma (int spc_gobbled) |
1764 { | 1814 { |
1765 if (Vwhitespace_in_literal_matrix != 2 && nesting_level.is_brace ()) | 1815 if (Vwhitespace_in_literal_matrix != 2 && nesting_level.is_brace ()) |
1766 { | 1816 { |
1767 int bin_op = next_token_is_bin_op (spc_gobbled, yytext); | 1817 int bin_op = next_token_is_bin_op (spc_gobbled); |
1768 | 1818 |
1769 int postfix_un_op | 1819 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); |
1770 = next_token_is_postfix_unary_op (spc_gobbled, yytext); | |
1771 | 1820 |
1772 int c1 = yyinput (); | 1821 int c1 = yyinput (); |
1773 int c2 = yyinput (); | 1822 int c2 = yyinput (); |
1774 | 1823 |
1775 unput (c2); | 1824 unput (c2); |
1867 if (lexer_flags.cant_be_identifier && plot_option_kw) | 1916 if (lexer_flags.cant_be_identifier && plot_option_kw) |
1868 TOK_RETURN (plot_option_kw); | 1917 TOK_RETURN (plot_option_kw); |
1869 } | 1918 } |
1870 | 1919 |
1871 int c = yyinput (); | 1920 int c = yyinput (); |
1872 yyunput (c, yytext); | 1921 unput (c); |
1873 bool next_tok_is_eq = (c == '='); | 1922 bool next_tok_is_eq = (c == '='); |
1923 bool next_tok_is_dot = (c == '.'); | |
1874 bool next_tok_is_paren = (c == '('); | 1924 bool next_tok_is_paren = (c == '('); |
1875 | 1925 |
1876 // Make sure we put the return values of a function in the symbol | 1926 // Make sure we put the return values of a function in the symbol |
1877 // table that is local to the function. | 1927 // table that is local to the function. |
1878 | 1928 |
1899 if (is_text_function_name (tok) && ! is_variable (tok)) | 1949 if (is_text_function_name (tok) && ! is_variable (tok)) |
1900 { | 1950 { |
1901 if (next_tok_is_eq | 1951 if (next_tok_is_eq |
1902 || lexer_flags.looking_at_return_list | 1952 || lexer_flags.looking_at_return_list |
1903 || lexer_flags.looking_at_parameter_list | 1953 || lexer_flags.looking_at_parameter_list |
1904 || lexer_flags.looking_at_matrix_or_assign_lhs) | 1954 || lexer_flags.looking_at_matrix_or_assign_lhs |
1955 || (next_tok_is_dot && next_token_is_bin_op (spc_gobbled))) | |
1905 { | 1956 { |
1906 force_local_variable (tok); | 1957 force_local_variable (tok); |
1907 } | 1958 } |
1908 else if (! next_tok_is_paren) | 1959 else if (! next_tok_is_paren) |
1909 { | 1960 { |
1948 void | 1999 void |
1949 check_for_garbage_after_fcn_def (void) | 2000 check_for_garbage_after_fcn_def (void) |
1950 { | 2001 { |
1951 // By making a newline be the next character to be read, we will | 2002 // By making a newline be the next character to be read, we will |
1952 // force the parser to return after reading the function. Calling | 2003 // force the parser to return after reading the function. Calling |
1953 // yyunput with EOF seems not to work... | 2004 // unput with EOF does not work. |
1954 | 2005 |
1955 bool in_comment = false; | 2006 bool in_comment = false; |
1956 int lineno = input_line_number; | 2007 int lineno = input_line_number; |
1957 int c; | 2008 int c; |
1958 while ((c = yyinput ()) != EOF) | 2009 while ((c = yyinput ()) != EOF) |
1981 else | 2032 else |
1982 { | 2033 { |
1983 warning ("ignoring trailing garbage after end of function\n\ | 2034 warning ("ignoring trailing garbage after end of function\n\ |
1984 near line %d of file `%s.m'", lineno, curr_fcn_file_name.c_str ()); | 2035 near line %d of file `%s.m'", lineno, curr_fcn_file_name.c_str ()); |
1985 | 2036 |
1986 yyunput ('\n', yytext); | 2037 unput ('\n'); |
1987 return; | 2038 return; |
1988 } | 2039 } |
1989 } | 2040 } |
1990 } | 2041 } |
1991 yyunput ('\n', yytext); | 2042 unput ('\n'); |
1992 } | 2043 } |
1993 | 2044 |
1994 void | 2045 void |
1995 lexical_feedback::init (void) | 2046 lexical_feedback::init (void) |
1996 { | 2047 { |
2031 | 2082 |
2032 // Quote marks strings initially. | 2083 // Quote marks strings initially. |
2033 quote_is_transpose = false; | 2084 quote_is_transpose = false; |
2034 } | 2085 } |
2035 | 2086 |
2036 int | 2087 static int |
2037 whitespace_in_literal_matrix (void) | 2088 whitespace_in_literal_matrix (void) |
2038 { | 2089 { |
2039 int pref = 0; | 2090 int pref = 0; |
2040 | 2091 |
2041 string val = builtin_string_variable ("whitespace_in_literal_matrix"); | 2092 string val = builtin_string_variable ("whitespace_in_literal_matrix"); |