comparison src/lex.l @ 3246:a41cc560087a

[project @ 1999-06-19 06:46:20 by jwe]
author jwe
date Sat, 19 Jun 1999 06:46:35 +0000
parents 041ea33fbbf4
children 4964d5391acc
comparison
equal deleted inserted replaced
3245:2270329efd14 3246:a41cc560087a
168 static int is_keyword (const string& s); 168 static int is_keyword (const string& s);
169 static string plot_style_token (const string& s); 169 static string plot_style_token (const string& s);
170 static symbol_record *lookup_identifier (const string& s); 170 static symbol_record *lookup_identifier (const string& s);
171 static void grab_help_text (void); 171 static void grab_help_text (void);
172 static bool match_any (char c, const char *s); 172 static bool match_any (char c, const char *s);
173 static bool next_token_is_bin_op (int spc_prev, char *yytext); 173 static bool next_token_is_bin_op (bool spc_prev);
174 static bool next_token_is_postfix_unary_op (int spc_prev, char *yytext); 174 static bool next_token_is_postfix_unary_op (bool spc_prev);
175 static string strip_trailing_whitespace (char *s); 175 static string strip_trailing_whitespace (char *s);
176 static void handle_number (char *yytext); 176 static void handle_number (void);
177 static int handle_string (char delim, int text_style = 0); 177 static int handle_string (char delim, int text_style = 0);
178 static int handle_close_brace (int spc_gobbled); 178 static int handle_close_brace (int spc_gobbled);
179 static int handle_identifier (const string& tok, int spc_gobbled); 179 static int handle_identifier (const string& tok, int spc_gobbled);
180 static bool have_continuation (bool trailing_comments_ok = true); 180 static bool have_continuation (bool trailing_comments_ok = true);
181 static bool have_ellipsis_continuation (bool trailing_comments_ok = true); 181 static bool have_ellipsis_continuation (bool trailing_comments_ok = true);
234 } 234 }
235 } 235 }
236 236
237 <TEXT_FCN>[\"\'] { 237 <TEXT_FCN>[\"\'] {
238 current_input_column++; 238 current_input_column++;
239 return handle_string (yytext[0], 1); 239 return handle_string (yytext[0], true);
240 } 240 }
241 241
242 <TEXT_FCN>[^ \t\n\;\,\"\'][^ \t\n\;\,]*{S}* { 242 <TEXT_FCN>[^ \t\n\;\,\"\'][^ \t\n\;\,]*{S}* {
243 string tok = strip_trailing_whitespace (yytext); 243 string tok = strip_trailing_whitespace (yytext);
244 TOK_PUSH_AND_RETURN (tok, TEXT); 244 TOK_PUSH_AND_RETURN (tok, TEXT);
292 <MATRIX>{S}+ { 292 <MATRIX>{S}+ {
293 current_input_column += yyleng; 293 current_input_column += yyleng;
294 if (Vwhitespace_in_literal_matrix != 2) 294 if (Vwhitespace_in_literal_matrix != 2)
295 { 295 {
296 int tmp = eat_continuation (); 296 int tmp = eat_continuation ();
297 int bin_op = next_token_is_bin_op (1, yytext); 297 int bin_op = next_token_is_bin_op (true);
298 int postfix_un_op = next_token_is_postfix_unary_op (1, yytext); 298 int postfix_un_op = next_token_is_postfix_unary_op (true);
299 299
300 if (! (postfix_un_op || bin_op) 300 if (! (postfix_un_op || bin_op)
301 && nesting_level.is_brace () 301 && nesting_level.is_brace ()
302 && lexer_flags.convert_spaces_to_comma) 302 && lexer_flags.convert_spaces_to_comma)
303 { 303 {
395 %{ 395 %{
396 // Imaginary numbers. 396 // Imaginary numbers.
397 %} 397 %}
398 398
399 {NUMBER}{Im} { 399 {NUMBER}{Im} {
400 handle_number (yytext); 400 handle_number ();
401 return IMAG_NUM; 401 return IMAG_NUM;
402 } 402 }
403 403
404 %{ 404 %{
405 // Real numbers. Don't grab the `.' part of a dot operator as part of 405 // Real numbers. Don't grab the `.' part of a dot operator as part of
406 // the constant. 406 // the constant.
407 %} 407 %}
408 408
409 {D}+/\.[\*/\\^'] | 409 {D}+/\.[\*/\\^'] |
410 {NUMBER} { 410 {NUMBER} {
411 handle_number (yytext); 411 handle_number ();
412 return NUM; 412 return NUM;
413 } 413 }
414 414
415 %{ 415 %{
416 // Eat whitespace. Whitespace inside matrix constants is handled by 416 // Eat whitespace. Whitespace inside matrix constants is handled by
666 { 666 {
667 int spc_gobbled = eat_continuation (); 667 int spc_gobbled = eat_continuation ();
668 668
669 int c = yyinput (); 669 int c = yyinput ();
670 670
671 yyunput (c, yytext); 671 unput (c);
672 672
673 if (spc_gobbled) 673 if (spc_gobbled)
674 yyunput (' ', yytext); 674 unput (' ');
675 675
676 lexer_flags.do_comma_insert = (lexer_flags.braceflag && c == '['); 676 lexer_flags.do_comma_insert = (lexer_flags.braceflag && c == '[');
677 } 677 }
678 678
679 // Fix things up for errors or interrupts. The parser is never called 679 // Fix things up for errors or interrupts. The parser is never called
1170 } 1170 }
1171 1171
1172 done: 1172 done:
1173 1173
1174 if (c) 1174 if (c)
1175 yyunput (c, yytext); 1175 unput (c);
1176 } 1176 }
1177 1177
1178 // Return 1 if the given character matches any character in the given 1178 // Return 1 if the given character matches any character in the given
1179 // string. 1179 // string.
1180 1180
1195 // operator. For example, 1195 // operator. For example,
1196 // 1196 //
1197 // [ 1 + 2 ] or [ 1+ 2] or [ 1+2 ] ==> binary 1197 // [ 1 + 2 ] or [ 1+ 2] or [ 1+2 ] ==> binary
1198 1198
1199 static bool 1199 static bool
1200 looks_like_bin_op (int spc_prev, int spc_next) 1200 looks_like_bin_op (bool spc_prev, int next_char)
1201 { 1201 {
1202 bool spc_next = (next_char == ' ' || next_char == '\t');
1203
1202 return ((spc_prev && spc_next) || ! spc_prev); 1204 return ((spc_prev && spc_next) || ! spc_prev);
1203 } 1205 }
1204 1206
1205 // Try to determine if the next token should be treated as a postfix 1207 // Try to determine if the next token should be treated as a postfix
1206 // unary operator. This is ugly, but it seems to do the right thing. 1208 // unary operator. This is ugly, but it seems to do the right thing.
1207 1209
1208 static bool 1210 static bool
1209 next_token_is_postfix_unary_op (int spc_prev, char *yytext) 1211 next_token_is_postfix_unary_op (bool spc_prev)
1210 { 1212 {
1211 bool un_op = false; 1213 bool un_op = false;
1212 1214
1213 int c0 = yyinput (); 1215 int c0 = yyinput ();
1214 int c1 = yyinput (); 1216
1215 1217 if (c0 == '\'' && ! spc_prev)
1216 yyunput (c1, yytext); 1218 {
1217 yyunput (c0, yytext); 1219 un_op = true;
1218 1220 }
1219 int transpose = (c0 == '.' && c1 == '\''); 1221 else if (c0 == '.')
1220 int hermitian = (c0 == '\''); 1222 {
1221 1223 int c1 = yyinput ();
1222 un_op = (transpose || (hermitian && ! spc_prev)); 1224 un_op = (c1 == '\'');
1225 unput (c1);
1226 }
1227
1228 unput (c0);
1223 1229
1224 return un_op; 1230 return un_op;
1225 } 1231 }
1226 1232
1227 // Try to determine if the next token should be treated as a binary 1233 // Try to determine if the next token should be treated as a binary
1228 // operator. This is even uglier, but it also seems to do the right 1234 // operator.
1229 // thing. Note that it is only necessary to check the spacing for `+' 1235 //
1230 // and `-', since those are the only tokens that can appear as unary 1236 // This kluge exists because whitespace is not always ignored inside
1231 // ops too. 1237 // the square brackets that are used to create matrix objects.
1232 // 1238 //
1233 // Note that this never returns true for `.', even though it can be a 1239 // Line continuations directly after the operator will cause this
1234 // binary operator (the structure reference thing). The only time 1240 // function to return FALSE.
1235 // this appears to matter is for things like
1236 //
1237 // [ a . b ]
1238 //
1239 // which probably doesn't occur that often, can be worked around by
1240 // eliminating the whitespace, putting the expression in parentheses,
1241 // or using `whitespace_in_literal_matrix = "ignored"', so I think it
1242 // is an acceptable change. It would be quite a bit harder to `fix'
1243 // this. (Well, maybe not. the best fix would be to do away with the
1244 // specialness of whitespace inside of `[ ... ]').
1245 //
1246 // However, we still do check for `.+', `.*', etc.
1247 1241
1248 static bool 1242 static bool
1249 next_token_is_bin_op (int spc_prev, char *yytext) 1243 next_token_is_bin_op (bool spc_prev)
1250 { 1244 {
1251 bool bin_op = false; 1245 bool bin_op = false;
1252 1246
1253 int c0 = yyinput (); 1247 int c0 = yyinput ();
1254 1248
1255 switch (c0) 1249 switch (c0)
1256 { 1250 {
1251 case ':':
1257 case '+': 1252 case '+':
1258 case '-': 1253 case '-':
1254 case '/':
1255 case '\\':
1256 case '^':
1259 { 1257 {
1260 int c1 = yyinput (); 1258 int c1 = yyinput ();
1261 yyunput (c1, yytext); 1259 bin_op = looks_like_bin_op (spc_prev, c1);
1262 int spc_next = (c1 == ' ' || c1 == '\t'); 1260 unput (c1);
1263 bin_op = looks_like_bin_op (spc_prev, spc_next);
1264 } 1261 }
1265 break; 1262 break;
1266 1263
1264 // .+ .- ./ .\ .^ .* .**
1267 case '.': 1265 case '.':
1268 { 1266 {
1269 int c1 = yyinput (); 1267 int c1 = yyinput ();
1270 yyunput (c1, yytext); 1268
1271 bin_op = match_any (c1, "+-*/\\^"); 1269 if (match_any (c1, "+-/\\^"))
1270 {
1271 int c2 = yyinput ();
1272 bin_op = looks_like_bin_op (spc_prev, c2);
1273 unput (c2);
1274 }
1275 else if (c1 == '*')
1276 {
1277 int c2 = yyinput ();
1278
1279 if (c2 == '*')
1280 {
1281 int c3 = yyinput ();
1282 bin_op = looks_like_bin_op (spc_prev, c3);
1283 unput (c3);
1284 }
1285 else
1286 bin_op = looks_like_bin_op (spc_prev, c2);
1287
1288 unput (c2);
1289 }
1290 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t')
1291 {
1292 bin_op = true;
1293 }
1294
1295 unput (c1);
1272 } 1296 }
1273 break; 1297 break;
1274 1298
1275 case '/': 1299 // = == & && | || * **
1276 case ':': 1300 case '=':
1277 case '\\':
1278 case '^':
1279 case '&': 1301 case '&':
1302 case '|':
1280 case '*': 1303 case '*':
1281 case '|': 1304 {
1305 int c1 = yyinput ();
1306
1307 if (c1 == c0)
1308 {
1309 int c2 = yyinput ();
1310 bin_op = looks_like_bin_op (spc_prev, c2);
1311 unput (c2);
1312 }
1313 else
1314 bin_op = looks_like_bin_op (spc_prev, c1);
1315
1316 unput (c1);
1317 }
1318 break;
1319
1320 // <= >= <> ~= != < >
1282 case '<': 1321 case '<':
1283 case '>': 1322 case '>':
1284 case '~': 1323 case '~':
1285 case '!': 1324 case '!':
1286 case '=': 1325 {
1287 bin_op = true; 1326 int c1 = yyinput ();
1327
1328 if ((c1 == '=') || (c1 == '<' && c1 == '>'))
1329 {
1330 int c2 = yyinput ();
1331 bin_op = looks_like_bin_op (spc_prev, c2);
1332 unput (c2);
1333 }
1334 else if (c1 != '~' && c1 != '!')
1335 bin_op = looks_like_bin_op (spc_prev, c1);
1336
1337 unput (c1);
1338 }
1288 break; 1339 break;
1289 1340
1290 default: 1341 default:
1291 break; 1342 break;
1292 } 1343 }
1293 1344
1294 yyunput (c0, yytext); 1345 unput (c0);
1295 1346
1296 return bin_op; 1347 return bin_op;
1297 } 1348 }
1298 1349
1299 // Used to delete trailing white space from tokens. 1350 // Used to delete trailing white space from tokens.
1376 goto done; 1427 goto done;
1377 } 1428 }
1378 } 1429 }
1379 1430
1380 done: 1431 done:
1381 yyunput (c, yytext); 1432 unput (c);
1382 current_input_column--; 1433 current_input_column--;
1383 return retval; 1434 return retval;
1384 } 1435 }
1385 1436
1386 static inline bool 1437 static inline bool
1388 { 1439 {
1389 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')); 1440 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X'));
1390 } 1441 }
1391 1442
1392 static void 1443 static void
1393 handle_number (char *yytext) 1444 handle_number (void)
1394 { 1445 {
1395 char *tmp = strsave (yytext); 1446 char *tmp = strsave (yytext);
1396 1447
1397 char *idx = strpbrk (tmp, "Dd"); 1448 char *idx = strpbrk (tmp, "Dd");
1398 1449
1478 goto cleanup; 1529 goto cleanup;
1479 break; 1530 break;
1480 } 1531 }
1481 } 1532 }
1482 1533
1483 yyunput (c, yytext); 1534 unput (c);
1484 return false; 1535 return false;
1485 1536
1486 cleanup: 1537 cleanup:
1487 buf << ends; 1538 buf << ends;
1488 char *s = buf.str (); 1539 char *s = buf.str ();
1489 if (s) 1540 if (s)
1490 { 1541 {
1491 int len = strlen (s); 1542 int len = strlen (s);
1492 while (len--) 1543 while (len--)
1493 yyunput (s[len], yytext); 1544 unput (s[len]);
1494 } 1545 }
1495 delete [] s; 1546 delete [] s;
1496 1547
1497 return false; 1548 return false;
1498 } 1549 }
1510 char c2 = yyinput (); 1561 char c2 = yyinput ();
1511 if (c2 == '.' && have_continuation (trailing_comments_ok)) 1562 if (c2 == '.' && have_continuation (trailing_comments_ok))
1512 return true; 1563 return true;
1513 else 1564 else
1514 { 1565 {
1515 yyunput (c2, yytext); 1566 unput (c2);
1516 yyunput (c1, yytext); 1567 unput (c1);
1517 } 1568 }
1518 } 1569 }
1519 else 1570 else
1520 yyunput (c1, yytext); 1571 unput (c1);
1521 1572
1522 return false; 1573 return false;
1523 } 1574 }
1524 1575
1525 // See if we have a continuation line. If so, eat it and the leading 1576 // See if we have a continuation line. If so, eat it and the leading
1534 int c = yyinput (); 1585 int c = yyinput ();
1535 if ((c == '.' && have_ellipsis_continuation ()) 1586 if ((c == '.' && have_ellipsis_continuation ())
1536 || (c == '\\' && have_continuation ())) 1587 || (c == '\\' && have_continuation ()))
1537 retval = eat_whitespace (); 1588 retval = eat_whitespace ();
1538 else 1589 else
1539 yyunput (c, yytext); 1590 unput (c);
1540 1591
1541 return retval; 1592 return retval;
1542 } 1593 }
1543 1594
1544 static int 1595 static int
1591 c = yyinput (); 1642 c = yyinput ();
1592 if (c == delim) 1643 if (c == delim)
1593 buf << (char) c; 1644 buf << (char) c;
1594 else 1645 else
1595 { 1646 {
1596 yyunput (c, yytext); 1647 unput (c);
1597 buf << ends; 1648 buf << ends;
1598 char *t = buf.str (); 1649 char *t = buf.str ();
1599 string s = do_string_escapes (t); 1650 string s = do_string_escapes (t);
1600 delete [] t; 1651 delete [] t;
1601 1652
1734 int c1 = yyinput (); 1785 int c1 = yyinput ();
1735 unput (c1); 1786 unput (c1);
1736 1787
1737 if (lexer_flags.braceflag && Vwhitespace_in_literal_matrix != 2) 1788 if (lexer_flags.braceflag && Vwhitespace_in_literal_matrix != 2)
1738 { 1789 {
1739 int bin_op = next_token_is_bin_op (spc_gobbled, yytext); 1790 int bin_op = next_token_is_bin_op (spc_gobbled);
1740 int postfix_un_op = next_token_is_postfix_unary_op 1791 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
1741 (spc_gobbled, yytext);
1742 1792
1743 int other_op = match_any (c1, ",;\n]"); 1793 int other_op = match_any (c1, ",;\n]");
1744 1794
1745 if (! (postfix_un_op || bin_op || other_op) 1795 if (! (postfix_un_op || bin_op || other_op)
1746 && nesting_level.is_brace () 1796 && nesting_level.is_brace ()
1762 static void 1812 static void
1763 maybe_unput_comma (int spc_gobbled) 1813 maybe_unput_comma (int spc_gobbled)
1764 { 1814 {
1765 if (Vwhitespace_in_literal_matrix != 2 && nesting_level.is_brace ()) 1815 if (Vwhitespace_in_literal_matrix != 2 && nesting_level.is_brace ())
1766 { 1816 {
1767 int bin_op = next_token_is_bin_op (spc_gobbled, yytext); 1817 int bin_op = next_token_is_bin_op (spc_gobbled);
1768 1818
1769 int postfix_un_op 1819 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
1770 = next_token_is_postfix_unary_op (spc_gobbled, yytext);
1771 1820
1772 int c1 = yyinput (); 1821 int c1 = yyinput ();
1773 int c2 = yyinput (); 1822 int c2 = yyinput ();
1774 1823
1775 unput (c2); 1824 unput (c2);
1867 if (lexer_flags.cant_be_identifier && plot_option_kw) 1916 if (lexer_flags.cant_be_identifier && plot_option_kw)
1868 TOK_RETURN (plot_option_kw); 1917 TOK_RETURN (plot_option_kw);
1869 } 1918 }
1870 1919
1871 int c = yyinput (); 1920 int c = yyinput ();
1872 yyunput (c, yytext); 1921 unput (c);
1873 bool next_tok_is_eq = (c == '='); 1922 bool next_tok_is_eq = (c == '=');
1923 bool next_tok_is_dot = (c == '.');
1874 bool next_tok_is_paren = (c == '('); 1924 bool next_tok_is_paren = (c == '(');
1875 1925
1876 // Make sure we put the return values of a function in the symbol 1926 // Make sure we put the return values of a function in the symbol
1877 // table that is local to the function. 1927 // table that is local to the function.
1878 1928
1899 if (is_text_function_name (tok) && ! is_variable (tok)) 1949 if (is_text_function_name (tok) && ! is_variable (tok))
1900 { 1950 {
1901 if (next_tok_is_eq 1951 if (next_tok_is_eq
1902 || lexer_flags.looking_at_return_list 1952 || lexer_flags.looking_at_return_list
1903 || lexer_flags.looking_at_parameter_list 1953 || lexer_flags.looking_at_parameter_list
1904 || lexer_flags.looking_at_matrix_or_assign_lhs) 1954 || lexer_flags.looking_at_matrix_or_assign_lhs
1955 || (next_tok_is_dot && next_token_is_bin_op (spc_gobbled)))
1905 { 1956 {
1906 force_local_variable (tok); 1957 force_local_variable (tok);
1907 } 1958 }
1908 else if (! next_tok_is_paren) 1959 else if (! next_tok_is_paren)
1909 { 1960 {
1948 void 1999 void
1949 check_for_garbage_after_fcn_def (void) 2000 check_for_garbage_after_fcn_def (void)
1950 { 2001 {
1951 // By making a newline be the next character to be read, we will 2002 // By making a newline be the next character to be read, we will
1952 // force the parser to return after reading the function. Calling 2003 // force the parser to return after reading the function. Calling
1953 // yyunput with EOF seems not to work... 2004 // unput with EOF does not work.
1954 2005
1955 bool in_comment = false; 2006 bool in_comment = false;
1956 int lineno = input_line_number; 2007 int lineno = input_line_number;
1957 int c; 2008 int c;
1958 while ((c = yyinput ()) != EOF) 2009 while ((c = yyinput ()) != EOF)
1981 else 2032 else
1982 { 2033 {
1983 warning ("ignoring trailing garbage after end of function\n\ 2034 warning ("ignoring trailing garbage after end of function\n\
1984 near line %d of file `%s.m'", lineno, curr_fcn_file_name.c_str ()); 2035 near line %d of file `%s.m'", lineno, curr_fcn_file_name.c_str ());
1985 2036
1986 yyunput ('\n', yytext); 2037 unput ('\n');
1987 return; 2038 return;
1988 } 2039 }
1989 } 2040 }
1990 } 2041 }
1991 yyunput ('\n', yytext); 2042 unput ('\n');
1992 } 2043 }
1993 2044
1994 void 2045 void
1995 lexical_feedback::init (void) 2046 lexical_feedback::init (void)
1996 { 2047 {
2031 2082
2032 // Quote marks strings initially. 2083 // Quote marks strings initially.
2033 quote_is_transpose = false; 2084 quote_is_transpose = false;
2034 } 2085 }
2035 2086
2036 int 2087 static int
2037 whitespace_in_literal_matrix (void) 2088 whitespace_in_literal_matrix (void)
2038 { 2089 {
2039 int pref = 0; 2090 int pref = 0;
2040 2091
2041 string val = builtin_string_variable ("whitespace_in_literal_matrix"); 2092 string val = builtin_string_variable ("whitespace_in_literal_matrix");