Mercurial > hg > octave-lyh
annotate src/lex.l @ 7634:ae90e05ad299
fix parameter list initializer bug
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Tue, 25 Mar 2008 14:32:00 -0400 |
parents | 1f662945c2be |
children | 5b4d278ec828 |
rev | line source |
---|---|
1994 | 1 /* |
1 | 2 |
7017 | 3 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
4 2002, 2003, 2004, 2005, 2006, 2007 John W. Eaton | |
1 | 5 |
6 This file is part of Octave. | |
7 | |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
1 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
1 | 21 |
22 */ | |
23 | |
4753 | 24 %option prefix = "octave_" |
25 | |
4208 | 26 %s COMMAND_START |
27 %s MATRIX_START | |
4240 | 28 |
29 %x NESTED_FUNCTION_END | |
30 %x NESTED_FUNCTION_BEGIN | |
1 | 31 |
32 %{ | |
240 | 33 #ifdef HAVE_CONFIG_H |
1220 | 34 #include <config.h> |
240 | 35 #endif |
36 | |
1341 | 37 #include <cctype> |
38 #include <cstring> | |
39 | |
5765 | 40 #include <sstream> |
1823 | 41 #include <string> |
4214 | 42 #include <stack> |
1823 | 43 |
4093 | 44 #ifdef HAVE_UNISTD_H |
45 #ifdef HAVE_SYS_TYPES_H | |
46 #include <sys/types.h> | |
47 #endif | |
48 #include <unistd.h> | |
49 #endif | |
50 | |
2926 | 51 #include "cmd-edit.h" |
4153 | 52 #include "quit.h" |
4910 | 53 #include "lo-mappers.h" |
2926 | 54 |
1497 | 55 // These would be alphabetical, but y.tab.h must be included before |
56 // oct-gperf.h and y.tab.h must be included after token.h and the tree | |
57 // class declarations. We can't include y.tab.h in oct-gperf.h | |
58 // because it may not be protected to allow it to be included multiple | |
59 // times. | |
60 | |
4264 | 61 #include "Cell.h" |
3665 | 62 #include "comment-list.h" |
2181 | 63 #include "defun.h" |
1355 | 64 #include "error.h" |
4910 | 65 #include "gripes.h" |
1351 | 66 #include "input.h" |
1355 | 67 #include "lex.h" |
2891 | 68 #include "ov.h" |
1355 | 69 #include "parse.h" |
2987 | 70 #include "pt-all.h" |
2891 | 71 #include "symtab.h" |
72 #include "token.h" | |
73 #include "toplev.h" | |
1355 | 74 #include "utils.h" |
75 #include "variables.h" | |
2492 | 76 #include <y.tab.h> |
77 #include <oct-gperf.h> | |
1 | 78 |
2716 | 79 #if ! (defined (FLEX_SCANNER) \ |
80 && defined (YY_FLEX_MAJOR_VERSION) && YY_FLEX_MAJOR_VERSION >= 2 \ | |
81 && defined (YY_FLEX_MINOR_VERSION) && YY_FLEX_MINOR_VERSION >= 5) | |
82 #error lex.l requires flex version 2.5.4 or later | |
83 #endif | |
84 | |
4753 | 85 #define yylval octave_lval |
86 | |
87 // Arrange to get input via readline. | |
88 | |
89 #ifdef YY_INPUT | |
90 #undef YY_INPUT | |
91 #endif | |
92 #define YY_INPUT(buf, result, max_size) \ | |
93 if ((result = octave_read (buf, max_size)) < 0) \ | |
94 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); | |
95 | |
96 // Try to avoid crashing out completely on fatal scanner errors. | |
97 // The call to yy_fatal_error should never happen, but it avoids a | |
98 // `static function defined but not used' warning from gcc. | |
99 | |
100 #ifdef YY_FATAL_ERROR | |
101 #undef YY_FATAL_ERROR | |
102 #endif | |
103 #define YY_FATAL_ERROR(msg) \ | |
104 do \ | |
105 { \ | |
106 error (msg); \ | |
107 OCTAVE_QUIT; \ | |
108 yy_fatal_error (msg); \ | |
109 } \ | |
110 while (0) | |
111 | |
4910 | 112 #define COUNT_TOK_AND_RETURN(tok) \ |
113 do \ | |
114 { \ | |
115 Vtoken_count++; \ | |
116 return tok; \ | |
117 } \ | |
118 while (0) | |
119 | |
4753 | 120 #define TOK_RETURN(tok) \ |
121 do \ | |
122 { \ | |
123 current_input_column += yyleng; \ | |
124 lexer_flags.quote_is_transpose = false; \ | |
125 lexer_flags.convert_spaces_to_comma = true; \ | |
4910 | 126 COUNT_TOK_AND_RETURN (tok); \ |
4753 | 127 } \ |
128 while (0) | |
129 | |
130 #define TOK_PUSH_AND_RETURN(name, tok) \ | |
131 do \ | |
132 { \ | |
133 yylval.tok_val = new token (name, input_line_number, \ | |
134 current_input_column); \ | |
135 token_stack.push (yylval.tok_val); \ | |
136 TOK_RETURN (tok); \ | |
137 } \ | |
138 while (0) | |
139 | |
140 #define BIN_OP_RETURN(tok, convert) \ | |
141 do \ | |
142 { \ | |
143 yylval.tok_val = new token (input_line_number, current_input_column); \ | |
144 token_stack.push (yylval.tok_val); \ | |
145 current_input_column += yyleng; \ | |
146 lexer_flags.quote_is_transpose = false; \ | |
147 lexer_flags.convert_spaces_to_comma = convert; \ | |
4910 | 148 COUNT_TOK_AND_RETURN (tok); \ |
4753 | 149 } \ |
150 while (0) | |
151 | |
152 #define XBIN_OP_RETURN(tok, convert) \ | |
153 do \ | |
154 { \ | |
155 gripe_matlab_incompatible_operator (yytext); \ | |
156 BIN_OP_RETURN (tok, convert); \ | |
157 } \ | |
158 while (0) | |
159 | |
3883 | 160 // TRUE means that we have encountered EOF on the input stream. |
161 bool parser_end_of_input = false; | |
162 | |
1826 | 163 // Flags that need to be shared between the lexer and parser. |
164 lexical_feedback lexer_flags; | |
165 | |
1351 | 166 // Stack to hold tokens so that we can delete them when the parser is |
167 // reset and avoid growing forever just because we are stashing some | |
168 // information. This has to appear before lex.h is included, because | |
169 // one of the macros defined there uses token_stack. | |
2614 | 170 // |
5775 | 171 // FIXME -- this should really be static, but that causes |
2614 | 172 // problems on some systems. |
4214 | 173 std::stack <token*> token_stack; |
1351 | 174 |
1826 | 175 // Did eat_whitespace() eat a space or tab, or a newline, or both? |
1 | 176 |
1826 | 177 typedef int yum_yum; |
1 | 178 |
1826 | 179 const yum_yum ATE_NOTHING = 0; |
180 const yum_yum ATE_SPACE_OR_TAB = 1; | |
181 const yum_yum ATE_NEWLINE = 2; | |
1088 | 182 |
// Is the closest nesting level a square bracket, squiggly brace or a paren?
//
// Keeps a stack with one marker per currently open delimiter.  The
// lexer pushes a marker when it sees an opening delimiter and calls
// remove () when it sees a closing one.  Every query is safe on an
// empty stack: the is_* predicates answer false and none () answers
// true.

class bracket_brace_paren_nesting_level
{
public:

  bracket_brace_paren_nesting_level (void) : context () { }

  ~bracket_brace_paren_nesting_level (void) { }

  // Note that a square bracket has been opened.
  void bracket (void) { context.push (BRACKET); }

  // True if the innermost open delimiter is a square bracket.
  bool is_bracket (void) const
    { return ! context.empty () && context.top () == BRACKET; }

  // Note that a curly brace has been opened.
  void brace (void) { context.push (BRACE); }

  // True if the innermost open delimiter is a curly brace.
  bool is_brace (void) const
    { return ! context.empty () && context.top () == BRACE; }

  // Note that a parenthesis has been opened.
  void paren (void) { context.push (PAREN); }

  // True if the innermost open delimiter is a parenthesis.
  bool is_paren (void) const
    { return ! context.empty () && context.top () == PAREN; }

  // True if the innermost open delimiter is a square bracket or a
  // curly brace.
  bool is_bracket_or_brace (void) const
    { return (! context.empty ()
	      && (context.top () == BRACKET || context.top () == BRACE)); }

  // True if no delimiter is currently open.
  bool none (void) const { return context.empty (); }

  // Forget the innermost open delimiter.  Does nothing if none is
  // open, so an unbalanced closing delimiter is harmless here.
  void remove (void) { if (! context.empty ()) context.pop (); }

  // Forget all open delimiters.
  void clear (void) { while (! context.empty ()) context.pop (); }

private:

  // One marker (BRACKET, BRACE or PAREN) per open delimiter, innermost
  // on top.
  std::stack<int> context;

  static const int BRACKET;
  static const int BRACE;
  static const int PAREN;

  // No copying: declared but intentionally not implemented.

  bracket_brace_paren_nesting_level (const bracket_brace_paren_nesting_level&);

  bracket_brace_paren_nesting_level&
  operator = (const bracket_brace_paren_nesting_level&);
};

const int bracket_brace_paren_nesting_level::BRACKET = 1;
const int bracket_brace_paren_nesting_level::BRACE = 2;
const int bracket_brace_paren_nesting_level::PAREN = 3;
232 | |
3351 | 233 static bracket_brace_paren_nesting_level nesting_level; |
1 | 234 |
4910 | 235 static unsigned int Vtoken_count = 0; |
236 | |
146 | 237 // Forward declarations for functions defined at the bottom of this |
238 // file. | |
239 | |
1 | 240 static void fixup_column_count (char *s); |
146 | 241 static void do_comma_insert_check (void); |
4867 | 242 static int is_keyword_token (const std::string& s); |
4238 | 243 static void prep_for_function (void); |
244 static void prep_for_nested_function (void); | |
4426 | 245 static std::string grab_help_text (void); |
2857 | 246 static bool match_any (char c, const char *s); |
3263 | 247 static bool next_token_is_sep_op (void); |
3246 | 248 static bool next_token_is_bin_op (bool spc_prev); |
249 static bool next_token_is_postfix_unary_op (bool spc_prev); | |
3523 | 250 static std::string strip_trailing_whitespace (char *s); |
3246 | 251 static void handle_number (void); |
975 | 252 static int handle_string (char delim, int text_style = 0); |
4612 | 253 static int handle_close_bracket (bool spc_gobbled, int bracket_type); |
3974 | 254 static int handle_identifier (void); |
3096 | 255 static bool have_continuation (bool trailing_comments_ok = true); |
256 static bool have_ellipsis_continuation (bool trailing_comments_ok = true); | |
3665 | 257 static void scan_for_comments (const char *); |
1826 | 258 static yum_yum eat_whitespace (void); |
259 static yum_yum eat_continuation (void); | |
3388 | 260 static void maybe_warn_separator_insert (char sep); |
3400 | 261 static void gripe_single_quote_string (void); |
4037 | 262 static void gripe_matlab_incompatible (const std::string& msg); |
263 static void maybe_gripe_matlab_incompatible_comment (char c); | |
264 static void gripe_matlab_incompatible_continuation (void); | |
265 static void gripe_matlab_incompatible_operator (const std::string& op); | |
1 | 266 |
267 %} | |
268 | |
269 D [0-9] | |
270 S [ \t] | |
5570 | 271 NL ((\n)|(\r)|(\r\n)) |
2042 | 272 SNL ({S}|{NL}) |
1 | 273 EL (\.\.\.) |
967 | 274 BS (\\) |
275 CONT ({EL}|{BS}) | |
1 | 276 Im [iIjJ] |
967 | 277 CCHAR [#%] |
278 COMMENT ({CCHAR}.*{NL}) | |
279 SNLCMT ({SNL}|{COMMENT}) | |
280 NOT ((\~)|(\!)) | |
4037 | 281 POW ((\*\*)|(\^)) |
282 EPOW (\.{POW}) | |
5290 | 283 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) |
1 | 284 EXPON ([DdEe][+-]?{D}+) |
3220 | 285 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) |
1 | 286 %% |
287 | |
4240 | 288 <NESTED_FUNCTION_END>. { |
4323 | 289 BEGIN (NESTED_FUNCTION_BEGIN); |
4410 | 290 yyunput (yytext[0], yytext); |
4910 | 291 COUNT_TOK_AND_RETURN (';'); |
4240 | 292 } |
293 | |
294 <NESTED_FUNCTION_BEGIN>. { | |
4323 | 295 BEGIN (INITIAL); |
4410 | 296 yyunput (yytext[0], yytext); |
4238 | 297 prep_for_nested_function (); |
4910 | 298 COUNT_TOK_AND_RETURN (FCN); |
4238 | 299 } |
300 | |
968 | 301 %{ |
4208 | 302 // Help and other command-style functions are a pain in the ass. This |
968 | 303 // stuff needs to be simplified. May require some changes in the |
304 // parser too. | |
305 %} | |
306 | |
4208 | 307 <COMMAND_START>{NL} { |
4323 | 308 BEGIN (INITIAL); |
967 | 309 current_input_column = 1; |
2857 | 310 lexer_flags.quote_is_transpose = false; |
311 lexer_flags.convert_spaces_to_comma = true; | |
5212 | 312 lexer_flags.doing_rawcommand = false; |
4910 | 313 COUNT_TOK_AND_RETURN ('\n'); |
967 | 314 } |
1 | 315 |
4208 | 316 <COMMAND_START>[\;\,] { |
5102 | 317 if (lexer_flags.doing_rawcommand) |
5279 | 318 TOK_PUSH_AND_RETURN (yytext, SQ_STRING); |
5102 | 319 |
320 BEGIN (INITIAL); | |
321 | |
322 if (strcmp (yytext, ",") == 0) | |
323 TOK_RETURN (','); | |
967 | 324 else |
5102 | 325 TOK_RETURN (';'); |
967 | 326 } |
1 | 327 |
4208 | 328 <COMMAND_START>[\"\'] { |
975 | 329 current_input_column++; |
4910 | 330 COUNT_TOK_AND_RETURN (handle_string (yytext[0], true)); |
975 | 331 } |
332 | |
4923 | 333 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { |
3523 | 334 std::string tok = strip_trailing_whitespace (yytext); |
5279 | 335 TOK_PUSH_AND_RETURN (tok, SQ_STRING); |
967 | 336 } |
1 | 337 |
968 | 338 %{ |
1 | 339 // For this and the next two rules, we're looking at ']', and we |
971 | 340 // need to know if the next token is `=' or `=='. |
1 | 341 // |
342 // It would have been so much easier if the delimiters were simply | |
343 // different for the expression on the left hand side of the equals | |
344 // operator. | |
971 | 345 // |
346 // It's also a pain in the ass to decide whether to insert a comma | |
347 // after seeing a ']' character... | |
968 | 348 %} |
349 | |
4208 | 350 <MATRIX_START>{SNLCMT}*\]{S}* { |
3665 | 351 scan_for_comments (yytext); |
1001 | 352 fixup_column_count (yytext); |
353 int c = yytext[yyleng-1]; | |
354 int cont_is_spc = eat_continuation (); | |
4608 | 355 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); |
5345 | 356 int tok_to_return = handle_close_bracket (spc_gobbled, ']'); |
357 if (spc_gobbled) | |
358 yyunput (' ', yytext); | |
359 COUNT_TOK_AND_RETURN (tok_to_return); | |
4608 | 360 } |
361 | |
362 <MATRIX_START>{SNLCMT}*\}{S}* { | |
363 scan_for_comments (yytext); | |
364 fixup_column_count (yytext); | |
365 int c = yytext[yyleng-1]; | |
366 int cont_is_spc = eat_continuation (); | |
367 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
5345 | 368 int tok_to_return = handle_close_bracket (spc_gobbled, '}'); |
369 if (spc_gobbled) | |
370 yyunput (' ', yytext); | |
371 COUNT_TOK_AND_RETURN (tok_to_return); | |
967 | 372 } |
1 | 373 |
968 | 374 %{ |
1088 | 375 // Commas are element separators in matrix constants. If we don't |
376 // check for continuations here we can end up inserting too many | |
377 // commas. | |
968 | 378 %} |
379 | |
4208 | 380 <MATRIX_START>{S}*\,{S}* { |
1088 | 381 current_input_column += yyleng; |
3388 | 382 |
1088 | 383 int tmp = eat_continuation (); |
3388 | 384 |
2857 | 385 lexer_flags.quote_is_transpose = false; |
386 lexer_flags.convert_spaces_to_comma = true; | |
3388 | 387 |
388 if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) | |
389 { | |
390 maybe_warn_separator_insert (';'); | |
391 | |
4476 | 392 yyunput (';', yytext); |
3388 | 393 } |
394 | |
4910 | 395 COUNT_TOK_AND_RETURN (','); |
967 | 396 } |
1 | 397 |
968 | 398 %{ |
399 // In some cases, spaces in matrix constants can turn into commas. | |
400 // If commas are required, spaces are not important in matrix | |
1088 | 401 // constants so we just eat them. If we don't check for continuations |
402 // here we can end up inserting too many commas. | |
968 | 403 %} |
430 | 404 |
4208 | 405 <MATRIX_START>{S}+ { |
1088 | 406 current_input_column += yyleng; |
3388 | 407 |
408 int tmp = eat_continuation (); | |
409 int bin_op = next_token_is_bin_op (true); | |
410 int postfix_un_op = next_token_is_postfix_unary_op (true); | |
411 | |
412 if (! (postfix_un_op || bin_op) | |
4608 | 413 && nesting_level.is_bracket_or_brace () |
3388 | 414 && lexer_flags.convert_spaces_to_comma) |
967 | 415 { |
3388 | 416 if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) |
417 { | |
418 maybe_warn_separator_insert (';'); | |
967 | 419 |
4476 | 420 yyunput (';', yytext); |
3388 | 421 } |
422 | |
4476 | 423 lexer_flags.quote_is_transpose = false; |
424 lexer_flags.convert_spaces_to_comma = true; | |
425 | |
426 maybe_warn_separator_insert (','); | |
427 | |
4910 | 428 COUNT_TOK_AND_RETURN (','); |
967 | 429 } |
430 } | |
430 | 431 |
968 | 432 %{ |
1088 | 433 // Semicolons are handled as row separators in matrix constants. If we |
434 // don't eat whitespace here we can end up inserting too many | |
435 // semicolons. | |
968 | 436 %} |
437 | |
4208 | 438 <MATRIX_START>{SNLCMT}*;{SNLCMT}* { |
3665 | 439 scan_for_comments (yytext); |
967 | 440 fixup_column_count (yytext); |
1001 | 441 eat_whitespace (); |
2857 | 442 lexer_flags.quote_is_transpose = false; |
443 lexer_flags.convert_spaces_to_comma = true; | |
4910 | 444 COUNT_TOK_AND_RETURN (';'); |
967 | 445 } |
446 | |
968 | 447 %{ |
1088 | 448 // In some cases, new lines can also become row separators. If we |
449 // don't eat whitespace here we can end up inserting too many | |
450 // semicolons. | |
985 | 451 %} |
452 | |
4208 | 453 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* | |
454 <MATRIX_START>{S}*{NL}{SNLCMT}* { | |
3665 | 455 scan_for_comments (yytext); |
1082 | 456 fixup_column_count (yytext); |
1088 | 457 eat_whitespace (); |
3388 | 458 |
4476 | 459 lexer_flags.quote_is_transpose = false; |
460 lexer_flags.convert_spaces_to_comma = true; | |
461 | |
462 if (nesting_level.none ()) | |
463 return LEXICAL_ERROR; | |
985 | 464 |
4608 | 465 if (nesting_level.is_bracket_or_brace ()) |
3388 | 466 { |
467 maybe_warn_separator_insert (';'); | |
468 | |
4910 | 469 COUNT_TOK_AND_RETURN (';'); |
985 | 470 } |
471 } | |
472 | |
967 | 473 \[{S}* { |
3351 | 474 nesting_level.bracket (); |
975 | 475 |
1082 | 476 current_input_column += yyleng; |
2857 | 477 lexer_flags.quote_is_transpose = false; |
478 lexer_flags.convert_spaces_to_comma = true; | |
975 | 479 |
5615 | 480 if (lexer_flags.defining_func && ! lexer_flags.parsed_function_name) |
481 lexer_flags.looking_at_return_list = true; | |
482 else | |
483 lexer_flags.looking_at_matrix_or_assign_lhs = true; | |
484 | |
975 | 485 promptflag--; |
486 eat_whitespace (); | |
487 | |
5102 | 488 lexer_flags.bracketflag++; |
489 BEGIN (MATRIX_START); | |
490 COUNT_TOK_AND_RETURN ('['); | |
967 | 491 } |
1 | 492 |
968 | 493 \] { |
1826 | 494 nesting_level.remove (); |
968 | 495 |
5102 | 496 TOK_RETURN (']'); |
968 | 497 } |
498 | |
499 %{ | |
500 // Imaginary numbers. | |
501 %} | |
502 | |
503 {NUMBER}{Im} { | |
3246 | 504 handle_number (); |
4910 | 505 COUNT_TOK_AND_RETURN (IMAG_NUM); |
968 | 506 } |
507 | |
508 %{ | |
509 // Real numbers. Don't grab the `.' part of a dot operator as part of | |
510 // the constant. | |
511 %} | |
512 | |
513 {D}+/\.[\*/\\^'] | | |
514 {NUMBER} { | |
3246 | 515 handle_number (); |
4910 | 516 COUNT_TOK_AND_RETURN (NUM); |
968 | 517 } |
518 | |
519 %{ | |
520 // Eat whitespace. Whitespace inside matrix constants is handled by | |
4208 | 521 // the <MATRIX_START> start state code above. |
968 | 522 %} |
523 | |
967 | 524 {S}* { |
525 current_input_column += yyleng; | |
526 } | |
527 | |
968 | 528 %{ |
529 // Continuation lines. Allow comments after continuations. | |
530 %} | |
531 | |
967 | 532 {CONT}{S}*{NL} | |
533 {CONT}{S}*{COMMENT} { | |
4037 | 534 if (yytext[0] == '\\') |
535 gripe_matlab_incompatible_continuation (); | |
3665 | 536 scan_for_comments (yytext); |
967 | 537 promptflag--; |
538 current_input_column = 1; | |
539 } | |
1 | 540 |
968 | 541 %{ |
542 // End of file. | |
543 %} | |
544 | |
967 | 545 <<EOF>> { |
546 TOK_RETURN (END_OF_INPUT); | |
547 } | |
1 | 548 |
968 | 549 %{ |
970 | 550 // Identifiers. Truncate the token at the first space or tab but |
551 // don't write directly on yytext. | |
968 | 552 %} |
553 | |
967 | 554 {IDENT}{S}* { |
4238 | 555 int id_tok = handle_identifier (); |
556 | |
557 if (id_tok >= 0) | |
4910 | 558 COUNT_TOK_AND_RETURN (id_tok); |
967 | 559 } |
1 | 560 |
968 | 561 %{ |
4342 | 562 // Function handles. |
563 %} | |
564 | |
4930 | 565 "@" { |
566 current_input_column++; | |
567 lexer_flags.quote_is_transpose = false; | |
568 lexer_flags.convert_spaces_to_comma = false; | |
569 lexer_flags.looking_at_function_handle++; | |
570 COUNT_TOK_AND_RETURN ('@'); | |
4342 | 571 } |
572 | |
573 %{ | |
968 | 574 // A new line character. New line characters inside matrix constants |
4208 | 575 // are handled by the <MATRIX_START> start state code above. If closest |
985 | 576 // nesting is inside parentheses, don't return a row separator. |
968 | 577 %} |
578 | |
967 | 579 {NL} { |
580 current_input_column = 1; | |
2857 | 581 lexer_flags.quote_is_transpose = false; |
582 lexer_flags.convert_spaces_to_comma = true; | |
1826 | 583 if (nesting_level.none ()) |
4910 | 584 COUNT_TOK_AND_RETURN ('\n'); |
4037 | 585 else if (nesting_level.is_paren ()) |
586 gripe_matlab_incompatible ("bare newline inside parentheses"); | |
4608 | 587 else if (nesting_level.is_bracket_or_brace ()) |
985 | 588 return LEXICAL_ERROR; |
967 | 589 } |
1 | 590 |
968 | 591 %{ |
592 // Single quote can either be the beginning of a string or a transpose | |
593 // operator. | |
594 %} | |
595 | |
967 | 596 "'" { |
597 current_input_column++; | |
2857 | 598 lexer_flags.convert_spaces_to_comma = true; |
1 | 599 |
1826 | 600 if (lexer_flags.quote_is_transpose) |
967 | 601 { |
602 do_comma_insert_check (); | |
4910 | 603 COUNT_TOK_AND_RETURN (QUOTE); |
967 | 604 } |
605 else | |
4910 | 606 COUNT_TOK_AND_RETURN (handle_string ('\'')); |
967 | 607 } |
1 | 608 |
968 | 609 %{ |
971 | 610 // Double quotes always begin strings. |
611 %} | |
612 | |
973 | 613 \" { |
614 current_input_column++; | |
4910 | 615 COUNT_TOK_AND_RETURN (handle_string ('"')); |
973 | 616 } |
971 | 617 |
618 %{ | |
985 | 619 // Gobble comments. If closest nesting is inside parentheses, don't |
620 // return a new line. | |
621 %} | |
968 | 622 |
{CCHAR} {
    // A comment character was seen.  Either collect the comment as the
    // help text of the function being defined, or read it to the end
    // of the line and store it in the comment buffer.

    std::string help_txt;

    if (! help_buf.empty ())
      help_txt = help_buf.top ();

    if (help_txt.empty ()
	&& lexer_flags.beginning_of_function
	&& nesting_level.none ())
      {
	lexer_flags.beginning_of_function = false;

	std::string txt = grab_help_text ();

	// Replace the current (empty) help buffer entry with the text
	// that was just collected.
	if (! help_buf.empty ())
	  help_buf.pop ();

	help_buf.push (txt);

	octave_comment_buffer::append (txt);
      }
    else
      {
	std::string buf;

	bool begin_comment = true;

	int c;
	while ((c = yyinput ()) != EOF && c != '\n')
	  {
	    if (begin_comment && (c == '#' || c == '%'))
	      ; /* Skip leading comment characters. */
	    else
	      {
		// Only the leading run of comment characters is
		// skipped.  Once any other character has been seen,
		// later '#' and '%' characters belong to the comment
		// text and must be kept.
		begin_comment = false;

		buf += static_cast<char> (c);
	      }
	  }

	octave_comment_buffer::append (buf);
      }

    current_input_column = 1;
    lexer_flags.quote_is_transpose = false;
    lexer_flags.convert_spaces_to_comma = true;

    maybe_gripe_matlab_incompatible_comment (yytext[0]);

    if (YY_START == COMMAND_START)
      BEGIN (INITIAL);

    // Outside of any delimiters the comment ends the line.  Inside
    // square brackets or curly braces it acts as a row separator.
    // Inside parentheses no token is returned.
    if (nesting_level.none ())
      {
	lexer_flags.doing_rawcommand = false;
	COUNT_TOK_AND_RETURN ('\n');
      }
    else if (nesting_level.is_bracket_or_brace ())
      COUNT_TOK_AND_RETURN (';');
  }
440 | 679 |
968 | 680 %{ |
681 // Other operators. | |
682 %} | |
683 | |
5102 | 684 ":" { BIN_OP_RETURN (':', false); } |
685 | |
4037 | 686 ".+" { XBIN_OP_RETURN (EPLUS, false); } |
687 ".-" { XBIN_OP_RETURN (EMINUS, false); } | |
2857 | 688 ".*" { BIN_OP_RETURN (EMUL, false); } |
689 "./" { BIN_OP_RETURN (EDIV, false); } | |
690 ".\\" { BIN_OP_RETURN (ELEFTDIV, false); } | |
4037 | 691 ".^" { BIN_OP_RETURN (EPOW, false); } |
692 ".**" { XBIN_OP_RETURN (EPOW, false); } | |
2857 | 693 ".'" { do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true); } |
4037 | 694 "++" { do_comma_insert_check (); XBIN_OP_RETURN (PLUS_PLUS, true); } |
695 "--" { do_comma_insert_check (); XBIN_OP_RETURN (MINUS_MINUS, true); } | |
2857 | 696 "<=" { BIN_OP_RETURN (EXPR_LE, false); } |
697 "==" { BIN_OP_RETURN (EXPR_EQ, false); } | |
4037 | 698 "~=" { BIN_OP_RETURN (EXPR_NE, false); } |
699 "!=" { XBIN_OP_RETURN (EXPR_NE, false); } | |
2857 | 700 ">=" { BIN_OP_RETURN (EXPR_GE, false); } |
2877 | 701 "&" { BIN_OP_RETURN (EXPR_AND, false); } |
2857 | 702 "|" { BIN_OP_RETURN (EXPR_OR, false); } |
703 "<" { BIN_OP_RETURN (EXPR_LT, false); } | |
704 ">" { BIN_OP_RETURN (EXPR_GT, false); } | |
5102 | 705 "+" { BIN_OP_RETURN ('+', false); } |
706 "-" { BIN_OP_RETURN ('-', false); } | |
2857 | 707 "*" { BIN_OP_RETURN ('*', false); } |
708 "/" { BIN_OP_RETURN ('/', false); } | |
709 "\\" { BIN_OP_RETURN (LEFTDIV, false); } | |
710 ";" { BIN_OP_RETURN (';', true); } | |
711 "," { BIN_OP_RETURN (',', true); } | |
4037 | 712 "^" { BIN_OP_RETURN (POW, false); } |
713 "**" { XBIN_OP_RETURN (POW, false); } | |
2857 | 714 "=" { BIN_OP_RETURN ('=', true); } |
2877 | 715 "&&" { BIN_OP_RETURN (EXPR_AND_AND, false); } |
2857 | 716 "||" { BIN_OP_RETURN (EXPR_OR_OR, false); } |
4037 | 717 "<<" { XBIN_OP_RETURN (LSHIFT, false); } |
718 ">>" { XBIN_OP_RETURN (RSHIFT, false); } | |
967 | 719 |
720 {NOT} { | |
4037 | 721 if (yytext[0] == '~') |
722 BIN_OP_RETURN (EXPR_NOT, false); | |
723 else | |
724 XBIN_OP_RETURN (EXPR_NOT, false); | |
967 | 725 } |
1 | 726 |
967 | 727 "(" { |
4131 | 728 lexer_flags.looking_at_indirect_ref = false; |
1826 | 729 nesting_level.paren (); |
985 | 730 promptflag--; |
967 | 731 TOK_RETURN ('('); |
732 } | |
733 | |
734 ")" { | |
1826 | 735 nesting_level.remove (); |
967 | 736 current_input_column++; |
2857 | 737 lexer_flags.quote_is_transpose = true; |
4608 | 738 lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace (); |
1001 | 739 do_comma_insert_check (); |
4910 | 740 COUNT_TOK_AND_RETURN (')'); |
967 | 741 } |
742 | |
5102 | 743 "." { TOK_RETURN ('.'); } |
2066 | 744 |
4037 | 745 "+=" { XBIN_OP_RETURN (ADD_EQ, false); } |
746 "-=" { XBIN_OP_RETURN (SUB_EQ, false); } | |
747 "*=" { XBIN_OP_RETURN (MUL_EQ, false); } | |
748 "/=" { XBIN_OP_RETURN (DIV_EQ, false); } | |
749 "\\=" { XBIN_OP_RETURN (LEFTDIV_EQ, false); } | |
750 ".+=" { XBIN_OP_RETURN (ADD_EQ, false); } | |
751 ".-=" { XBIN_OP_RETURN (SUB_EQ, false); } | |
752 ".*=" { XBIN_OP_RETURN (EMUL_EQ, false); } | |
753 "./=" { XBIN_OP_RETURN (EDIV_EQ, false); } | |
754 ".\\=" { XBIN_OP_RETURN (ELEFTDIV_EQ, false); } | |
755 {POW}= { XBIN_OP_RETURN (POW_EQ, false); } | |
756 {EPOW}= { XBIN_OP_RETURN (EPOW_EQ, false); } | |
757 "&=" { XBIN_OP_RETURN (AND_EQ, false); } | |
758 "|=" { XBIN_OP_RETURN (OR_EQ, false); } | |
759 "<<=" { XBIN_OP_RETURN (LSHIFT_EQ, false); } | |
760 ">>=" { XBIN_OP_RETURN (RSHIFT_EQ, false); } | |
2877 | 761 |
4608 | 762 \{{S}* { |
3351 | 763 nesting_level.brace (); |
4608 | 764 |
765 current_input_column += yyleng; | |
766 lexer_flags.quote_is_transpose = false; | |
767 lexer_flags.convert_spaces_to_comma = true; | |
768 | |
3351 | 769 promptflag--; |
4608 | 770 eat_whitespace (); |
771 | |
4613 | 772 lexer_flags.braceflag++; |
4608 | 773 BEGIN (MATRIX_START); |
4910 | 774 COUNT_TOK_AND_RETURN ('{'); |
3351 | 775 } |
776 | |
777 "}" { | |
778 nesting_level.remove (); | |
779 | |
4608 | 780 TOK_RETURN ('}'); |
3351 | 781 } |
782 | |
968 | 783 %{ |
2066 | 784 // Unrecognized input is a lexical error. |
968 | 785 %} |
1 | 786 |
2042 | 787 . { |
4240 | 788 // EOF happens here if we are parsing nested functions. |
789 | |
4410 | 790 yyunput (yytext[0], yytext); |
4248 | 791 |
792 int c = yyinput (); | |
793 | |
794 if (c != EOF) | |
4240 | 795 { |
796 current_input_column++; | |
797 | |
798 error ("invalid character `%s' (ASCII %d) near line %d, column %d", | |
4248 | 799 undo_string_escape (static_cast<char> (c)), c, |
4240 | 800 input_line_number, current_input_column); |
801 | |
802 return LEXICAL_ERROR; | |
803 } | |
804 else | |
805 TOK_RETURN (END_OF_INPUT); | |
2066 | 806 } |
1 | 807 |
808 %% | |
809 | |
767 | 810 // GAG. |
811 // | |
812 // If we're reading a matrix and the next character is '[', make sure | |
813 // that we insert a comma ahead of it. | |
814 | |
146 | 815 void |
1 | 816 do_comma_insert_check (void) |
817 { | |
1001 | 818 int spc_gobbled = eat_continuation (); |
2970 | 819 |
1 | 820 int c = yyinput (); |
2970 | 821 |
4410 | 822 yyunput (c, yytext); |
2970 | 823 |
1001 | 824 if (spc_gobbled) |
4410 | 825 yyunput (' ', yytext); |
2970 | 826 |
3351 | 827 lexer_flags.do_comma_insert = (lexer_flags.bracketflag && c == '['); |
1 | 828 } |
829 | |
767 | 830 // Fix things up for errors or interrupts. The parser is never called |
831 // recursively, so it is always safe to reinitialize its state before | |
832 // doing any parsing. | |
833 | |
1 | 834 void |
835 reset_parser (void) | |
836 { | |
1826 | 837 // Start off on the right foot. |
4323 | 838 BEGIN (INITIAL); |
4318 | 839 |
3883 | 840 parser_end_of_input = false; |
4238 | 841 end_tokens_expected = 0; |
842 | |
843 while (! symtab_context.empty ()) | |
844 symtab_context.pop (); | |
287 | 845 |
7336 | 846 symbol_table::reset_parent_scope (); |
847 | |
1826 | 848 // We do want a prompt by default. |
1 | 849 promptflag = 1; |
287 | 850 |
3351 | 851 // Error may have occurred inside some brackets, braces, or parentheses. |
985 | 852 nesting_level.clear (); |
287 | 853 |
1826 | 854 // Clear out the stack of token info used to track line and column |
855 // numbers. | |
143 | 856 while (! token_stack.empty ()) |
4214 | 857 { |
858 delete token_stack.top (); | |
859 token_stack.pop (); | |
860 } | |
287 | 861 |
1826 | 862 // Can be reset by defining a function. |
985 | 863 if (! (reading_script_file || reading_fcn_file)) |
864 { | |
865 current_input_column = 1; | |
2926 | 866 input_line_number = command_editor::current_command_number () - 1; |
985 | 867 } |
287 | 868 |
1826 | 869 // Only ask for input from stdin if we are expecting interactive |
870 // input. | |
3174 | 871 if ((interactive || forced_interactive) |
3880 | 872 && ! (reading_fcn_file |
873 || reading_script_file | |
874 || get_input_from_eval_string | |
3174 | 875 || input_from_startup_file)) |
287 | 876 yyrestart (stdin); |
991 | 877 |
1826 | 878 // Clear the buffer for help text. |
4426 | 879 while (! help_buf.empty ()) |
880 help_buf.pop (); | |
1755 | 881 |
1826 | 882 // Reset other flags. |
883 lexer_flags.init (); | |
1 | 884 } |
885 | |
767 | 886 // If we read some newlines, we need figure out what column we're |
887 // really looking at. | |
888 | |
1 | 889 static void |
890 fixup_column_count (char *s) | |
891 { | |
892 char c; | |
893 while ((c = *s++) != '\0') | |
894 { | |
895 if (c == '\n') | |
143 | 896 current_input_column = 1; |
1 | 897 else |
898 current_input_column++; | |
899 } | |
900 } | |
901 | |
// Provide our own yywrap so that we don't have to link to libfl.a.
// It always returns 1.

int
yywrap (void)
{
  const int wrap_result = 1;

  return wrap_result;
}
909 | |
767 | 910 // Tell us all what the current buffer is. |
911 | |
1 | 912 YY_BUFFER_STATE |
913 current_buffer (void) | |
914 { | |
915 return YY_CURRENT_BUFFER; | |
916 } | |
917 | |
767 | 918 // Create a new buffer. |
919 | |
1 | 920 YY_BUFFER_STATE |
921 create_buffer (FILE *f) | |
922 { | |
923 return yy_create_buffer (f, YY_BUF_SIZE); | |
924 } | |
925 | |
767 | 926 // Start reading a new buffer. |
927 | |
1 | 928 void |
929 switch_to_buffer (YY_BUFFER_STATE buf) | |
930 { | |
931 yy_switch_to_buffer (buf); | |
932 } | |
933 | |
767 | 934 // Delete a buffer. |
935 | |
1 | 936 void |
937 delete_buffer (YY_BUFFER_STATE buf) | |
938 { | |
939 yy_delete_buffer (buf); | |
940 } | |
941 | |
767 | 942 // Restore a buffer (for unwind-prot). |
943 | |
1 | 944 void |
945 restore_input_buffer (void *buf) | |
946 { | |
2861 | 947 switch_to_buffer (static_cast<YY_BUFFER_STATE> (buf)); |
1 | 948 } |
949 | |
767 | 950 // Delete a buffer (for unwind-prot). |
951 | |
1 | 952 void |
953 delete_input_buffer (void *buf) | |
954 { | |
2861 | 955 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); |
1 | 956 } |
957 | |
4238 | 958 static void |
959 prep_for_function (void) | |
960 { | |
961 end_tokens_expected++; | |
962 | |
963 promptflag--; | |
964 | |
965 lexer_flags.defining_func = true; | |
966 lexer_flags.parsed_function_name = false; | |
967 lexer_flags.beginning_of_function = true; | |
968 | |
969 if (! (reading_fcn_file || reading_script_file)) | |
970 input_line_number = 1; | |
971 } | |
972 | |
973 static void | |
974 prep_for_nested_function (void) | |
975 { | |
4240 | 976 lexer_flags.parsing_nested_function = 1; |
4426 | 977 help_buf.push (std::string ()); |
4238 | 978 prep_for_function (); |
4240 | 979 // We're still only expecting one end token for this set of functions. |
980 end_tokens_expected--; | |
4238 | 981 yylval.tok_val = new token (input_line_number, current_input_column); |
982 token_stack.push (yylval.tok_val); | |
983 } | |
984 | |
985 // Handle keywords. Return -1 if the keyword should be ignored. | |
767 | 986 |
1 | 987 static int |
4867 | 988 is_keyword_token (const std::string& s) |
1 | 989 { |
3805 | 990 int l = input_line_number; |
991 int c = current_input_column; | |
992 | |
1823 | 993 int len = s.length (); |
922 | 994 |
5088 | 995 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); |
191 | 996 |
1497 | 997 if (kw) |
143 | 998 { |
1497 | 999 yylval.tok_val = 0; |
1000 | |
1001 switch (kw->kw_id) | |
1002 { | |
1003 case break_kw: | |
2764 | 1004 case case_kw: |
1497 | 1005 case catch_kw: |
1006 case continue_kw: | |
1007 case else_kw: | |
1008 case elseif_kw: | |
1009 case global_kw: | |
2764 | 1010 case otherwise_kw: |
1497 | 1011 case return_kw: |
2846 | 1012 case static_kw: |
3484 | 1013 case until_kw: |
1497 | 1014 case unwind_protect_cleanup_kw: |
1015 break; | |
1016 | |
1017 case end_kw: | |
4234 | 1018 if (lexer_flags.looking_at_object_index) |
1019 return 0; | |
1020 else | |
4238 | 1021 { |
1022 if (reading_fcn_file && end_tokens_expected == 1) | |
1023 return -1; | |
1024 else | |
1025 { | |
1026 yylval.tok_val = new token (token::simple_end, l, c); | |
1027 end_tokens_expected--; | |
1028 } | |
1029 } | |
1497 | 1030 break; |
1031 | |
1032 case end_try_catch_kw: | |
4238 | 1033 end_tokens_expected--; |
1497 | 1034 yylval.tok_val = new token (token::try_catch_end, l, c); |
1035 break; | |
1036 | |
1037 case end_unwind_protect_kw: | |
4238 | 1038 end_tokens_expected--; |
1497 | 1039 yylval.tok_val = new token (token::unwind_protect_end, l, c); |
1040 break; | |
1041 | |
1042 case endfor_kw: | |
4238 | 1043 end_tokens_expected--; |
1497 | 1044 yylval.tok_val = new token (token::for_end, l, c); |
1045 break; | |
1046 | |
1047 case endfunction_kw: | |
4238 | 1048 { |
1049 if (reading_fcn_file && end_tokens_expected == 1) | |
1050 return -1; | |
1051 else | |
1052 { | |
1053 yylval.tok_val = new token (token::function_end, l, c); | |
1054 end_tokens_expected--; | |
1055 } | |
1056 } | |
1497 | 1057 break; |
1058 | |
1059 case endif_kw: | |
4238 | 1060 end_tokens_expected--; |
1497 | 1061 yylval.tok_val = new token (token::if_end, l, c); |
1062 break; | |
1063 | |
2764 | 1064 case endswitch_kw: |
4238 | 1065 end_tokens_expected--; |
2764 | 1066 yylval.tok_val = new token (token::switch_end, l, c); |
1067 break; | |
1068 | |
1497 | 1069 case endwhile_kw: |
4238 | 1070 end_tokens_expected--; |
1497 | 1071 yylval.tok_val = new token (token::while_end, l, c); |
1072 break; | |
1073 | |
1074 case for_kw: | |
1075 case while_kw: | |
4238 | 1076 end_tokens_expected++; |
1077 // Fall through... | |
1078 | |
1079 case do_kw: | |
1497 | 1080 promptflag--; |
1826 | 1081 lexer_flags.looping++; |
1497 | 1082 break; |
1083 | |
1084 case if_kw: | |
1085 case try_kw: | |
2764 | 1086 case switch_kw: |
1497 | 1087 case unwind_protect_kw: |
4238 | 1088 end_tokens_expected++; |
1497 | 1089 promptflag--; |
1090 break; | |
1091 | |
1092 case function_kw: | |
4238 | 1093 { |
1094 if (lexer_flags.defining_func) | |
1095 { | |
1096 if (reading_fcn_file) | |
1097 { | |
1098 if (lexer_flags.parsing_nested_function) | |
1099 { | |
4323 | 1100 BEGIN (NESTED_FUNCTION_END); |
4240 | 1101 |
4238 | 1102 yylval.tok_val = new token (token::function_end, l, c); |
4240 | 1103 token_stack.push (yylval.tok_val); |
1104 | |
1105 return END; | |
4238 | 1106 } |
1107 else | |
1108 { | |
1109 prep_for_nested_function (); | |
4240 | 1110 |
4238 | 1111 return FCN; |
1112 } | |
1113 } | |
1114 else | |
1115 { | |
1116 error ("nested functions not implemented in this context"); | |
1117 | |
1118 if ((reading_fcn_file || reading_script_file) | |
1119 && ! curr_fcn_file_name.empty ()) | |
1120 error ("near line %d of file `%s.m'", | |
1121 input_line_number, curr_fcn_file_name.c_str ()); | |
1122 else | |
1123 error ("near line %d", input_line_number); | |
1124 | |
1125 return LEXICAL_ERROR; | |
1126 } | |
1127 } | |
1128 else | |
1129 prep_for_function (); | |
1130 } | |
1497 | 1131 break; |
1132 | |
3174 | 1133 case magic_file_kw: |
1134 { | |
1135 if ((reading_fcn_file || reading_script_file) | |
1136 && ! curr_fcn_file_full_name.empty ()) | |
1137 yylval.tok_val = new token (curr_fcn_file_full_name, l, c); | |
1138 else | |
1139 yylval.tok_val = new token ("stdin", l, c); | |
1140 } | |
1141 break; | |
1142 | |
1143 case magic_line_kw: | |
1144 yylval.tok_val = new token (static_cast<double> (l), "", l, c); | |
1145 break; | |
1146 | |
1497 | 1147 default: |
1148 panic_impossible (); | |
1149 } | |
1150 | |
1151 if (! yylval.tok_val) | |
1152 yylval.tok_val = new token (l, c); | |
1153 | |
476 | 1154 token_stack.push (yylval.tok_val); |
1497 | 1155 |
1156 return kw->tok; | |
143 | 1157 } |
1 | 1158 |
1159 return 0; | |
1160 } | |
1161 | |
2702 | 1162 static bool |
3523 | 1163 is_variable (const std::string& name) |
2702 | 1164 { |
7336 | 1165 return symbol_table::is_variable (name); |
2702 | 1166 } |
1167 | |
1168 static void | |
3523 | 1169 force_local_variable (const std::string& name) |
2702 | 1170 { |
7336 | 1171 octave_value& val = symbol_table::varref (name); |
1172 | |
1173 if (! val.is_defined ()) | |
1174 val = Matrix (); | |
2702 | 1175 } |
1176 | |
4426 | 1177 // Grab the help text from an function file. |
767 | 1178 |
5775 | 1179 // FIXME -- gobble_leading_white_space() in parse.y |
2300 | 1180 // duplicates some of this code! |
1181 | |
4426 | 1182 static std::string |
1 | 1183 grab_help_text (void) |
1184 { | |
4426 | 1185 std::string buf; |
1019 | 1186 |
2300 | 1187 bool begin_comment = true; |
1188 bool in_comment = true; | |
3427 | 1189 bool discard_space = true; |
3665 | 1190 |
1019 | 1191 int c = 0; |
1 | 1192 |
1019 | 1193 while ((c = yyinput ()) != EOF) |
1194 { | |
2300 | 1195 if (begin_comment) |
1196 { | |
1197 if (c == '%' || c == '#') | |
1198 continue; | |
3427 | 1199 else if (discard_space && c == ' ') |
1200 { | |
1201 discard_space = false; | |
1202 continue; | |
1203 } | |
2300 | 1204 else |
1205 begin_comment = false; | |
1206 } | |
1207 | |
1019 | 1208 if (in_comment) |
1 | 1209 { |
4426 | 1210 buf += static_cast<char> (c); |
1755 | 1211 |
1019 | 1212 if (c == '\n') |
3427 | 1213 { |
1214 in_comment = false; | |
1215 discard_space = true; | |
1216 } | |
1019 | 1217 } |
1218 else | |
1219 { | |
1220 switch (c) | |
991 | 1221 { |
4037 | 1222 case '#': |
1019 | 1223 case '%': |
4037 | 1224 maybe_gripe_matlab_incompatible_comment (yytext[0]); |
2300 | 1225 in_comment = true; |
1226 begin_comment = true; | |
1019 | 1227 break; |
777 | 1228 |
1019 | 1229 case ' ': |
1230 case '\t': | |
1231 break; | |
777 | 1232 |
1019 | 1233 default: |
1234 goto done; | |
1 | 1235 } |
1236 } | |
1019 | 1237 } |
991 | 1238 |
1019 | 1239 done: |
991 | 1240 |
1019 | 1241 if (c) |
4410 | 1242 yyunput (c, yytext); |
4426 | 1243 |
1244 return buf; | |
1 | 1245 } |
1246 | |
// Return true if the character C occurs anywhere in the
// NUL-terminated string S, false otherwise.  The terminating NUL
// itself is never matched.

static bool
match_any (char c, const char *s)
{
  for (const char *p = s; *p != '\0'; p++)
    if (*p == c)
      return true;

  return false;
}
1261 | |
// Given information about the spacing surrounding an operator,
// return true if it looks like it should be treated as a binary
// operator.  SPC_PREV says whether whitespace preceded the operator
// and NEXT_CHAR is the character that follows it.  For example,
//
//   [ 1 + 2 ]  or  [ 1+ 2]  or  [ 1+2 ]  ==>  binary
//
//   [ 1 +2 ]  ==>  unary

static bool
looks_like_bin_op (bool spc_prev, int next_char)
{
  // With no space in front, the operator is always binary.
  if (! spc_prev)
    return true;

  // With a space in front, it is binary only if one follows too.
  return next_char == ' ' || next_char == '\t';
}
1277 | |
3263 | 1278 // Recognize separators. If the separator is a CRLF pair, it is |
1279 // replaced by a single LF. | |
1280 | |
1281 static bool | |
1282 next_token_is_sep_op (void) | |
1283 { | |
1284 bool retval = false; | |
1285 | |
1286 int c1 = yyinput (); | |
1287 | |
1288 if (c1 == '\r') | |
1289 { | |
1290 int c2 = yyinput (); | |
1291 | |
1292 if (c2 == '\n') | |
1293 { | |
1294 c1 = '\n'; | |
1295 | |
1296 retval = true; | |
1297 } | |
1298 else | |
4410 | 1299 yyunput (c2, yytext); |
3263 | 1300 } |
1301 else | |
1302 retval = match_any (c1, ",;\n]"); | |
1303 | |
4410 | 1304 yyunput (c1, yytext); |
3263 | 1305 |
1306 return retval; | |
1307 } | |
1308 | |
767 | 1309 // Try to determine if the next token should be treated as a postfix |
1310 // unary operator. This is ugly, but it seems to do the right thing. | |
1311 | |
2857 | 1312 static bool |
3246 | 1313 next_token_is_postfix_unary_op (bool spc_prev) |
1 | 1314 { |
2857 | 1315 bool un_op = false; |
1 | 1316 |
1317 int c0 = yyinput (); | |
1318 | |
3246 | 1319 if (c0 == '\'' && ! spc_prev) |
1320 { | |
1321 un_op = true; | |
1322 } | |
1323 else if (c0 == '.') | |
1324 { | |
1325 int c1 = yyinput (); | |
1326 un_op = (c1 == '\''); | |
4410 | 1327 yyunput (c1, yytext); |
3246 | 1328 } |
4613 | 1329 else if (c0 == '+') |
1330 { | |
1331 int c1 = yyinput (); | |
1332 un_op = (c1 == '+'); | |
1333 yyunput (c1, yytext); | |
1334 } | |
1335 else if (c0 == '-') | |
1336 { | |
1337 int c1 = yyinput (); | |
1338 un_op = (c1 == '-'); | |
1339 yyunput (c1, yytext); | |
1340 } | |
1 | 1341 |
4410 | 1342 yyunput (c0, yytext); |
1 | 1343 |
1344 return un_op; | |
1345 } | |
1346 | |
767 | 1347 // Try to determine if the next token should be treated as a binary |
3246 | 1348 // operator. |
1521 | 1349 // |
3246 | 1350 // This kluge exists because whitespace is not always ignored inside |
3774 | 1351 // the square brackets that are used to create matrix objects (though |
1352 // spacing only really matters in the cases that can be interpreted | |
1353 // either as binary ops or prefix unary ops: currently just +, -). | |
1354 // | |
3779 | 1355 // Note that a line continuation directly following a + or - operator |
1356 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
1357 // parsed as a binary operator. | |
767 | 1358 |
2857 | 1359 static bool |
3246 | 1360 next_token_is_bin_op (bool spc_prev) |
1 | 1361 { |
2857 | 1362 bool bin_op = false; |
1 | 1363 |
1364 int c0 = yyinput (); | |
1365 | |
1366 switch (c0) | |
1367 { | |
777 | 1368 case '+': |
1369 case '-': | |
3774 | 1370 { |
1371 int c1 = yyinput (); | |
1372 | |
1373 switch (c1) | |
1374 { | |
1375 case '+': | |
1376 case '-': | |
1377 // Unary ops, spacing doesn't matter. | |
1378 break; | |
1379 | |
1380 case '=': | |
1381 // Binary ops, spacing doesn't matter. | |
1382 bin_op = true; | |
1383 break; | |
1384 | |
1385 default: | |
1386 // Could be either, spacing matters. | |
1387 bin_op = looks_like_bin_op (spc_prev, c1); | |
1388 break; | |
1389 } | |
1390 | |
4410 | 1391 yyunput (c1, yytext); |
3774 | 1392 } |
1393 break; | |
1394 | |
1395 case ':': | |
3246 | 1396 case '/': |
1397 case '\\': | |
1398 case '^': | |
3774 | 1399 // Always a binary op (may also include /=, \=, and ^=). |
1400 bin_op = true; | |
1276 | 1401 break; |
1402 | |
3246 | 1403 // .+ .- ./ .\ .^ .* .** |
1554 | 1404 case '.': |
1405 { | |
1406 int c1 = yyinput (); | |
3246 | 1407 |
3774 | 1408 if (match_any (c1, "+-/\\^*")) |
1409 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
1410 bin_op = true; | |
3698 | 1411 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') |
3774 | 1412 // A structure element reference is a binary op. |
1413 bin_op = true; | |
3246 | 1414 |
4410 | 1415 yyunput (c1, yytext); |
1554 | 1416 } |
1417 break; | |
1418 | |
3246 | 1419 // = == & && | || * ** |
1420 case '=': | |
1 | 1421 case '&': |
3246 | 1422 case '|': |
1 | 1423 case '*': |
3774 | 1424 // Always a binary op (may also include ==, &&, ||, **). |
1425 bin_op = true; | |
3246 | 1426 break; |
1427 | |
3774 | 1428 // < <= <> > >= |
1 | 1429 case '<': |
1430 case '>': | |
3774 | 1431 // Always a binary op (may also include <=, <>, >=). |
1432 bin_op = true; | |
1433 break; | |
1434 | |
1435 // ~= != | |
777 | 1436 case '~': |
1437 case '!': | |
3246 | 1438 { |
1439 int c1 = yyinput (); | |
1440 | |
3774 | 1441 // ~ and ! can be unary ops, so require following =. |
1442 if (c1 == '=') | |
1443 bin_op = true; | |
3246 | 1444 |
4410 | 1445 yyunput (c1, yytext); |
3246 | 1446 } |
1 | 1447 break; |
1448 | |
1449 default: | |
1276 | 1450 break; |
1 | 1451 } |
1452 | |
4410 | 1453 yyunput (c0, yytext); |
1 | 1454 |
1455 return bin_op; | |
1456 } | |
1457 | |
// Used to delete trailing white space from tokens.
//
// Return a copy of S truncated at the first space or tab; if S
// contains neither, the copy is returned unchanged.  A null S yields
// an empty string.

static std::string
strip_trailing_whitespace (char *s)
{
  // Constructing a std::string from a null pointer is undefined.
  std::string retval = s ? s : "";

  size_t pos = retval.find_first_of (" \t");

  if (pos != std::string::npos)
    retval.resize (pos);

  return retval;
}
1472 | |
3665 | 1473 static void |
1474 scan_for_comments (const char *text) | |
1475 { | |
1476 std::string comment_buf; | |
1477 | |
1478 bool in_comment = false; | |
1479 bool beginning_of_comment = false; | |
1480 | |
1481 int len = strlen (text); | |
1482 int i = 0; | |
1483 | |
1484 while (i < len) | |
1485 { | |
1486 char c = text[i++]; | |
1487 | |
1488 switch (c) | |
1489 { | |
1490 case '%': | |
1491 case '#': | |
1492 if (in_comment) | |
1493 { | |
1494 if (! beginning_of_comment) | |
3802 | 1495 comment_buf += static_cast<char> (c); |
3665 | 1496 } |
1497 else | |
1498 { | |
4037 | 1499 maybe_gripe_matlab_incompatible_comment (c); |
3665 | 1500 in_comment = true; |
1501 beginning_of_comment = true; | |
1502 } | |
1503 break; | |
1504 | |
1505 case '\n': | |
1506 if (in_comment) | |
1507 { | |
3802 | 1508 comment_buf += static_cast<char> (c); |
3665 | 1509 octave_comment_buffer::append (comment_buf); |
1510 comment_buf.resize (0); | |
1511 in_comment = false; | |
1512 beginning_of_comment = false; | |
1513 } | |
1514 break; | |
1515 | |
1516 case '\r': | |
1517 if (in_comment) | |
3802 | 1518 comment_buf += static_cast<char> (c); |
3665 | 1519 if (i < len) |
1520 { | |
1521 c = text[i++]; | |
1522 | |
1523 if (c == '\n') | |
1524 { | |
1525 if (in_comment) | |
1526 { | |
3802 | 1527 comment_buf += static_cast<char> (c); |
3665 | 1528 octave_comment_buffer::append (comment_buf); |
1529 in_comment = false; | |
1530 beginning_of_comment = false; | |
1531 } | |
1532 } | |
1533 } | |
1534 | |
1535 default: | |
1536 if (in_comment) | |
1537 { | |
3802 | 1538 comment_buf += static_cast<char> (c); |
3665 | 1539 beginning_of_comment = false; |
1540 } | |
1541 break; | |
1542 } | |
1543 } | |
1544 | |
1545 if (! comment_buf.empty ()) | |
1546 octave_comment_buffer::append (comment_buf); | |
1547 } | |
1548 | |
1001 | 1549 // Discard whitespace, including comments and continuations. |
1088 | 1550 // |
1551 // Return value is logical OR of the following values: | |
1552 // | |
1826 | 1553 // ATE_NOTHING : no spaces to eat |
1088 | 1554 // ATE_SPACE_OR_TAB : space or tab in input |
1555 // ATE_NEWLINE : bare new line in input | |
1001 | 1556 |
1826 | 1557 static yum_yum |
975 | 1558 eat_whitespace (void) |
1559 { | |
1826 | 1560 yum_yum retval = ATE_NOTHING; |
3665 | 1561 |
1562 std::string comment_buf; | |
1563 | |
2857 | 1564 bool in_comment = false; |
3665 | 1565 bool beginning_of_comment = false; |
1566 | |
1567 int c = 0; | |
1568 | |
975 | 1569 while ((c = yyinput ()) != EOF) |
1570 { | |
1571 current_input_column++; | |
1572 | |
1573 switch (c) | |
1574 { | |
1575 case ' ': | |
1576 case '\t': | |
3665 | 1577 if (in_comment) |
1578 { | |
3802 | 1579 comment_buf += static_cast<char> (c); |
3665 | 1580 beginning_of_comment = false; |
1581 } | |
1088 | 1582 retval |= ATE_SPACE_OR_TAB; |
975 | 1583 break; |
1584 | |
1585 case '\n': | |
1088 | 1586 retval |= ATE_NEWLINE; |
3665 | 1587 if (in_comment) |
1588 { | |
3802 | 1589 comment_buf += static_cast<char> (c); |
3665 | 1590 octave_comment_buffer::append (comment_buf); |
1591 comment_buf.resize (0); | |
1592 in_comment = false; | |
1593 beginning_of_comment = false; | |
1594 } | |
975 | 1595 current_input_column = 0; |
1596 break; | |
1597 | |
1598 case '#': | |
1599 case '%': | |
3665 | 1600 if (in_comment) |
1601 { | |
1602 if (! beginning_of_comment) | |
3802 | 1603 comment_buf += static_cast<char> (c); |
3665 | 1604 } |
1605 else | |
1606 { | |
4037 | 1607 maybe_gripe_matlab_incompatible_comment (c); |
3665 | 1608 in_comment = true; |
1609 beginning_of_comment = true; | |
1610 } | |
975 | 1611 break; |
1612 | |
1001 | 1613 case '.': |
1614 if (in_comment) | |
3665 | 1615 { |
3802 | 1616 comment_buf += static_cast<char> (c); |
3665 | 1617 beginning_of_comment = false; |
1618 break; | |
1619 } | |
1001 | 1620 else |
1621 { | |
1622 if (have_ellipsis_continuation ()) | |
1623 break; | |
1624 else | |
1625 goto done; | |
1626 } | |
1627 | |
1628 case '\\': | |
1629 if (in_comment) | |
3665 | 1630 { |
3802 | 1631 comment_buf += static_cast<char> (c); |
3665 | 1632 beginning_of_comment = false; |
1633 break; | |
1634 } | |
1001 | 1635 else |
1636 { | |
3105 | 1637 if (have_continuation ()) |
1001 | 1638 break; |
1639 else | |
1640 goto done; | |
1641 } | |
1642 | |
6187 | 1643 case '\r': |
1644 if (in_comment) | |
1645 comment_buf += static_cast<char> (c); | |
1646 c = yyinput (); | |
1647 if (c == EOF) | |
1648 break; | |
1649 else if (c == '\n') | |
1650 { | |
1651 retval |= ATE_NEWLINE; | |
1652 if (in_comment) | |
1653 { | |
1654 comment_buf += static_cast<char> (c); | |
1655 octave_comment_buffer::append (comment_buf); | |
1656 in_comment = false; | |
1657 beginning_of_comment = false; | |
1658 } | |
1659 current_input_column = 0; | |
1660 break; | |
1661 } | |
1662 | |
1663 // Fall through... | |
1664 | |
975 | 1665 default: |
1666 if (in_comment) | |
3665 | 1667 { |
3802 | 1668 comment_buf += static_cast<char> (c); |
3665 | 1669 beginning_of_comment = false; |
1670 break; | |
1671 } | |
975 | 1672 else |
1673 goto done; | |
1674 } | |
1675 } | |
1676 | |
3665 | 1677 if (! comment_buf.empty ()) |
1678 octave_comment_buffer::append (comment_buf); | |
1679 | |
975 | 1680 done: |
4410 | 1681 yyunput (c, yytext); |
1082 | 1682 current_input_column--; |
1001 | 1683 return retval; |
975 | 1684 } |
1685 | |
// Return true if the LEN-character string S is longer than two
// characters and begins with "0x" or "0X".

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
1691 | |
975 | 1692 static void |
3246 | 1693 handle_number (void) |
972 | 1694 { |
3220 | 1695 double value = 0.0; |
1696 int nread = 0; | |
1697 | |
3598 | 1698 if (looks_like_hex (yytext, strlen (yytext))) |
3220 | 1699 { |
1700 unsigned long ival; | |
3598 | 1701 |
1702 nread = sscanf (yytext, "%lx", &ival); | |
1703 | |
3220 | 1704 value = static_cast<double> (ival); |
1705 } | |
1706 else | |
3598 | 1707 { |
1708 char *tmp = strsave (yytext); | |
1709 | |
1710 char *idx = strpbrk (tmp, "Dd"); | |
2621 | 1711 |
3598 | 1712 if (idx) |
1713 *idx = 'e'; | |
1714 | |
1715 nread = sscanf (tmp, "%lf", &value); | |
1716 | |
1717 delete [] tmp; | |
1718 } | |
972 | 1719 |
1826 | 1720 // If yytext doesn't contain a valid number, we are in deep doo doo. |
985 | 1721 |
972 | 1722 assert (nread == 1); |
1723 | |
3988 | 1724 lexer_flags.quote_is_transpose = true; |
1725 lexer_flags.convert_spaces_to_comma = true; | |
972 | 1726 |
1727 yylval.tok_val = new token (value, yytext, input_line_number, | |
1728 current_input_column); | |
1729 | |
1730 token_stack.push (yylval.tok_val); | |
1731 | |
1732 current_input_column += yyleng; | |
1733 | |
1734 do_comma_insert_check (); | |
1735 } | |
1736 | |
1001 | 1737 // We have seen a backslash and need to find out if it should be |
1738 // treated as a continuation character. If so, this eats it, up to | |
1739 // and including the new line character. | |
1740 // | |
973 | 1741 // Match whitespace only, followed by a comment character or newline. |
1742 // Once a comment character is found, discard all input until newline. | |
1743 // If non-whitespace characters are found before comment | |
1744 // characters, return 0. Otherwise, return 1. | |
1745 | |
2857 | 1746 static bool |
3096 | 1747 have_continuation (bool trailing_comments_ok) |
973 | 1748 { |
5765 | 1749 std::ostringstream buf; |
973 | 1750 |
3665 | 1751 std::string comment_buf; |
1752 | |
2857 | 1753 bool in_comment = false; |
3665 | 1754 bool beginning_of_comment = false; |
1755 | |
1756 int c = 0; | |
1757 | |
973 | 1758 while ((c = yyinput ()) != EOF) |
1759 { | |
3802 | 1760 buf << static_cast<char> (c); |
973 | 1761 |
1762 switch (c) | |
1763 { | |
1764 case ' ': | |
1765 case '\t': | |
3665 | 1766 if (in_comment) |
1767 { | |
3802 | 1768 comment_buf += static_cast<char> (c); |
3665 | 1769 beginning_of_comment = false; |
1770 } | |
973 | 1771 break; |
1772 | |
1773 case '%': | |
1774 case '#': | |
1091 | 1775 if (trailing_comments_ok) |
3665 | 1776 { |
1777 if (in_comment) | |
1778 { | |
1779 if (! beginning_of_comment) | |
3802 | 1780 comment_buf += static_cast<char> (c); |
3665 | 1781 } |
1782 else | |
1783 { | |
4037 | 1784 maybe_gripe_matlab_incompatible_comment (c); |
3665 | 1785 in_comment = true; |
1786 beginning_of_comment = true; | |
1787 } | |
1788 } | |
1091 | 1789 else |
1790 goto cleanup; | |
973 | 1791 break; |
1792 | |
1793 case '\n': | |
3665 | 1794 if (in_comment) |
1795 { | |
3802 | 1796 comment_buf += static_cast<char> (c); |
3665 | 1797 octave_comment_buffer::append (comment_buf); |
1798 } | |
975 | 1799 current_input_column = 0; |
1001 | 1800 promptflag--; |
4037 | 1801 gripe_matlab_incompatible_continuation (); |
2857 | 1802 return true; |
973 | 1803 |
3263 | 1804 case '\r': |
3665 | 1805 if (in_comment) |
3802 | 1806 comment_buf += static_cast<char> (c); |
3263 | 1807 c = yyinput (); |
1808 if (c == EOF) | |
1809 break; | |
1810 else if (c == '\n') | |
1811 { | |
3665 | 1812 if (in_comment) |
1813 { | |
3802 | 1814 comment_buf += static_cast<char> (c); |
3665 | 1815 octave_comment_buffer::append (comment_buf); |
1816 } | |
3263 | 1817 current_input_column = 0; |
1818 promptflag--; | |
4037 | 1819 gripe_matlab_incompatible_continuation (); |
3263 | 1820 return true; |
3665 | 1821 } |
3263 | 1822 |
3802 | 1823 // Fall through... |
1824 | |
973 | 1825 default: |
3665 | 1826 if (in_comment) |
1827 { | |
3802 | 1828 comment_buf += static_cast<char> (c); |
3665 | 1829 beginning_of_comment = false; |
1830 } | |
1831 else | |
1091 | 1832 goto cleanup; |
1833 break; | |
973 | 1834 } |
1835 } | |
1836 | |
4410 | 1837 yyunput (c, yytext); |
2857 | 1838 return false; |
973 | 1839 |
3096 | 1840 cleanup: |
4051 | 1841 |
5765 | 1842 std::string s = buf.str (); |
4051 | 1843 |
1844 int len = s.length (); | |
1845 while (len--) | |
4410 | 1846 yyunput (s[len], yytext); |
3096 | 1847 |
2857 | 1848 return false; |
973 | 1849 } |
1850 | |
1001 | 1851 // We have seen a `.' and need to see if it is the start of a |
1852 // continuation. If so, this eats it, up to and including the new | |
1853 // line character. | |
1854 | |
2857 | 1855 static bool |
3096 | 1856 have_ellipsis_continuation (bool trailing_comments_ok) |
973 | 1857 { |
1858 char c1 = yyinput (); | |
1859 if (c1 == '.') | |
1860 { | |
1861 char c2 = yyinput (); | |
1091 | 1862 if (c2 == '.' && have_continuation (trailing_comments_ok)) |
2857 | 1863 return true; |
973 | 1864 else |
1865 { | |
4410 | 1866 yyunput (c2, yytext); |
1867 yyunput (c1, yytext); | |
973 | 1868 } |
1869 } | |
1870 else | |
4410 | 1871 yyunput (c1, yytext); |
973 | 1872 |
2857 | 1873 return false; |
973 | 1874 } |
1875 | |
1001 | 1876 // See if we have a continuation line. If so, eat it and the leading |
1877 // whitespace on the next line. | |
1088 | 1878 // |
1879 // Return value is the same as described for eat_whitespace(). | |
1001 | 1880 |
1826 | 1881 static yum_yum |
1001 | 1882 eat_continuation (void) |
1883 { | |
1826 | 1884 int retval = ATE_NOTHING; |
3665 | 1885 |
1001 | 1886 int c = yyinput (); |
3665 | 1887 |
1001 | 1888 if ((c == '.' && have_ellipsis_continuation ()) |
3105 | 1889 || (c == '\\' && have_continuation ())) |
1001 | 1890 retval = eat_whitespace (); |
1891 else | |
4410 | 1892 yyunput (c, yytext); |
1001 | 1893 |
1894 return retval; | |
1895 } | |
1896 | |
973 | 1897 static int |
975 | 1898 handle_string (char delim, int text_style) |
973 | 1899 { |
5765 | 1900 std::ostringstream buf; |
973 | 1901 |
3805 | 1902 int bos_line = input_line_number; |
1903 int bos_col = current_input_column; | |
1904 | |
973 | 1905 int c; |
1031 | 1906 int escape_pending = 0; |
973 | 1907 |
1908 while ((c = yyinput ()) != EOF) | |
1909 { | |
1910 current_input_column++; | |
1911 | |
3105 | 1912 if (c == '\\') |
973 | 1913 { |
5359 | 1914 if (delim == '\'' || escape_pending) |
1053 | 1915 { |
3802 | 1916 buf << static_cast<char> (c); |
1053 | 1917 escape_pending = 0; |
1918 } | |
1919 else | |
1920 { | |
3096 | 1921 if (have_continuation (false)) |
1053 | 1922 escape_pending = 0; |
1923 else | |
1924 { | |
3802 | 1925 buf << static_cast<char> (c); |
1053 | 1926 escape_pending = 1; |
1927 } | |
1928 } | |
1031 | 1929 continue; |
973 | 1930 } |
1931 else if (c == '.') | |
1932 { | |
5359 | 1933 if (delim == '\'' || ! have_ellipsis_continuation (false)) |
3802 | 1934 buf << static_cast<char> (c); |
973 | 1935 } |
1936 else if (c == '\n') | |
1937 { | |
1053 | 1938 error ("unterminated string constant"); |
973 | 1939 break; |
1940 } | |
1941 else if (c == delim) | |
1942 { | |
1031 | 1943 if (escape_pending) |
3802 | 1944 buf << static_cast<char> (c); |
973 | 1945 else |
1946 { | |
1947 c = yyinput (); | |
1948 if (c == delim) | |
5102 | 1949 { |
1950 buf << static_cast<char> (c); | |
1951 if (lexer_flags.doing_rawcommand) | |
1952 buf << static_cast<char> (c); | |
1953 } | |
973 | 1954 else |
1955 { | |
5102 | 1956 std::string s; |
4410 | 1957 yyunput (c, yytext); |
5765 | 1958 |
5279 | 1959 if (lexer_flags.doing_rawcommand || delim == '\'') |
5765 | 1960 s = buf.str (); |
5102 | 1961 else |
5765 | 1962 s = do_string_escapes (buf.str ()); |
975 | 1963 |
5102 | 1964 if (text_style && lexer_flags.doing_rawcommand) |
1965 s = std::string (1, delim) + s + std::string (1, delim); | |
975 | 1966 else |
1967 { | |
2857 | 1968 lexer_flags.quote_is_transpose = true; |
1969 lexer_flags.convert_spaces_to_comma = true; | |
975 | 1970 } |
1971 | |
3805 | 1972 yylval.tok_val = new token (s, bos_line, bos_col); |
973 | 1973 token_stack.push (yylval.tok_val); |
3400 | 1974 |
4037 | 1975 if (delim == '"') |
1976 gripe_matlab_incompatible ("\" used as string delimiter"); | |
1977 else if (delim == '\'') | |
3400 | 1978 gripe_single_quote_string (); |
1979 | |
5279 | 1980 return delim == '"' ? DQ_STRING : SQ_STRING; |
973 | 1981 } |
1982 } | |
1983 } | |
1984 else | |
1985 { | |
3802 | 1986 buf << static_cast<char> (c); |
973 | 1987 } |
1988 | |
1031 | 1989 escape_pending = 0; |
973 | 1990 } |
1991 | |
1992 return LEXICAL_ERROR; | |
1993 } | |
1994 | |
3208 | 1995 static bool |
1996 next_token_is_assign_op (void) | |
1997 { | |
1998 bool retval = false; | |
1999 | |
2000 int c0 = yyinput (); | |
2001 | |
2002 switch (c0) | |
2003 { | |
2004 case '=': | |
2005 { | |
2006 int c1 = yyinput (); | |
4410 | 2007 yyunput (c1, yytext); |
3208 | 2008 if (c1 != '=') |
2009 retval = true; | |
2010 } | |
2011 break; | |
2012 | |
2013 case '+': | |
2014 case '-': | |
2015 case '*': | |
2016 case '/': | |
2017 case '\\': | |
2018 case '&': | |
2019 case '|': | |
2020 { | |
2021 int c1 = yyinput (); | |
4410 | 2022 yyunput (c1, yytext); |
3208 | 2023 if (c1 == '=') |
2024 retval = true; | |
2025 } | |
2026 break; | |
2027 | |
2028 case '.': | |
2029 { | |
2030 int c1 = yyinput (); | |
2031 if (match_any (c1, "+-*/\\")) | |
2032 { | |
2033 int c2 = yyinput (); | |
4410 | 2034 yyunput (c2, yytext); |
3208 | 2035 if (c2 == '=') |
2036 retval = true; | |
2037 } | |
4410 | 2038 yyunput (c1, yytext); |
3208 | 2039 } |
2040 break; | |
2041 | |
2042 case '>': | |
2043 { | |
2044 int c1 = yyinput (); | |
2045 if (c1 == '>') | |
2046 { | |
2047 int c2 = yyinput (); | |
4410 | 2048 yyunput (c2, yytext); |
3208 | 2049 if (c2 == '=') |
2050 retval = true; | |
2051 } | |
4410 | 2052 yyunput (c1, yytext); |
3208 | 2053 } |
2054 break; | |
2055 | |
2056 case '<': | |
2057 { | |
2058 int c1 = yyinput (); | |
2059 if (c1 == '<') | |
2060 { | |
2061 int c2 = yyinput (); | |
4410 | 2062 yyunput (c2, yytext); |
3208 | 2063 if (c2 == '=') |
2064 retval = true; | |
2065 } | |
4410 | 2066 yyunput (c1, yytext); |
3208 | 2067 } |
2068 break; | |
2069 | |
2070 default: | |
2071 break; | |
2072 } | |
2073 | |
4410 | 2074 yyunput (c0, yytext); |
3208 | 2075 |
2076 return retval; | |
2077 } | |
2078 | |
4633 | 2079 static bool |
2080 next_token_is_index_op (void) | |
2081 { | |
2082 int c = yyinput (); | |
2083 yyunput (c, yytext); | |
2084 return c == '(' || c == '{'; | |
2085 } | |
2086 | |
4612 | 2087 static int |
2088 handle_close_bracket (bool spc_gobbled, int bracket_type) | |
971 | 2089 { |
4612 | 2090 int retval = bracket_type; |
3208 | 2091 |
1826 | 2092 if (! nesting_level.none ()) |
971 | 2093 { |
1826 | 2094 nesting_level.remove (); |
4613 | 2095 |
2096 if (bracket_type == ']') | |
2097 lexer_flags.bracketflag--; | |
2098 else if (bracket_type == '}') | |
2099 lexer_flags.braceflag--; | |
2100 else | |
2101 panic_impossible (); | |
971 | 2102 } |
2103 | |
4613 | 2104 if (lexer_flags.bracketflag == 0 && lexer_flags.braceflag == 0) |
4323 | 2105 BEGIN (INITIAL); |
1001 | 2106 |
4608 | 2107 if (bracket_type == ']' |
2108 && next_token_is_assign_op () | |
2109 && ! lexer_flags.looking_at_return_list) | |
971 | 2110 { |
3208 | 2111 retval = CLOSE_BRACE; |
971 | 2112 } |
4613 | 2113 else if ((lexer_flags.bracketflag || lexer_flags.braceflag) |
2114 && lexer_flags.convert_spaces_to_comma | |
2115 && (nesting_level.is_bracket () | |
2116 || (nesting_level.is_brace () | |
2117 && ! lexer_flags.looking_at_object_index))) | |
971 | 2118 { |
4633 | 2119 bool index_op = next_token_is_index_op (); |
2120 | |
2121 // Don't insert comma if we are looking at something like | |
2122 // | |
2123 // [x{i}{j}] or [x{i}(j)] | |
2124 // | |
2125 // but do if we are looking at | |
2126 // | |
2127 // [x{i} {j}] or [x{i} (j)] | |
2128 | |
2129 if (spc_gobbled || ! (bracket_type == '}' && index_op)) | |
971 | 2130 { |
4633 | 2131 bool bin_op = next_token_is_bin_op (spc_gobbled); |
2132 | |
2133 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2134 | |
2135 bool sep_op = next_token_is_sep_op (); | |
2136 | |
2137 if (! (postfix_un_op || bin_op || sep_op)) | |
2138 { | |
2139 maybe_warn_separator_insert (','); | |
2140 | |
2141 yyunput (',', yytext); | |
2142 return retval; | |
2143 } | |
971 | 2144 } |
2145 } | |
2146 | |
2857 | 2147 lexer_flags.quote_is_transpose = true; |
2148 lexer_flags.convert_spaces_to_comma = true; | |
3208 | 2149 |
2150 return retval; | |
971 | 2151 } |
2152 | |
1072 | 2153 static void |
2154 maybe_unput_comma (int spc_gobbled) | |
2155 { | |
4613 | 2156 if (nesting_level.is_bracket () |
2157 || (nesting_level.is_brace () | |
2158 && ! lexer_flags.looking_at_object_index)) | |
1072 | 2159 { |
3246 | 2160 int bin_op = next_token_is_bin_op (spc_gobbled); |
1072 | 2161 |
3246 | 2162 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); |
1072 | 2163 |
2164 int c1 = yyinput (); | |
2165 int c2 = yyinput (); | |
2970 | 2166 |
4410 | 2167 yyunput (c2, yytext); |
2168 yyunput (c1, yytext); | |
2970 | 2169 |
3263 | 2170 int sep_op = next_token_is_sep_op (); |
2970 | 2171 |
1072 | 2172 int dot_op = (c1 == '.' |
2173 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
2970 | 2174 |
3388 | 2175 if (postfix_un_op || bin_op || sep_op || dot_op) |
2176 return; | |
2177 | |
3985 | 2178 int index_op = (c1 == '(' || c1 == '{'); |
3388 | 2179 |
4476 | 2180 // If there is no space before the indexing op, we don't insert |
2181 // a comma. | |
2182 | |
2183 if (index_op && ! spc_gobbled) | |
2184 return; | |
2185 | |
2186 maybe_warn_separator_insert (','); | |
2187 | |
2188 yyunput (',', yytext); | |
1072 | 2189 } |
2190 } | |
2191 | |
767 | 2192 // Figure out exactly what kind of token to return when we have seen |
4238 | 2193 // an identifier. Handles keywords. Return -1 if the identifier |
2194 // should be ignored. | |
767 | 2195 |
146 | 2196 static int |
3974 | 2197 handle_identifier (void) |
146 | 2198 { |
3974 | 2199 std::string tok = strip_trailing_whitespace (yytext); |
2200 | |
2201 int c = yytext[yyleng-1]; | |
2202 | |
2203 int cont_is_spc = eat_continuation (); | |
2204 | |
2205 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
2206 | |
2970 | 2207 // If we are expecting a structure element, avoid recognizing |
2208 // keywords and other special names and return STRUCT_ELT, which is | |
2209 // a string that is also a valid identifier. But first, we have to | |
2210 // decide whether to insert a comma. | |
747 | 2211 |
1826 | 2212 if (lexer_flags.looking_at_indirect_ref) |
1072 | 2213 { |
2970 | 2214 do_comma_insert_check (); |
2215 | |
1072 | 2216 maybe_unput_comma (spc_gobbled); |
2819 | 2217 |
2218 yylval.tok_val = new token (tok, input_line_number, | |
2219 current_input_column); | |
2220 | |
2221 token_stack.push (yylval.tok_val); | |
2222 | |
2857 | 2223 lexer_flags.quote_is_transpose = true; |
2224 lexer_flags.convert_spaces_to_comma = true; | |
2819 | 2225 |
2226 current_input_column += yyleng; | |
2227 | |
2970 | 2228 return STRUCT_ELT; |
1072 | 2229 } |
747 | 2230 |
4930 | 2231 int kw_token = is_keyword_token (tok); |
2232 | |
2233 if (lexer_flags.looking_at_function_handle) | |
2234 { | |
2235 if (kw_token) | |
2236 { | |
2237 error ("function handles may not refer to keywords"); | |
2238 | |
2239 return LEXICAL_ERROR; | |
2240 } | |
2241 else | |
2242 TOK_PUSH_AND_RETURN (tok, FCN_HANDLE); | |
2243 } | |
2244 | |
5102 | 2245 // If we have a regular keyword, return it. |
1826 | 2246 // Keywords can be followed by identifiers (TOK_RETURN handles |
2247 // that). | |
146 | 2248 |
2249 if (kw_token) | |
2250 { | |
4238 | 2251 if (kw_token < 0) |
2252 return kw_token; | |
146 | 2253 else |
2254 TOK_RETURN (kw_token); | |
2255 } | |
2256 | |
1826 | 2257 // See if we have a plot keyword (title, using, with, or clear). |
146 | 2258 |
3480 | 2259 int c1 = yyinput (); |
2260 | |
2261 bool next_tok_is_paren = (c1 == '('); | |
2262 | |
2263 bool next_tok_is_eq = false; | |
2264 if (c1 == '=') | |
2265 { | |
2266 int c2 = yyinput (); | |
4410 | 2267 yyunput (c2, yytext); |
3480 | 2268 |
2269 if (c2 != '=') | |
2270 next_tok_is_eq = true; | |
2271 } | |
2272 | |
4410 | 2273 yyunput (c1, yytext); |
1001 | 2274 |
2702 | 2275 // Kluge alert. |
2276 // | |
2277 // If we are looking at a text style function, set up to gobble its | |
2745 | 2278 // arguments. |
2279 // | |
2280 // If the following token is `=', or if we are parsing a function | |
3189 | 2281 // return list or function parameter list, or if we are looking at |
2282 // something like [ab,cd] = foo (), force the symbol to be inserted | |
2283 // as a variable in the current symbol table. | |
2702 | 2284 |
4208 | 2285 if (is_command_name (tok) && ! is_variable (tok)) |
2702 | 2286 { |
2745 | 2287 if (next_tok_is_eq |
2288 || lexer_flags.looking_at_return_list | |
7634
ae90e05ad299
fix parameter list initializer bug
John W. Eaton <jwe@octave.org>
parents:
7587
diff
changeset
|
2289 || (lexer_flags.looking_at_parameter_list |
ae90e05ad299
fix parameter list initializer bug
John W. Eaton <jwe@octave.org>
parents:
7587
diff
changeset
|
2290 && ! lexer_flags.looking_at_initializer_expression) |
4101 | 2291 || lexer_flags.looking_at_matrix_or_assign_lhs) |
2745 | 2292 { |
2293 force_local_variable (tok); | |
2294 } | |
6067 | 2295 else if (! (next_tok_is_paren || lexer_flags.looking_at_object_index)) |
2702 | 2296 { |
5102 | 2297 BEGIN (COMMAND_START); |
2298 } | |
2299 | |
6067 | 2300 if (is_rawcommand_name (tok) && ! lexer_flags.looking_at_object_index) |
5102 | 2301 { |
2302 lexer_flags.doing_rawcommand = true; | |
4323 | 2303 BEGIN (COMMAND_START); |
2702 | 2304 } |
2305 } | |
2306 | |
4234 | 2307 // Find the token in the symbol table. Beware the magic |
2308 // transformation of the end keyword... | |
2309 | |
2310 if (tok == "end") | |
2311 tok = "__end__"; | |
146 | 2312 |
7336 | 2313 yylval.tok_val = new token (&(symbol_table::insert (tok)), |
2314 input_line_number, current_input_column); | |
2315 | |
146 | 2316 token_stack.push (yylval.tok_val); |
2317 | |
1826 | 2318 // After seeing an identifer, it is ok to convert spaces to a comma |
2319 // (if needed). | |
146 | 2320 |
2857 | 2321 lexer_flags.convert_spaces_to_comma = true; |
146 | 2322 |
2877 | 2323 if (! next_tok_is_eq) |
2324 { | |
2325 lexer_flags.quote_is_transpose = true; | |
146 | 2326 |
2877 | 2327 do_comma_insert_check (); |
2328 | |
2329 maybe_unput_comma (spc_gobbled); | |
146 | 2330 } |
2331 | |
2877 | 2332 current_input_column += yyleng; |
146 | 2333 |
2334 return NAME; | |
2335 } | |
2336 | |
1826 | 2337 void |
2338 lexical_feedback::init (void) | |
2339 { | |
2340 // Not initially defining a matrix list. | |
3351 | 2341 bracketflag = 0; |
1826 | 2342 |
4613 | 2343 // Not initially defining a cell array list. |
2344 braceflag = 0; | |
2345 | |
1826 | 2346 // Not initially inside a loop or if statement. |
2347 looping = 0; | |
2348 | |
2857 | 2349 // Not initially defining a function. |
2350 beginning_of_function = false; | |
2351 defining_func = false; | |
2877 | 2352 parsed_function_name = false; |
4240 | 2353 parsing_nested_function = 0; |
7336 | 2354 parsing_class_method = false; |
2857 | 2355 |
4930 | 2356 // Not initiallly looking at a function handle. |
2357 looking_at_function_handle = 0; | |
2358 | |
2857 | 2359 // Not parsing a function return or parameter list. |
2360 looking_at_return_list = false; | |
2361 looking_at_parameter_list = false; | |
2362 | |
7634
ae90e05ad299
fix parameter list initializer bug
John W. Eaton <jwe@octave.org>
parents:
7587
diff
changeset
|
2363 // Not looking at an argument list initializer expression. |
ae90e05ad299
fix parameter list initializer bug
John W. Eaton <jwe@octave.org>
parents:
7587
diff
changeset
|
2364 looking_at_initializer_expression = false; |
ae90e05ad299
fix parameter list initializer bug
John W. Eaton <jwe@octave.org>
parents:
7587
diff
changeset
|
2365 |
3796 | 2366 // Not parsing a matrix or the left hand side of multi-value |
2367 // assignment statement. | |
2368 looking_at_matrix_or_assign_lhs = false; | |
2369 | |
4234 | 2370 // Not parsing an object index. |
4237 | 2371 looking_at_object_index = 0; |
4234 | 2372 |
2857 | 2373 // No need to do comma insert or convert spaces to comma at |
2374 // beginning of input. | |
2375 convert_spaces_to_comma = true; | |
2376 do_comma_insert = false; | |
2377 | |
2378 // Not initially doing any plotting or setting of plot attributes. | |
5102 | 2379 doing_rawcommand = false; |
2857 | 2380 |
1826 | 2381 // Not initially looking at indirect references. |
2857 | 2382 looking_at_indirect_ref = false; |
1826 | 2383 |
2384 // Quote marks strings intially. | |
2857 | 2385 quote_is_transpose = false; |
1826 | 2386 } |
2387 | |
4867 | 2388 bool |
2389 is_keyword (const std::string& s) | |
2390 { | |
5088 | 2391 return octave_kw_hash::in_word_set (s.c_str (), s.length ()) != 0; |
4867 | 2392 } |
2393 | |
DEFCMD (iskeyword, args, ,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {} iskeyword (@var{name})\n\
Return true if @var{name} is an Octave keyword. If @var{name}\n\
is omitted, return a list of keywords.\n\
@end deftypefn")
{
  octave_value retval;

  // Count includes the command name itself, matching the argv layout
  // built below.
  int n_words = args.length () + 1;

  string_vector words = args.make_argv ("iskeyword");

  if (! error_state)
    {
      switch (n_words)
        {
        case 1:
          {
            // No argument: return the sorted list of all keywords.
            string_vector kw_names (TOTAL_KEYWORDS);

            for (int k = 0; k < TOTAL_KEYWORDS; k++)
              kw_names[k] = wordlist[k].name;

            retval = Cell (kw_names.qsort ());
          }
          break;

        case 2:
          // One argument: test whether it is a keyword.
          retval = is_keyword (words[1]);
          break;

        default:
          print_usage ();
          break;
        }
    }

  return retval;
}
2428 | |
2429 | |
3388 | 2430 static void |
2431 maybe_warn_separator_insert (char sep) | |
2432 { | |
3523 | 2433 std::string nm = curr_fcn_file_full_name; |
3388 | 2434 |
5794 | 2435 if (nm.empty ()) |
2436 warning_with_id ("Octave:separator-insert", | |
2437 "potential auto-insertion of `%c' near line %d", | |
2438 sep, input_line_number); | |
2439 else | |
2440 warning_with_id ("Octave:separator-insert", | |
2441 "potential auto-insertion of `%c' near line %d of file %s", | |
2442 sep, input_line_number, nm.c_str ()); | |
3388 | 2443 } |
2444 | |
3400 | 2445 static void |
2446 gripe_single_quote_string (void) | |
2447 { | |
3523 | 2448 std::string nm = curr_fcn_file_full_name; |
3400 | 2449 |
5794 | 2450 if (nm.empty ()) |
2451 warning_with_id ("Octave:single-quote-string", | |
2452 "single quote delimited string near line %d", | |
2453 input_line_number); | |
2454 else | |
2455 warning_with_id ("Octave:single-quote-string", | |
2456 "single quote delimited string near line %d of file %s", | |
2457 input_line_number, nm.c_str ()); | |
3400 | 2458 } |
2459 | |
4037 | 2460 static void |
2461 gripe_matlab_incompatible (const std::string& msg) | |
2462 { | |
5794 | 2463 warning_with_id ("Octave:matlab-incompatible", |
2464 "potential Matlab compatibility problem: %s", | |
2465 msg.c_str ()); | |
4037 | 2466 } |
2467 | |
2468 static void | |
2469 maybe_gripe_matlab_incompatible_comment (char c) | |
2470 { | |
2471 if (c == '#') | |
2472 gripe_matlab_incompatible ("# used as comment character"); | |
2473 } | |
2474 | |
2475 static void | |
2476 gripe_matlab_incompatible_continuation (void) | |
2477 { | |
2478 gripe_matlab_incompatible ("\\ used as line continuation marker"); | |
2479 } | |
2480 | |
2481 static void | |
2482 gripe_matlab_incompatible_operator (const std::string& op) | |
2483 { | |
2484 std::string t = op; | |
2485 int n = t.length (); | |
2486 if (t[n-1] == '\n') | |
2487 t.resize (n-1); | |
2488 gripe_matlab_incompatible (t + " used as operator"); | |
2489 } | |
2490 | |
4910 | 2491 DEFUN (__token_count__, , , |
2492 "-*- texinfo -*-\n\ | |
2493 @deftypefn {Built-in Function} {} __token_count__\n\ | |
2494 Number of language tokens processed since Octave startup.\n\ | |
2495 @end deftypefn") | |
2496 { | |
2497 return octave_value (Vtoken_count); | |
2498 } | |
2499 | |
1994 | 2500 /* |
2501 ;;; Local Variables: *** | |
2502 ;;; mode: C++ *** | |
2503 ;;; End: *** | |
2504 */ |