Mercurial > hg > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 15195:2fc554ffbc28
split libinterp from src
* libinterp: New directory. Move all files from src directory here
except Makefile.am, main.cc, main-cli.cc, mkoctfile.in.cc,
mkoctfile.in.sh, octave-config.in.cc, octave-config.in.sh.
* libinterp/Makefile.am: New file, extracted from src/Makefile.am.
* src/Makefile.am: Delete everything except targets and definitions
needed to build and link main and utility programs.
* Makefile.am (SUBDIRS): Include libinterp in the list.
* autogen.sh: Run config-module.sh in libinterp/dldfcn directory, not
src/dldfcn directory.
* configure.ac (AC_CONFIG_SRCDIR): Use libinterp/octave.cc, not
src/octave.cc.
(DL_LDFLAGS, LIBOCTINTERP): Use libinterp, not src.
(AC_CONFIG_FILES): Include libinterp/Makefile in the list.
* find-docstring-files.sh: Look in libinterp, not src.
* gui/src/Makefile.am (liboctgui_la_CPPFLAGS): Find header files in
libinterp, not src.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sat, 18 Aug 2012 16:23:39 -0400 |
parents | src/parse-tree/lex.ll@28f5f4a4a80a |
children | 947cf10c94da 049e8bbff782 |
comparison
equal
deleted
inserted
replaced
15194:0f0b795044c3 | 15195:2fc554ffbc28 |
---|---|
1 /* | |
2 | |
3 Copyright (C) 1993-2012 John W. Eaton | |
4 | |
5 This file is part of Octave. | |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 3 of the License, or (at your | |
10 option) any later version. | |
11 | |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with Octave; see the file COPYING. If not, see | |
19 <http://www.gnu.org/licenses/>. | |
20 | |
21 */ | |
22 | |
23 %option prefix = "octave_" | |
24 | |
25 %top { | |
26 #ifdef HAVE_CONFIG_H | |
27 #include <config.h> | |
28 #endif | |
29 | |
30 } | |
31 | |
32 %s COMMAND_START | |
33 %s MATRIX_START | |
34 | |
35 %x SCRIPT_FILE_BEGIN | |
36 %x FUNCTION_FILE_BEGIN | |
37 | |
38 %{ | |
39 | |
40 #include <cctype> | |
41 #include <cstring> | |
42 | |
43 #include <iostream> | |
44 #include <set> | |
45 #include <sstream> | |
46 #include <string> | |
47 #include <stack> | |
48 | |
49 #include <sys/types.h> | |
50 #include <unistd.h> | |
51 | |
52 #include "cmd-edit.h" | |
53 #include "quit.h" | |
54 #include "lo-mappers.h" | |
55 | |
56 // These would be alphabetical, but y.tab.h must be included before | |
57 // oct-gperf.h and y.tab.h must be included after token.h and the tree | |
58 // class declarations. We can't include y.tab.h in oct-gperf.h | |
59 // because it may not be protected to allow it to be included multiple | |
60 // times. | |
61 | |
62 #include "Cell.h" | |
63 #include "comment-list.h" | |
64 #include "defun.h" | |
65 #include "error.h" | |
66 #include "gripes.h" | |
67 #include "input.h" | |
68 #include "lex.h" | |
69 #include "ov.h" | |
70 #include "parse.h" | |
71 #include "parse-private.h" | |
72 #include "pt-all.h" | |
73 #include "symtab.h" | |
74 #include "token.h" | |
75 #include "toplev.h" | |
76 #include "utils.h" | |
77 #include "variables.h" | |
78 #include <oct-parse.h> | |
79 #include <oct-gperf.h> | |
80 | |
81 #if defined (GNULIB_NAMESPACE) | |
82 // Calls to the following functions appear in the generated output from | |
83 // flex without the namespace tag. Redefine them so we will use them | |
84 // via the gnulib namespace. | |
85 #define fprintf GNULIB_NAMESPACE::fprintf | |
86 #define fwrite GNULIB_NAMESPACE::fwrite | |
87 #define isatty GNULIB_NAMESPACE::isatty | |
88 #define malloc GNULIB_NAMESPACE::malloc | |
89 #define realloc GNULIB_NAMESPACE::realloc | |
90 #endif | |
91 | |
92 #if ! (defined (FLEX_SCANNER) \ | |
93 && defined (YY_FLEX_MAJOR_VERSION) && YY_FLEX_MAJOR_VERSION >= 2 \ | |
94 && defined (YY_FLEX_MINOR_VERSION) && YY_FLEX_MINOR_VERSION >= 5) | |
95 #error lex.l requires flex version 2.5.4 or later | |
96 #endif | |
97 | |
98 #define yylval octave_lval | |
99 | |
100 // Arrange to get input via readline. | |
101 | |
102 #ifdef YY_INPUT | |
103 #undef YY_INPUT | |
104 #endif | |
105 #define YY_INPUT(buf, result, max_size) \ | |
106 if ((result = octave_read (buf, max_size)) < 0) \ | |
107 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); | |
108 | |
109 // Try to avoid crashing out completely on fatal scanner errors. | |
110 // The call to yy_fatal_error should never happen, but it avoids a | |
111 // `static function defined but not used' warning from gcc. | |
112 | |
113 #ifdef YY_FATAL_ERROR | |
114 #undef YY_FATAL_ERROR | |
115 #endif | |
116 #define YY_FATAL_ERROR(msg) \ | |
117 do \ | |
118 { \ | |
119 error (msg); \ | |
120 OCTAVE_QUIT; \ | |
121 yy_fatal_error (msg); \ | |
122 } \ | |
123 while (0) | |
124 | |
125 #define DISPLAY_TOK_AND_RETURN(tok) \ | |
126 do \ | |
127 { \ | |
128 int tok_val = tok; \ | |
129 if (Vdisplay_tokens) \ | |
130 display_token (tok_val); \ | |
131 if (lexer_debug_flag) \ | |
132 { \ | |
133 std::cerr << "R: "; \ | |
134 display_token (tok_val); \ | |
135 std::cerr << std::endl; \ | |
136 } \ | |
137 return tok_val; \ | |
138 } \ | |
139 while (0) | |
140 | |
141 #define COUNT_TOK_AND_RETURN(tok) \ | |
142 do \ | |
143 { \ | |
144 Vtoken_count++; \ | |
145 DISPLAY_TOK_AND_RETURN (tok); \ | |
146 } \ | |
147 while (0) | |
148 | |
149 #define TOK_RETURN(tok) \ | |
150 do \ | |
151 { \ | |
152 current_input_column += yyleng; \ | |
153 lexer_flags.quote_is_transpose = false; \ | |
154 lexer_flags.convert_spaces_to_comma = true; \ | |
155 COUNT_TOK_AND_RETURN (tok); \ | |
156 } \ | |
157 while (0) | |
158 | |
159 #define TOK_PUSH_AND_RETURN(name, tok) \ | |
160 do \ | |
161 { \ | |
162 yylval.tok_val = new token (name, input_line_number, \ | |
163 current_input_column); \ | |
164 token_stack.push (yylval.tok_val); \ | |
165 TOK_RETURN (tok); \ | |
166 } \ | |
167 while (0) | |
168 | |
// Create a token for an operator-like lexeme and return TOK to the parser.
//
//   tok     - token code handed to COUNT_TOK_AND_RETURN
//   convert - value assigned to lexer_flags.convert_spaces_to_comma
//   bos     - value assigned to lexer_flags.at_beginning_of_statement
//   qit     - value assigned to lexer_flags.quote_is_transpose
//
// A new token object recording the current line and column is stored in
// yylval.tok_val and pushed on token_stack, current_input_column is
// advanced by yyleng, and lexer_flags.looking_for_object_index is
// cleared.
169 #define BIN_OP_RETURN_INTERNAL(tok, convert, bos, qit) \ | |
170 do \ | |
171 { \ | |
172 yylval.tok_val = new token (input_line_number, current_input_column); \ | |
173 token_stack.push (yylval.tok_val); \ | |
174 current_input_column += yyleng; \ | |
175 lexer_flags.quote_is_transpose = qit; \ | |
176 lexer_flags.convert_spaces_to_comma = convert; \ | |
177 lexer_flags.looking_for_object_index = false; \ | |
178 lexer_flags.at_beginning_of_statement = bos; \ | |
179 COUNT_TOK_AND_RETURN (tok); \ | |
180 } \ | |
181 while (0) | |
182 | |
183 #define XBIN_OP_RETURN_INTERNAL(tok, convert, bos, qit) \ | |
184 do \ | |
185 { \ | |
186 gripe_matlab_incompatible_operator (yytext); \ | |
187 BIN_OP_RETURN_INTERNAL (tok, convert, bos, qit); \ | |
188 } \ | |
189 while (0) | |
190 | |
191 #define BIN_OP_RETURN(tok, convert, bos) \ | |
192 do \ | |
193 { \ | |
194 BIN_OP_RETURN_INTERNAL (tok, convert, bos, false); \ | |
195 } \ | |
196 while (0) | |
197 | |
198 #define XBIN_OP_RETURN(tok, convert, bos) \ | |
199 do \ | |
200 { \ | |
201 gripe_matlab_incompatible_operator (yytext); \ | |
202 BIN_OP_RETURN (tok, convert, bos); \ | |
203 } \ | |
204 while (0) | |
205 | |
206 #define LEXER_DEBUG(pattern) \ | |
207 do \ | |
208 { \ | |
209 if (lexer_debug_flag) \ | |
210 lexer_debug (pattern, yytext); \ | |
211 } \ | |
212 while (0) | |
213 | |
214 // TRUE means that we have encountered EOF on the input stream. | |
215 bool parser_end_of_input = false; | |
216 | |
217 // Flags that need to be shared between the lexer and parser. | |
218 lexical_feedback lexer_flags; | |
219 | |
220 // Stack to hold tokens so that we can delete them when the parser is | |
221 // reset and avoid growing forever just because we are stashing some | |
222 // information. This has to appear before lex.h is included, because | |
223 // one of the macros defined there uses token_stack. | |
224 // | |
225 // FIXME -- this should really be static, but that causes | |
226 // problems on some systems. | |
227 std::stack <token*> token_stack; | |
228 | |
229 // Did eat_whitespace() eat a space or tab, or a newline, or both? | |
230 | |
231 typedef int yum_yum; | |
232 | |
233 const yum_yum ATE_NOTHING = 0; | |
234 const yum_yum ATE_SPACE_OR_TAB = 1; | |
235 const yum_yum ATE_NEWLINE = 2; | |
236 | |
237 // Is the closest nesting level a square bracket, squiggly brace or a paren? | |
238 | |
// Stack of the grouping delimiters that are currently open.  The lexer
// pushes an entry when it sees an opening bracket, brace or paren and
// removes the innermost entry when the matching closer is seen, so the
// query functions always describe the closest enclosing delimiter.
//
// All query functions are const and return false (or, for none (),
// true) when no delimiter is open.

class bracket_brace_paren_nesting_level
{
public:

  bracket_brace_paren_nesting_level (void) : context () { }

  ~bracket_brace_paren_nesting_level (void) { }

  // Enter a square-bracket group / ask whether one is innermost.
  void bracket (void) { context.push (BRACKET); }
  bool is_bracket (void) const
    { return ! context.empty () && context.top () == BRACKET; }

  // Enter a brace group / ask whether one is innermost.
  void brace (void) { context.push (BRACE); }
  bool is_brace (void) const
    { return ! context.empty () && context.top () == BRACE; }

  // Enter a parenthesized group / ask whether one is innermost.
  void paren (void) { context.push (PAREN); }
  bool is_paren (void) const
    { return ! context.empty () && context.top () == PAREN; }

  // True if the innermost open delimiter is a bracket or a brace.
  bool is_bracket_or_brace (void) const
    { return (! context.empty ()
              && (context.top () == BRACKET || context.top () == BRACE)); }

  // True if no delimiter is currently open.
  bool none (void) const { return context.empty (); }

  // Leave the innermost group.  Does nothing if no group is open, so an
  // unbalanced closer cannot underflow the stack.
  void remove (void) { if (! context.empty ()) context.pop (); }

  // Forget every open group.
  void clear (void) { while (! context.empty ()) context.pop (); }

private:

  // Innermost delimiter is on top; values are BRACKET, BRACE or PAREN.
  std::stack<int> context;

  static const int BRACKET;
  static const int BRACE;
  static const int PAREN;

  // No copying: declared private and intentionally left undefined.

  bracket_brace_paren_nesting_level (const bracket_brace_paren_nesting_level&);

  bracket_brace_paren_nesting_level&
  operator = (const bracket_brace_paren_nesting_level&);
};

const int bracket_brace_paren_nesting_level::BRACKET = 1;
const int bracket_brace_paren_nesting_level::BRACE = 2;
const int bracket_brace_paren_nesting_level::PAREN = 3;
286 | |
287 static bracket_brace_paren_nesting_level nesting_level; | |
288 | |
289 static bool Vdisplay_tokens = false; | |
290 | |
291 static unsigned int Vtoken_count = 0; | |
292 | |
293 // Number of block comments currently open. It is incremented each time | |
294 // a block comment opener is seen; nonzero at EOF means one was left open. | |
295 static int block_comment_nesting_level = 0; | |
296 | |
297 // Internal variable for lexer debugging state. | |
298 static bool lexer_debug_flag = false; | |
299 | |
300 // Forward declarations for functions defined at the bottom of this | |
301 // file. | |
302 | |
303 static int text_yyinput (void); | |
304 static void xunput (char c, char *buf); | |
305 static void fixup_column_count (char *s); | |
306 static void do_comma_insert_check (void); | |
307 static int is_keyword_token (const std::string& s); | |
308 static int process_comment (bool start_in_block, bool& eof); | |
309 static bool match_any (char c, const char *s); | |
310 static bool next_token_is_sep_op (void); | |
311 static bool next_token_is_bin_op (bool spc_prev); | |
312 static bool next_token_is_postfix_unary_op (bool spc_prev); | |
313 static std::string strip_trailing_whitespace (char *s); | |
314 static void handle_number (void); | |
315 static int handle_string (char delim); | |
316 static int handle_close_bracket (bool spc_gobbled, int bracket_type); | |
317 static int handle_superclass_identifier (void); | |
318 static int handle_meta_identifier (void); | |
319 static int handle_identifier (void); | |
320 static bool have_continuation (bool trailing_comments_ok = true); | |
321 static bool have_ellipsis_continuation (bool trailing_comments_ok = true); | |
322 static void scan_for_comments (const char *); | |
323 static yum_yum eat_whitespace (void); | |
324 static yum_yum eat_continuation (void); | |
325 static void maybe_warn_separator_insert (char sep); | |
326 static void gripe_single_quote_string (void); | |
327 static void gripe_matlab_incompatible (const std::string& msg); | |
328 static void maybe_gripe_matlab_incompatible_comment (char c); | |
329 static void gripe_matlab_incompatible_continuation (void); | |
330 static void gripe_matlab_incompatible_operator (const std::string& op); | |
331 static void display_token (int tok); | |
332 static void lexer_debug (const char *pattern, const char *text); | |
333 | |
334 %} | |
335 | |
336 D [0-9] | |
337 S [ \t] | |
338 NL ((\n)|(\r)|(\r\n)) | |
339 SNL ({S}|{NL}) | |
340 EL (\.\.\.) | |
341 BS (\\) | |
342 CONT ({EL}|{BS}) | |
343 Im [iIjJ] | |
344 CCHAR [#%] | |
345 COMMENT ({CCHAR}.*{NL}) | |
346 SNLCMT ({SNL}|{COMMENT}) | |
347 NOT ((\~)|(\!)) | |
348 POW ((\*\*)|(\^)) | |
349 EPOW (\.{POW}) | |
350 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) | |
351 EXPON ([DdEe][+-]?{D}+) | |
352 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) | |
353 %% | |
354 | |
355 %{ | |
356 // Make script and function files start with a bogus token. This makes | |
357 // the parser go down a special path. | |
358 %} | |
359 | |
360 <SCRIPT_FILE_BEGIN>. { | |
361 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>."); | |
362 | |
363 BEGIN (INITIAL); | |
364 xunput (yytext[0], yytext); | |
365 COUNT_TOK_AND_RETURN (SCRIPT_FILE); | |
366 } | |
367 | |
368 <FUNCTION_FILE_BEGIN>. { | |
369 LEXER_DEBUG ("<FUNCTION_FILE_BEGIN>."); | |
370 | |
371 BEGIN (INITIAL); | |
372 xunput (yytext[0], yytext); | |
373 COUNT_TOK_AND_RETURN (FUNCTION_FILE); | |
374 } | |
375 | |
376 %{ | |
377 // Help and other command-style functions. | |
378 %} | |
379 | |
380 <COMMAND_START>{NL} { | |
381 LEXER_DEBUG ("<COMMAND_START>{NL}"); | |
382 | |
383 BEGIN (INITIAL); | |
384 input_line_number++; | |
385 current_input_column = 1; | |
386 | |
387 lexer_flags.quote_is_transpose = false; | |
388 lexer_flags.convert_spaces_to_comma = true; | |
389 lexer_flags.looking_for_object_index = false; | |
390 lexer_flags.at_beginning_of_statement = true; | |
391 | |
392 COUNT_TOK_AND_RETURN ('\n'); | |
393 } | |
394 | |
395 <COMMAND_START>[\;\,] { | |
396 LEXER_DEBUG ("<COMMAND_START>[\\;\\,]"); | |
397 | |
398 lexer_flags.looking_for_object_index = false; | |
399 lexer_flags.at_beginning_of_statement = true; | |
400 | |
401 BEGIN (INITIAL); | |
402 | |
403 if (strcmp (yytext, ",") == 0) | |
404 TOK_RETURN (','); | |
405 else | |
406 TOK_RETURN (';'); | |
407 } | |
408 | |
409 <COMMAND_START>[\"\'] { | |
410 LEXER_DEBUG ("<COMMAND_START>[\\\"\\']"); | |
411 | |
412 lexer_flags.at_beginning_of_statement = false; | |
413 | |
414 current_input_column++; | |
415 int tok = handle_string (yytext[0]); | |
416 | |
417 COUNT_TOK_AND_RETURN (tok); | |
418 } | |
419 | |
420 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { | |
421 LEXER_DEBUG ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); | |
422 | |
423 std::string tok = strip_trailing_whitespace (yytext); | |
424 | |
425 lexer_flags.looking_for_object_index = false; | |
426 lexer_flags.at_beginning_of_statement = false; | |
427 | |
428 TOK_PUSH_AND_RETURN (tok, SQ_STRING); | |
429 } | |
430 | |
431 %{ | |
432 // For this and the next two rules, we're looking at ']', and we | |
433 // need to know if the next token is `=' or `=='. | |
434 // | |
435 // It would have been so much easier if the delimiters were simply | |
436 // different for the expression on the left hand side of the equals | |
437 // operator. | |
438 // | |
439 // It's also a pain in the ass to decide whether to insert a comma | |
440 // after seeing a ']' character... | |
441 | |
442 // FIXME -- we need to handle block comments here. | |
443 %} | |
444 | |
445 <MATRIX_START>{SNLCMT}*\]{S}* { | |
446 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\]{S}*"); | |
447 | |
// The match may include whitespace, newlines and comments ahead of the
// ']' plus trailing spaces/tabs; pass all of it to the comment scanner
// and the column bookkeeping helper.
448 scan_for_comments (yytext); | |
449 fixup_column_count (yytext); | |
450 | |
// Drop the entry that the matching opener pushed.
451 lexer_flags.looking_at_object_index.pop_front (); | |
452 | |
453 lexer_flags.looking_for_object_index = true; | |
454 lexer_flags.at_beginning_of_statement = false; | |
455 | |
// c is the last matched character: either the ']' itself or a trailing
// space/tab.  A nonzero result from eat_continuation is treated the
// same as trailing whitespace.
456 int c = yytext[yyleng-1]; | |
457 int cont_is_spc = eat_continuation (); | |
458 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
459 int tok_to_return = handle_close_bracket (spc_gobbled, ']'); | |
460 | |
// NOTE(review): xunput presumably pushes the character back onto the
// input so that the whitespace after ']' is seen again -- confirm.
461 if (spc_gobbled) | |
462 xunput (' ', yytext); | |
463 | |
464 COUNT_TOK_AND_RETURN (tok_to_return); | |
465 } | |
466 | |
467 %{ | |
468 // FIXME -- we need to handle block comments here. | |
469 %} | |
470 | |
471 <MATRIX_START>{SNLCMT}*\}{S}* { | |
472 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\}{S}*"); | |
473 | |
// Same handling as the ']' rule, but for a closing brace.  The match
// may include leading whitespace, newlines and comments as well as
// trailing spaces/tabs.
474 scan_for_comments (yytext); | |
475 fixup_column_count (yytext); | |
476 | |
// Drop the entry that the matching opener pushed.
477 lexer_flags.looking_at_object_index.pop_front (); | |
478 | |
479 lexer_flags.looking_for_object_index = true; | |
480 lexer_flags.at_beginning_of_statement = false; | |
481 | |
// c is the last matched character: either the '}' itself or a trailing
// space/tab.  A nonzero result from eat_continuation is treated the
// same as trailing whitespace.
482 int c = yytext[yyleng-1]; | |
483 int cont_is_spc = eat_continuation (); | |
484 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
485 int tok_to_return = handle_close_bracket (spc_gobbled, '}'); | |
486 | |
// NOTE(review): xunput presumably pushes the character back onto the
// input so that the whitespace after '}' is seen again -- confirm.
487 if (spc_gobbled) | |
488 xunput (' ', yytext); | |
489 | |
490 COUNT_TOK_AND_RETURN (tok_to_return); | |
491 } | |
492 | |
493 %{ | |
494 // Commas are element separators in matrix constants. If we don't | |
495 // check for continuations here we can end up inserting too many | |
496 // commas. | |
497 %} | |
498 | |
499 <MATRIX_START>{S}*\,{S}* { | |
500 LEXER_DEBUG ("<MATRIX_START>{S}*\\,{S}*"); | |
501 | |
502 current_input_column += yyleng; | |
503 | |
504 int tmp = eat_continuation (); | |
505 | |
506 lexer_flags.quote_is_transpose = false; | |
507 lexer_flags.convert_spaces_to_comma = true; | |
508 lexer_flags.looking_for_object_index = false; | |
509 lexer_flags.at_beginning_of_statement = false; | |
510 | |
511 if (! lexer_flags.looking_at_object_index.front ()) | |
512 { | |
513 if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) | |
514 { | |
515 maybe_warn_separator_insert (';'); | |
516 | |
517 xunput (';', yytext); | |
518 } | |
519 } | |
520 | |
521 COUNT_TOK_AND_RETURN (','); | |
522 } | |
523 | |
524 %{ | |
525 // In some cases, spaces in matrix constants can turn into commas. | |
526 // If commas are required, spaces are not important in matrix | |
527 // constants so we just eat them. If we don't check for continuations | |
528 // here we can end up inserting too many commas. | |
529 %} | |
530 | |
531 <MATRIX_START>{S}+ { | |
532 LEXER_DEBUG ("<MATRIX_START>{S}+"); | |
533 | |
534 current_input_column += yyleng; | |
535 | |
536 lexer_flags.at_beginning_of_statement = false; | |
537 | |
538 int tmp = eat_continuation (); | |
539 | |
540 if (! lexer_flags.looking_at_object_index.front ()) | |
541 { | |
542 bool bin_op = next_token_is_bin_op (true); | |
543 bool postfix_un_op = next_token_is_postfix_unary_op (true); | |
544 bool sep_op = next_token_is_sep_op (); | |
545 | |
546 if (! (postfix_un_op || bin_op || sep_op) | |
547 && nesting_level.is_bracket_or_brace () | |
548 && lexer_flags.convert_spaces_to_comma) | |
549 { | |
550 if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) | |
551 { | |
552 maybe_warn_separator_insert (';'); | |
553 | |
554 xunput (';', yytext); | |
555 } | |
556 | |
557 lexer_flags.quote_is_transpose = false; | |
558 lexer_flags.convert_spaces_to_comma = true; | |
559 lexer_flags.looking_for_object_index = false; | |
560 | |
561 maybe_warn_separator_insert (','); | |
562 | |
563 COUNT_TOK_AND_RETURN (','); | |
564 } | |
565 } | |
566 } | |
567 | |
568 %{ | |
569 // Semicolons are handled as row separators in matrix constants. If we | |
570 // don't eat whitespace here we can end up inserting too many | |
571 // semicolons. | |
572 | |
573 // FIXME -- we need to handle block comments here. | |
574 %} | |
575 | |
576 <MATRIX_START>{SNLCMT}*;{SNLCMT}* { | |
577 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*;{SNLCMT}*"); | |
578 | |
579 scan_for_comments (yytext); | |
580 fixup_column_count (yytext); | |
581 eat_whitespace (); | |
582 | |
583 lexer_flags.quote_is_transpose = false; | |
584 lexer_flags.convert_spaces_to_comma = true; | |
585 lexer_flags.looking_for_object_index = false; | |
586 lexer_flags.at_beginning_of_statement = false; | |
587 | |
588 COUNT_TOK_AND_RETURN (';'); | |
589 } | |
590 | |
591 %{ | |
592 // In some cases, new lines can also become row separators. If we | |
593 // don't eat whitespace here we can end up inserting too many | |
594 // semicolons. | |
595 | |
596 // FIXME -- we need to handle block comments here. | |
597 %} | |
598 | |
599 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* | | |
600 <MATRIX_START>{S}*{NL}{SNLCMT}* { | |
601 LEXER_DEBUG ("<MATRIX_START>{S}*{COMMENT}{SNLCMT}*|<MATRIX_START>{S}*{NL}{SNLCMT}*"); | |
602 | |
603 scan_for_comments (yytext); | |
604 fixup_column_count (yytext); | |
605 eat_whitespace (); | |
606 | |
607 lexer_flags.quote_is_transpose = false; | |
608 lexer_flags.convert_spaces_to_comma = true; | |
609 lexer_flags.at_beginning_of_statement = false; | |
610 | |
611 if (nesting_level.none ()) | |
612 return LEXICAL_ERROR; | |
613 | |
614 if (! lexer_flags.looking_at_object_index.front () | |
615 && nesting_level.is_bracket_or_brace ()) | |
616 { | |
617 maybe_warn_separator_insert (';'); | |
618 | |
619 COUNT_TOK_AND_RETURN (';'); | |
620 } | |
621 } | |
622 | |
623 \[{S}* { | |
624 LEXER_DEBUG ("\\[{S}*"); | |
625 | |
626 nesting_level.bracket (); | |
627 | |
628 lexer_flags.looking_at_object_index.push_front (false); | |
629 | |
630 current_input_column += yyleng; | |
631 lexer_flags.quote_is_transpose = false; | |
632 lexer_flags.convert_spaces_to_comma = true; | |
633 lexer_flags.looking_for_object_index = false; | |
634 lexer_flags.at_beginning_of_statement = false; | |
635 | |
636 if (lexer_flags.defining_func | |
637 && ! lexer_flags.parsed_function_name.top ()) | |
638 lexer_flags.looking_at_return_list = true; | |
639 else | |
640 lexer_flags.looking_at_matrix_or_assign_lhs = true; | |
641 | |
642 promptflag--; | |
643 eat_whitespace (); | |
644 | |
645 lexer_flags.bracketflag++; | |
646 BEGIN (MATRIX_START); | |
647 COUNT_TOK_AND_RETURN ('['); | |
648 } | |
649 | |
650 \] { | |
651 LEXER_DEBUG ("\\]"); | |
652 | |
653 nesting_level.remove (); | |
654 | |
655 lexer_flags.looking_at_object_index.pop_front (); | |
656 | |
657 lexer_flags.looking_for_object_index = true; | |
658 lexer_flags.at_beginning_of_statement = false; | |
659 | |
660 TOK_RETURN (']'); | |
661 } | |
662 | |
663 %{ | |
664 // Imaginary numbers. | |
665 %} | |
666 | |
667 {NUMBER}{Im} { | |
668 LEXER_DEBUG ("{NUMBER}{Im}"); | |
669 | |
670 handle_number (); | |
671 COUNT_TOK_AND_RETURN (IMAG_NUM); | |
672 } | |
673 | |
674 %{ | |
675 // Real numbers. Don't grab the `.' part of a dot operator as part of | |
676 // the constant. | |
677 %} | |
678 | |
679 {D}+/\.[\*/\\^\'] | | |
680 {NUMBER} { | |
681 LEXER_DEBUG ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); | |
682 handle_number (); | |
683 COUNT_TOK_AND_RETURN (NUM); | |
684 } | |
685 | |
686 %{ | |
687 // Eat whitespace. Whitespace inside matrix constants is handled by | |
688 // the <MATRIX_START> start state code above. | |
689 %} | |
690 | |
691 {S}* { | |
692 current_input_column += yyleng; | |
693 } | |
694 | |
695 %{ | |
696 // Continuation lines. Allow comments after continuations. | |
697 %} | |
698 | |
699 {CONT}{S}*{NL} | | |
700 {CONT}{S}*{COMMENT} { | |
701 LEXER_DEBUG ("{CONT}{S}*{NL}|{CONT}{S}*{COMMENT}"); | |
702 | |
703 if (yytext[0] == '\\') | |
704 gripe_matlab_incompatible_continuation (); | |
705 scan_for_comments (yytext); | |
706 promptflag--; | |
707 input_line_number++; | |
708 current_input_column = 1; | |
709 } | |
710 | |
711 %{ | |
712 // End of file. | |
713 %} | |
714 | |
715 <<EOF>> { | |
716 LEXER_DEBUG ("<<EOF>>"); | |
717 | |
718 if (block_comment_nesting_level != 0) | |
719 { | |
720 warning ("block comment open at end of input"); | |
721 | |
722 if ((reading_fcn_file || reading_script_file || reading_classdef_file) | |
723 && ! curr_fcn_file_name.empty ()) | |
724 warning ("near line %d of file `%s.m'", | |
725 input_line_number, curr_fcn_file_name.c_str ()); | |
726 } | |
727 | |
728 TOK_RETURN (END_OF_INPUT); | |
729 } | |
730 | |
731 %{ | |
732 // Identifiers. Truncate the token at the first space or tab but | |
733 // don't write directly on yytext. | |
734 %} | |
735 | |
736 {IDENT}{S}* { | |
737 LEXER_DEBUG ("{IDENT}{S}*"); | |
738 | |
739 int id_tok = handle_identifier (); | |
740 | |
741 if (id_tok >= 0) | |
742 COUNT_TOK_AND_RETURN (id_tok); | |
743 } | |
744 | |
745 %{ | |
746 // Superclass method identifiers. | |
747 %} | |
748 | |
749 {IDENT}@{IDENT}{S}* | | |
750 {IDENT}@{IDENT}.{IDENT}{S}* { | |
751 LEXER_DEBUG ("{IDENT}@{IDENT}{S}*|{IDENT}@{IDENT}.{IDENT}{S}*"); | |
752 | |
753 int id_tok = handle_superclass_identifier (); | |
754 | |
755 if (id_tok >= 0) | |
756 { | |
757 lexer_flags.looking_for_object_index = true; | |
758 | |
759 COUNT_TOK_AND_RETURN (SUPERCLASSREF); | |
760 } | |
761 } | |
762 | |
763 %{ | |
764 // Metaclass query | |
765 %} | |
766 | |
767 \?{IDENT}{S}* | | |
768 \?{IDENT}\.{IDENT}{S}* { | |
769 LEXER_DEBUG ("\\?{IDENT}{S}*|\\?{IDENT}\\.{IDENT}{S}*"); | |
770 | |
771 int id_tok = handle_meta_identifier (); | |
772 | |
773 if (id_tok >= 0) | |
774 { | |
775 lexer_flags.looking_for_object_index = true; | |
776 | |
777 COUNT_TOK_AND_RETURN (METAQUERY); | |
778 } | |
779 } | |
780 | |
781 %{ | |
782 // Function handles and superclass references | |
783 %} | |
784 | |
785 "@" { | |
786 LEXER_DEBUG ("@"); | |
787 | |
788 current_input_column++; | |
789 | |
790 lexer_flags.quote_is_transpose = false; | |
791 lexer_flags.convert_spaces_to_comma = false; | |
792 lexer_flags.looking_at_function_handle++; | |
793 lexer_flags.looking_for_object_index = false; | |
794 lexer_flags.at_beginning_of_statement = false; | |
795 | |
796 COUNT_TOK_AND_RETURN ('@'); | |
797 | |
798 } | |
799 | |
800 %{ | |
801 // A new line character. New line characters inside matrix constants | |
802 // are handled by the <MATRIX_START> start state code above. If closest | |
803 // nesting is inside parentheses, don't return a row separator. | |
804 %} | |
805 | |
{NL} {
    LEXER_DEBUG ("{NL}");

    // Position tracking restarts at the first column of the next line.
    current_input_column = 1;
    input_line_number++;

    lexer_flags.convert_spaces_to_comma = true;
    lexer_flags.quote_is_transpose = false;

    // Outside of any grouping, the newline is returned to the parser
    // and the next token begins a new statement.
    if (nesting_level.none ())
      {
        lexer_flags.at_beginning_of_statement = true;
        COUNT_TOK_AND_RETURN ('\n');
      }

    // Only reached when some delimiter is still open.  A bare newline
    // directly inside brackets or braces in this start state is an
    // error; inside parentheses it is skipped after a compatibility
    // gripe.
    if (nesting_level.is_bracket_or_brace ())
      return LEXICAL_ERROR;

    if (nesting_level.is_paren ())
      {
        lexer_flags.at_beginning_of_statement = false;
        gripe_matlab_incompatible ("bare newline inside parentheses");
      }
  }
828 | |
829 %{ | |
830 // Single quote can either be the beginning of a string or a transpose | |
831 // operator. | |
832 %} | |
833 | |
834 "'" { | |
835 LEXER_DEBUG ("'"); | |
836 | |
837 current_input_column++; | |
838 lexer_flags.convert_spaces_to_comma = true; | |
839 | |
840 if (lexer_flags.quote_is_transpose) | |
841 { | |
842 do_comma_insert_check (); | |
843 COUNT_TOK_AND_RETURN (QUOTE); | |
844 } | |
845 else | |
846 { | |
847 int tok = handle_string ('\''); | |
848 COUNT_TOK_AND_RETURN (tok); | |
849 } | |
850 } | |
851 | |
852 %{ | |
853 // Double quotes always begin strings. | |
854 %} | |
855 | |
856 \" { | |
857 LEXER_DEBUG ("\""); | |
858 | |
859 current_input_column++; | |
860 int tok = handle_string ('"'); | |
861 | |
862 COUNT_TOK_AND_RETURN (tok); | |
863 } | |
864 | |
865 %{ | |
866 // Gobble comments. | |
867 %} | |
868 | |
869 {CCHAR} { | |
870 LEXER_DEBUG ("{CCHAR}"); | |
871 | |
872 lexer_flags.looking_for_object_index = false; | |
873 | |
874 xunput (yytext[0], yytext); | |
875 | |
876 bool eof = false; | |
877 int tok = process_comment (false, eof); | |
878 | |
879 if (eof) | |
880 TOK_RETURN (END_OF_INPUT); | |
881 else if (tok > 0) | |
882 COUNT_TOK_AND_RETURN (tok); | |
883 } | |
884 | |
885 %{ | |
886 // Block comments. | |
887 %} | |
888 | |
889 ^{S}*{CCHAR}\{{S}*{NL} { | |
890 LEXER_DEBUG ("^{S}*{CCHAR}\\{{S}*{NL}"); | |
891 | |
892 lexer_flags.looking_for_object_index = false; | |
893 | |
894 input_line_number++; | |
895 current_input_column = 1; | |
896 block_comment_nesting_level++; | |
897 promptflag--; | |
898 | |
899 bool eof = false; | |
900 process_comment (true, eof); | |
901 } | |
902 | |
903 %{ | |
904 // Other operators. | |
905 %} | |
906 | |
907 ":" { LEXER_DEBUG (":"); BIN_OP_RETURN (':', false, false); } | |
908 | |
909 ".+" { LEXER_DEBUG (".+"); XBIN_OP_RETURN (EPLUS, false, false); } | |
910 ".-" { LEXER_DEBUG (".-"); XBIN_OP_RETURN (EMINUS, false, false); } | |
911 ".*" { LEXER_DEBUG (".*"); BIN_OP_RETURN (EMUL, false, false); } | |
912 "./" { LEXER_DEBUG ("./"); BIN_OP_RETURN (EDIV, false, false); } | |
913 ".\\" { LEXER_DEBUG (".\\"); BIN_OP_RETURN (ELEFTDIV, false, false); } | |
914 ".^" { LEXER_DEBUG (".^"); BIN_OP_RETURN (EPOW, false, false); } | |
915 ".**" { LEXER_DEBUG (".**"); XBIN_OP_RETURN (EPOW, false, false); } | |
916 ".'" { LEXER_DEBUG (".'"); do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true, false); } | |
917 "++" { LEXER_DEBUG ("++"); do_comma_insert_check (); XBIN_OP_RETURN_INTERNAL (PLUS_PLUS, true, false, true); } | |
918 "--" { LEXER_DEBUG ("--"); do_comma_insert_check (); XBIN_OP_RETURN_INTERNAL (MINUS_MINUS, true, false, true); } | |
919 "<=" { LEXER_DEBUG ("<="); BIN_OP_RETURN (EXPR_LE, false, false); } | |
920 "==" { LEXER_DEBUG ("=="); BIN_OP_RETURN (EXPR_EQ, false, false); } | |
921 "~=" { LEXER_DEBUG ("~="); BIN_OP_RETURN (EXPR_NE, false, false); } | |
922 "!=" { LEXER_DEBUG ("!="); XBIN_OP_RETURN (EXPR_NE, false, false); } | |
923 ">=" { LEXER_DEBUG (">="); BIN_OP_RETURN (EXPR_GE, false, false); } | |
924 "&" { LEXER_DEBUG ("&"); BIN_OP_RETURN (EXPR_AND, false, false); } | |
925 "|" { LEXER_DEBUG ("|"); BIN_OP_RETURN (EXPR_OR, false, false); } | |
926 "<" { LEXER_DEBUG ("<"); BIN_OP_RETURN (EXPR_LT, false, false); } | |
927 ">" { LEXER_DEBUG (">"); BIN_OP_RETURN (EXPR_GT, false, false); } | |
928 "+" { LEXER_DEBUG ("+"); BIN_OP_RETURN ('+', false, false); } | |
929 "-" { LEXER_DEBUG ("-"); BIN_OP_RETURN ('-', false, false); } | |
930 "*" { LEXER_DEBUG ("*"); BIN_OP_RETURN ('*', false, false); } | |
931 "/" { LEXER_DEBUG ("/"); BIN_OP_RETURN ('/', false, false); } | |
932 "\\" { LEXER_DEBUG ("\\"); BIN_OP_RETURN (LEFTDIV, false, false); } | |
933 ";" { LEXER_DEBUG (";"); BIN_OP_RETURN (';', true, true); } | |
934 "," { LEXER_DEBUG (","); BIN_OP_RETURN (',', true, ! lexer_flags.looking_at_object_index.front ()); } | |
935 "^" { LEXER_DEBUG ("^"); BIN_OP_RETURN (POW, false, false); } | |
936 "**" { LEXER_DEBUG ("**"); XBIN_OP_RETURN (POW, false, false); } | |
937 "=" { LEXER_DEBUG ("="); BIN_OP_RETURN ('=', true, false); } | |
938 "&&" { LEXER_DEBUG ("&&"); BIN_OP_RETURN (EXPR_AND_AND, false, false); } | |
939 "||" { LEXER_DEBUG ("||"); BIN_OP_RETURN (EXPR_OR_OR, false, false); } | |
940 "<<" { LEXER_DEBUG ("<<"); XBIN_OP_RETURN (LSHIFT, false, false); } | |
941 ">>" { LEXER_DEBUG (">>"); XBIN_OP_RETURN (RSHIFT, false, false); } | |
942 | |
943 {NOT} { | |
944 LEXER_DEBUG ("{NOT}"); | |
945 | |
946 if (yytext[0] == '~') | |
947 BIN_OP_RETURN (EXPR_NOT, false, false); | |
948 else | |
949 XBIN_OP_RETURN (EXPR_NOT, false, false); | |
950 } | |
951 | |
952 "(" { | |
953 LEXER_DEBUG ("("); | |
954 | |
955 // If we are looking for an object index, then push TRUE for | |
956 // looking_at_object_index. Otherwise, just push whatever state | |
957 // is current (so that we can pop it off the stack when we find | |
958 // the matching close paren). | |
959 | |
960 lexer_flags.looking_at_object_index.push_front | |
961 (lexer_flags.looking_for_object_index); | |
962 | |
963 lexer_flags.looking_at_indirect_ref = false; | |
964 lexer_flags.looking_for_object_index = false; | |
965 lexer_flags.at_beginning_of_statement = false; | |
966 | |
967 nesting_level.paren (); | |
968 promptflag--; | |
969 | |
970 TOK_RETURN ('('); | |
971 } | |
972 | |
973 ")" { | |
974 LEXER_DEBUG (")"); | |
975 | |
976 nesting_level.remove (); | |
977 current_input_column++; | |
978 | |
979 lexer_flags.looking_at_object_index.pop_front (); | |
980 | |
981 lexer_flags.quote_is_transpose = true; | |
982 lexer_flags.convert_spaces_to_comma | |
983 = (nesting_level.is_bracket_or_brace () | |
984 && ! lexer_flags.looking_at_anon_fcn_args); | |
985 lexer_flags.looking_for_object_index = true; | |
986 lexer_flags.at_beginning_of_statement = false; | |
987 | |
988 if (lexer_flags.looking_at_anon_fcn_args) | |
989 lexer_flags.looking_at_anon_fcn_args = false; | |
990 | |
991 do_comma_insert_check (); | |
992 | |
993 COUNT_TOK_AND_RETURN (')'); | |
994 } | |
995 | |
996 "." { | |
997 LEXER_DEBUG ("."); | |
998 | |
999 lexer_flags.looking_for_object_index = false; | |
1000 lexer_flags.at_beginning_of_statement = false; | |
1001 | |
1002 TOK_RETURN ('.'); | |
1003 } | |
1004 | |
1005 "+=" { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false, false); } | |
1006 "-=" { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false, false); } | |
1007 "*=" { LEXER_DEBUG ("*="); XBIN_OP_RETURN (MUL_EQ, false, false); } | |
1008 "/=" { LEXER_DEBUG ("/="); XBIN_OP_RETURN (DIV_EQ, false, false); } | |
1009 "\\=" { LEXER_DEBUG ("\\="); XBIN_OP_RETURN (LEFTDIV_EQ, false, false); } | |
1010 ".+=" { LEXER_DEBUG (".+="); XBIN_OP_RETURN (ADD_EQ, false, false); } | |
1011 ".-=" { LEXER_DEBUG (".-="); XBIN_OP_RETURN (SUB_EQ, false, false); } | |
1012 ".*=" { LEXER_DEBUG (".*="); XBIN_OP_RETURN (EMUL_EQ, false, false); } | |
1013 "./=" { LEXER_DEBUG ("./="); XBIN_OP_RETURN (EDIV_EQ, false, false); } | |
1014 ".\\=" { LEXER_DEBUG (".\\="); XBIN_OP_RETURN (ELEFTDIV_EQ, false, false); } | |
1015 {POW}= { LEXER_DEBUG ("{POW}="); XBIN_OP_RETURN (POW_EQ, false, false); } | |
1016 {EPOW}= { LEXER_DEBUG ("{EPOW}="); XBIN_OP_RETURN (EPOW_EQ, false, false); } | |
1017 "&=" { LEXER_DEBUG ("&="); XBIN_OP_RETURN (AND_EQ, false, false); } | |
1018 "|=" { LEXER_DEBUG ("|="); XBIN_OP_RETURN (OR_EQ, false, false); } | |
1019 "<<=" { LEXER_DEBUG ("<<="); XBIN_OP_RETURN (LSHIFT_EQ, false, false); } | |
1020 ">>=" { LEXER_DEBUG (">>="); XBIN_OP_RETURN (RSHIFT_EQ, false, false); } | |
1021 | |
1022 \{{S}* { | |
1023 LEXER_DEBUG ("\\{{S}*"); | |
1024 | |
1025 nesting_level.brace (); | |
1026 | |
1027 lexer_flags.looking_at_object_index.push_front | |
1028 (lexer_flags.looking_for_object_index); | |
1029 | |
1030 current_input_column += yyleng; | |
1031 lexer_flags.quote_is_transpose = false; | |
1032 lexer_flags.convert_spaces_to_comma = true; | |
1033 lexer_flags.looking_for_object_index = false; | |
1034 lexer_flags.at_beginning_of_statement = false; | |
1035 | |
1036 promptflag--; | |
1037 eat_whitespace (); | |
1038 | |
1039 lexer_flags.braceflag++; | |
1040 BEGIN (MATRIX_START); | |
1041 COUNT_TOK_AND_RETURN ('{'); | |
1042 } | |
1043 | |
1044 "}" { | |
1045 LEXER_DEBUG ("}"); | |
1046 | |
1047 lexer_flags.looking_at_object_index.pop_front (); | |
1048 | |
1049 lexer_flags.looking_for_object_index = true; | |
1050 lexer_flags.at_beginning_of_statement = false; | |
1051 | |
1052 nesting_level.remove (); | |
1053 | |
1054 TOK_RETURN ('}'); | |
1055 } | |
1056 | |
1057 %{ | |
1058 // Unrecognized input is a lexical error. | |
1059 %} | |
1060 | |
1061 . { | |
1062 LEXER_DEBUG ("."); | |
1063 | |
1064 xunput (yytext[0], yytext); | |
1065 | |
1066 int c = text_yyinput (); | |
1067 | |
1068 if (c != EOF) | |
1069 { | |
1070 current_input_column++; | |
1071 | |
1072 error ("invalid character `%s' (ASCII %d) near line %d, column %d", | |
1073 undo_string_escape (static_cast<char> (c)), c, | |
1074 input_line_number, current_input_column); | |
1075 | |
1076 return LEXICAL_ERROR; | |
1077 } | |
1078 else | |
1079 TOK_RETURN (END_OF_INPUT); | |
1080 } | |
1081 | |
1082 %% | |
1083 | |
1084 // GAG. | |
1085 // | |
1086 // If we're reading a matrix and the next character is '[', make sure | |
1087 // that we insert a comma ahead of it. | |
1088 | |
1089 void | |
1090 do_comma_insert_check (void) | |
1091 { | |
1092 int spc_gobbled = eat_continuation (); | |
1093 | |
1094 int c = text_yyinput (); | |
1095 | |
1096 xunput (c, yytext); | |
1097 | |
1098 if (spc_gobbled) | |
1099 xunput (' ', yytext); | |
1100 | |
1101 lexer_flags.do_comma_insert = (! lexer_flags.looking_at_object_index.front () | |
1102 && lexer_flags.bracketflag && c == '['); | |
1103 } | |
1104 | |
1105 // Fix things up for errors or interrupts. The parser is never called | |
1106 // recursively, so it is always safe to reinitialize its state before | |
1107 // doing any parsing. | |
1108 | |
1109 void | |
1110 reset_parser (void) | |
1111 { | |
1112 // Start off on the right foot. | |
1113 BEGIN (INITIAL); | |
1114 | |
1115 parser_end_of_input = false; | |
1116 | |
1117 parser_symtab_context.clear (); | |
1118 | |
1119 // We do want a prompt by default. | |
1120 promptflag = 1; | |
1121 | |
1122 // We are not in a block comment. | |
1123 block_comment_nesting_level = 0; | |
1124 | |
1125 // Error may have occurred inside some brackets, braces, or parentheses. | |
1126 nesting_level.clear (); | |
1127 | |
1128 // Clear out the stack of token info used to track line and column | |
1129 // numbers. | |
1130 while (! token_stack.empty ()) | |
1131 { | |
1132 delete token_stack.top (); | |
1133 token_stack.pop (); | |
1134 } | |
1135 | |
1136 // Can be reset by defining a function. | |
1137 if (! (reading_script_file || reading_fcn_file || reading_classdef_file)) | |
1138 { | |
1139 current_input_column = 1; | |
1140 input_line_number = command_editor::current_command_number (); | |
1141 } | |
1142 | |
1143 // Only ask for input from stdin if we are expecting interactive | |
1144 // input. | |
1145 | |
1146 if (! quitting_gracefully | |
1147 && (interactive || forced_interactive) | |
1148 && ! (reading_fcn_file | |
1149 || reading_classdef_file | |
1150 || reading_script_file | |
1151 || get_input_from_eval_string | |
1152 || input_from_startup_file)) | |
1153 yyrestart (stdin); | |
1154 | |
1155 // Clear the buffer for help text. | |
1156 while (! help_buf.empty ()) | |
1157 help_buf.pop (); | |
1158 | |
1159 // Reset other flags. | |
1160 lexer_flags.init (); | |
1161 } | |
1162 | |
// Write a printable representation of C to std::cerr (used by the
// lexer debugging traces).
//
// Characters for which isgraph is true are written as-is.  Codes 0
// through 32 and 127 are written using a symbolic name (or a C escape
// sequence for the codes 7 through 13).  Anything else produces no
// output.

static void
display_character (char c)
{
  // Names for the character codes 0 .. 32, indexed by code.
  static const char * const low_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  // The <cctype> functions require a value representable as unsigned
  // char (or EOF); passing a negative plain char is undefined.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc <= 32)
    std::cerr << low_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}
1308 | |
1309 static int | |
1310 text_yyinput (void) | |
1311 { | |
1312 int c = yyinput (); | |
1313 | |
1314 if (lexer_debug_flag) | |
1315 { | |
1316 std::cerr << "I: "; | |
1317 display_character (c); | |
1318 std::cerr << std::endl; | |
1319 } | |
1320 | |
1321 // Convert CRLF into just LF and single CR into LF. | |
1322 | |
1323 if (c == '\r') | |
1324 { | |
1325 c = yyinput (); | |
1326 | |
1327 if (lexer_debug_flag) | |
1328 { | |
1329 std::cerr << "I: "; | |
1330 display_character (c); | |
1331 std::cerr << std::endl; | |
1332 } | |
1333 | |
1334 if (c != '\n') | |
1335 { | |
1336 xunput (c, yytext); | |
1337 c = '\n'; | |
1338 } | |
1339 } | |
1340 | |
1341 if (c == '\n') | |
1342 input_line_number++; | |
1343 | |
1344 return c; | |
1345 } | |
1346 | |
1347 static void | |
1348 xunput (char c, char *buf) | |
1349 { | |
1350 if (lexer_debug_flag) | |
1351 { | |
1352 std::cerr << "U: "; | |
1353 display_character (c); | |
1354 std::cerr << std::endl; | |
1355 } | |
1356 | |
1357 if (c == '\n') | |
1358 input_line_number--; | |
1359 | |
1360 yyunput (c, buf); | |
1361 } | |
1362 | |
// If we read some newlines, we need to figure out what column we're
// really looking at.
1365 | |
1366 static void | |
1367 fixup_column_count (char *s) | |
1368 { | |
1369 char c; | |
1370 while ((c = *s++) != '\0') | |
1371 { | |
1372 if (c == '\n') | |
1373 { | |
1374 input_line_number++; | |
1375 current_input_column = 1; | |
1376 } | |
1377 else | |
1378 current_input_column++; | |
1379 } | |
1380 } | |
1381 | |
1382 // Include these so that we don't have to link to libfl.a. | |
1383 | |
// Always report that there is no further input once the current
// source is exhausted.

int
yywrap (void)
{
  const int no_more_input = 1;

  return no_more_input;
}
1389 | |
1390 // Tell us all what the current buffer is. | |
1391 | |
1392 YY_BUFFER_STATE | |
1393 current_buffer (void) | |
1394 { | |
1395 return YY_CURRENT_BUFFER; | |
1396 } | |
1397 | |
1398 // Create a new buffer. | |
1399 | |
1400 YY_BUFFER_STATE | |
1401 create_buffer (FILE *f) | |
1402 { | |
1403 return yy_create_buffer (f, YY_BUF_SIZE); | |
1404 } | |
1405 | |
1406 // Start reading a new buffer. | |
1407 | |
1408 void | |
1409 switch_to_buffer (YY_BUFFER_STATE buf) | |
1410 { | |
1411 yy_switch_to_buffer (buf); | |
1412 } | |
1413 | |
1414 // Delete a buffer. | |
1415 | |
1416 void | |
1417 delete_buffer (YY_BUFFER_STATE buf) | |
1418 { | |
1419 yy_delete_buffer (buf); | |
1420 | |
1421 // Prevent invalid yyin from being used by yyrestart. | |
1422 if (! current_buffer ()) | |
1423 yyin = 0; | |
1424 } | |
1425 | |
1426 // Delete all buffers from the stack. | |
1427 void | |
1428 clear_all_buffers (void) | |
1429 { | |
1430 while (current_buffer ()) | |
1431 octave_pop_buffer_state (); | |
1432 } | |
1433 | |
1434 void | |
1435 cleanup_parser (void) | |
1436 { | |
1437 reset_parser (); | |
1438 | |
1439 clear_all_buffers (); | |
1440 } | |
1441 | |
1442 // Restore a buffer (for unwind-prot). | |
1443 | |
1444 void | |
1445 restore_input_buffer (void *buf) | |
1446 { | |
1447 switch_to_buffer (static_cast<YY_BUFFER_STATE> (buf)); | |
1448 } | |
1449 | |
1450 // Delete a buffer (for unwind-prot). | |
1451 | |
1452 void | |
1453 delete_input_buffer (void *buf) | |
1454 { | |
1455 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); | |
1456 } | |
1457 | |
1458 static bool | |
1459 inside_any_object_index (void) | |
1460 { | |
1461 bool retval = false; | |
1462 | |
1463 for (std::list<bool>::const_iterator i = lexer_flags.looking_at_object_index.begin (); | |
1464 i != lexer_flags.looking_at_object_index.end (); i++) | |
1465 { | |
1466 if (*i) | |
1467 { | |
1468 retval = true; | |
1469 break; | |
1470 } | |
1471 } | |
1472 | |
1473 return retval; | |
1474 } | |
1475 | |
// Handle keywords.  Return the keyword's token value, or 0 if the
// identifier is not a keyword or should not be treated as one in the
// current context.
1477 | |
1478 static int | |
1479 is_keyword_token (const std::string& s) | |
1480 { | |
1481 int l = input_line_number; | |
1482 int c = current_input_column; | |
1483 | |
1484 int len = s.length (); | |
1485 | |
1486 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); | |
1487 | |
1488 if (kw) | |
1489 { | |
1490 yylval.tok_val = 0; | |
1491 | |
1492 switch (kw->kw_id) | |
1493 { | |
1494 case break_kw: | |
1495 case catch_kw: | |
1496 case continue_kw: | |
1497 case else_kw: | |
1498 case otherwise_kw: | |
1499 case return_kw: | |
1500 case unwind_protect_cleanup_kw: | |
1501 lexer_flags.at_beginning_of_statement = true; | |
1502 break; | |
1503 | |
1504 case static_kw: | |
1505 if ((reading_fcn_file || reading_script_file | |
1506 || reading_classdef_file) | |
1507 && ! curr_fcn_file_full_name.empty ()) | |
1508 warning_with_id ("Octave:deprecated-keyword", | |
1509 "the `static' keyword is obsolete and will be removed from a future version of Octave; please use `persistent' instead; near line %d of file `%s'", | |
1510 input_line_number, | |
1511 curr_fcn_file_full_name.c_str ()); | |
1512 else | |
1513 warning_with_id ("Octave:deprecated-keyword", | |
1514 "the `static' keyword is obsolete and will be removed from a future version of Octave; please use `persistent' instead; near line %d", | |
1515 input_line_number); | |
1516 // fall through ... | |
1517 | |
1518 case persistent_kw: | |
1519 break; | |
1520 | |
1521 case case_kw: | |
1522 case elseif_kw: | |
1523 case global_kw: | |
1524 case until_kw: | |
1525 break; | |
1526 | |
1527 case end_kw: | |
1528 if (inside_any_object_index () | |
1529 || (! reading_classdef_file | |
1530 && (lexer_flags.defining_func | |
1531 && ! (lexer_flags.looking_at_return_list | |
1532 || lexer_flags.parsed_function_name.top ())))) | |
1533 return 0; | |
1534 | |
1535 yylval.tok_val = new token (token::simple_end, l, c); | |
1536 lexer_flags.at_beginning_of_statement = true; | |
1537 break; | |
1538 | |
1539 case end_try_catch_kw: | |
1540 yylval.tok_val = new token (token::try_catch_end, l, c); | |
1541 lexer_flags.at_beginning_of_statement = true; | |
1542 break; | |
1543 | |
1544 case end_unwind_protect_kw: | |
1545 yylval.tok_val = new token (token::unwind_protect_end, l, c); | |
1546 lexer_flags.at_beginning_of_statement = true; | |
1547 break; | |
1548 | |
1549 case endfor_kw: | |
1550 yylval.tok_val = new token (token::for_end, l, c); | |
1551 lexer_flags.at_beginning_of_statement = true; | |
1552 break; | |
1553 | |
1554 case endfunction_kw: | |
1555 yylval.tok_val = new token (token::function_end, l, c); | |
1556 lexer_flags.at_beginning_of_statement = true; | |
1557 break; | |
1558 | |
1559 case endif_kw: | |
1560 yylval.tok_val = new token (token::if_end, l, c); | |
1561 lexer_flags.at_beginning_of_statement = true; | |
1562 break; | |
1563 | |
1564 case endparfor_kw: | |
1565 yylval.tok_val = new token (token::parfor_end, l, c); | |
1566 lexer_flags.at_beginning_of_statement = true; | |
1567 break; | |
1568 | |
1569 case endswitch_kw: | |
1570 yylval.tok_val = new token (token::switch_end, l, c); | |
1571 lexer_flags.at_beginning_of_statement = true; | |
1572 break; | |
1573 | |
1574 case endwhile_kw: | |
1575 yylval.tok_val = new token (token::while_end, l, c); | |
1576 lexer_flags.at_beginning_of_statement = true; | |
1577 break; | |
1578 | |
1579 case endclassdef_kw: | |
1580 yylval.tok_val = new token (token::classdef_end, l, c); | |
1581 lexer_flags.at_beginning_of_statement = true; | |
1582 break; | |
1583 | |
1584 case endenumeration_kw: | |
1585 yylval.tok_val = new token (token::enumeration_end, l, c); | |
1586 lexer_flags.at_beginning_of_statement = true; | |
1587 break; | |
1588 | |
1589 case endevents_kw: | |
1590 yylval.tok_val = new token (token::events_end, l, c); | |
1591 lexer_flags.at_beginning_of_statement = true; | |
1592 break; | |
1593 | |
1594 case endmethods_kw: | |
1595 yylval.tok_val = new token (token::methods_end, l, c); | |
1596 lexer_flags.at_beginning_of_statement = true; | |
1597 break; | |
1598 | |
1599 case endproperties_kw: | |
1600 yylval.tok_val = new token (token::properties_end, l, c); | |
1601 lexer_flags.at_beginning_of_statement = true; | |
1602 break; | |
1603 | |
1604 | |
1605 case for_kw: | |
1606 case parfor_kw: | |
1607 case while_kw: | |
1608 promptflag--; | |
1609 lexer_flags.looping++; | |
1610 break; | |
1611 | |
1612 case do_kw: | |
1613 lexer_flags.at_beginning_of_statement = true; | |
1614 promptflag--; | |
1615 lexer_flags.looping++; | |
1616 break; | |
1617 | |
1618 case try_kw: | |
1619 case unwind_protect_kw: | |
1620 lexer_flags.at_beginning_of_statement = true; | |
1621 promptflag--; | |
1622 break; | |
1623 | |
1624 case if_kw: | |
1625 case switch_kw: | |
1626 promptflag--; | |
1627 break; | |
1628 | |
1629 case get_kw: | |
1630 case set_kw: | |
1631 // 'get' and 'set' are keywords in classdef method | |
1632 // declarations. | |
1633 if (! lexer_flags.maybe_classdef_get_set_method) | |
1634 return 0; | |
1635 break; | |
1636 | |
1637 case enumeration_kw: | |
1638 case events_kw: | |
1639 case methods_kw: | |
1640 case properties_kw: | |
1641 // 'properties', 'methods' and 'events' are keywords for | |
1642 // classdef blocks. | |
1643 if (! lexer_flags.parsing_classdef) | |
1644 return 0; | |
1645 // fall through ... | |
1646 | |
1647 case classdef_kw: | |
1648 // 'classdef' is always a keyword. | |
1649 promptflag--; | |
1650 break; | |
1651 | |
1652 case function_kw: | |
1653 promptflag--; | |
1654 | |
1655 lexer_flags.defining_func++; | |
1656 lexer_flags.parsed_function_name.push (false); | |
1657 | |
1658 if (! (reading_fcn_file || reading_script_file | |
1659 || reading_classdef_file)) | |
1660 input_line_number = 1; | |
1661 break; | |
1662 | |
1663 case magic_file_kw: | |
1664 { | |
1665 if ((reading_fcn_file || reading_script_file | |
1666 || reading_classdef_file) | |
1667 && ! curr_fcn_file_full_name.empty ()) | |
1668 yylval.tok_val = new token (curr_fcn_file_full_name, l, c); | |
1669 else | |
1670 yylval.tok_val = new token ("stdin", l, c); | |
1671 } | |
1672 break; | |
1673 | |
1674 case magic_line_kw: | |
1675 yylval.tok_val = new token (static_cast<double> (l), "", l, c); | |
1676 break; | |
1677 | |
1678 default: | |
1679 panic_impossible (); | |
1680 } | |
1681 | |
1682 if (! yylval.tok_val) | |
1683 yylval.tok_val = new token (l, c); | |
1684 | |
1685 token_stack.push (yylval.tok_val); | |
1686 | |
1687 return kw->tok; | |
1688 } | |
1689 | |
1690 return 0; | |
1691 } | |
1692 | |
1693 static bool | |
1694 is_variable (const std::string& name) | |
1695 { | |
1696 return (symbol_table::is_variable (name) | |
1697 || (lexer_flags.pending_local_variables.find (name) | |
1698 != lexer_flags.pending_local_variables.end ())); | |
1699 } | |
1700 | |
1701 static std::string | |
1702 grab_block_comment (stream_reader& reader, bool& eof) | |
1703 { | |
1704 std::string buf; | |
1705 | |
1706 bool at_bol = true; | |
1707 bool look_for_marker = false; | |
1708 | |
1709 bool warned_incompatible = false; | |
1710 | |
1711 int c = 0; | |
1712 | |
1713 while ((c = reader.getc ()) != EOF) | |
1714 { | |
1715 current_input_column++; | |
1716 | |
1717 if (look_for_marker) | |
1718 { | |
1719 at_bol = false; | |
1720 look_for_marker = false; | |
1721 | |
1722 if (c == '{' || c == '}') | |
1723 { | |
1724 std::string tmp_buf (1, static_cast<char> (c)); | |
1725 | |
1726 int type = c; | |
1727 | |
1728 bool done = false; | |
1729 | |
1730 while ((c = reader.getc ()) != EOF && ! done) | |
1731 { | |
1732 current_input_column++; | |
1733 | |
1734 switch (c) | |
1735 { | |
1736 case ' ': | |
1737 case '\t': | |
1738 tmp_buf += static_cast<char> (c); | |
1739 break; | |
1740 | |
1741 case '\n': | |
1742 { | |
1743 current_input_column = 0; | |
1744 at_bol = true; | |
1745 done = true; | |
1746 | |
1747 if (type == '{') | |
1748 { | |
1749 block_comment_nesting_level++; | |
1750 promptflag--; | |
1751 } | |
1752 else | |
1753 { | |
1754 block_comment_nesting_level--; | |
1755 promptflag++; | |
1756 | |
1757 if (block_comment_nesting_level == 0) | |
1758 { | |
1759 buf += grab_comment_block (reader, true, eof); | |
1760 | |
1761 return buf; | |
1762 } | |
1763 } | |
1764 } | |
1765 break; | |
1766 | |
1767 default: | |
1768 at_bol = false; | |
1769 tmp_buf += static_cast<char> (c); | |
1770 buf += tmp_buf; | |
1771 done = true; | |
1772 break; | |
1773 } | |
1774 } | |
1775 } | |
1776 } | |
1777 | |
1778 if (at_bol && (c == '%' || c == '#')) | |
1779 { | |
1780 if (c == '#' && ! warned_incompatible) | |
1781 { | |
1782 warned_incompatible = true; | |
1783 maybe_gripe_matlab_incompatible_comment (c); | |
1784 } | |
1785 | |
1786 at_bol = false; | |
1787 look_for_marker = true; | |
1788 } | |
1789 else | |
1790 { | |
1791 buf += static_cast<char> (c); | |
1792 | |
1793 if (c == '\n') | |
1794 { | |
1795 current_input_column = 0; | |
1796 at_bol = true; | |
1797 } | |
1798 } | |
1799 } | |
1800 | |
1801 if (c == EOF) | |
1802 eof = true; | |
1803 | |
1804 return buf; | |
1805 } | |
1806 | |
1807 std::string | |
1808 grab_comment_block (stream_reader& reader, bool at_bol, | |
1809 bool& eof) | |
1810 { | |
1811 std::string buf; | |
1812 | |
1813 // TRUE means we are at the beginning of a comment block. | |
1814 bool begin_comment = false; | |
1815 | |
1816 // TRUE means we are currently reading a comment block. | |
1817 bool in_comment = false; | |
1818 | |
1819 bool warned_incompatible = false; | |
1820 | |
1821 int c = 0; | |
1822 | |
1823 while ((c = reader.getc ()) != EOF) | |
1824 { | |
1825 current_input_column++; | |
1826 | |
1827 if (begin_comment) | |
1828 { | |
1829 if (c == '%' || c == '#') | |
1830 { | |
1831 at_bol = false; | |
1832 continue; | |
1833 } | |
1834 else if (at_bol && c == '{') | |
1835 { | |
1836 std::string tmp_buf (1, static_cast<char> (c)); | |
1837 | |
1838 bool done = false; | |
1839 | |
1840 while ((c = reader.getc ()) != EOF && ! done) | |
1841 { | |
1842 current_input_column++; | |
1843 | |
1844 switch (c) | |
1845 { | |
1846 case ' ': | |
1847 case '\t': | |
1848 tmp_buf += static_cast<char> (c); | |
1849 break; | |
1850 | |
1851 case '\n': | |
1852 { | |
1853 current_input_column = 0; | |
1854 at_bol = true; | |
1855 done = true; | |
1856 | |
1857 block_comment_nesting_level++; | |
1858 promptflag--; | |
1859 | |
1860 buf += grab_block_comment (reader, eof); | |
1861 | |
1862 in_comment = false; | |
1863 | |
1864 if (eof) | |
1865 goto done; | |
1866 } | |
1867 break; | |
1868 | |
1869 default: | |
1870 at_bol = false; | |
1871 tmp_buf += static_cast<char> (c); | |
1872 buf += tmp_buf; | |
1873 done = true; | |
1874 break; | |
1875 } | |
1876 } | |
1877 } | |
1878 else | |
1879 { | |
1880 at_bol = false; | |
1881 begin_comment = false; | |
1882 } | |
1883 } | |
1884 | |
1885 if (in_comment) | |
1886 { | |
1887 buf += static_cast<char> (c); | |
1888 | |
1889 if (c == '\n') | |
1890 { | |
1891 at_bol = true; | |
1892 current_input_column = 0; | |
1893 in_comment = false; | |
1894 | |
1895 // FIXME -- bailing out here prevents things like | |
1896 // | |
1897 // octave> # comment | |
1898 // octave> x = 1 | |
1899 // | |
1900 // from failing at the command line, while still | |
1901 // allowing blocks of comments to be grabbed properly | |
1902 // for function doc strings. But only the first line of | |
1903 // a mult-line doc string will be picked up for | |
1904 // functions defined on the command line. We need a | |
1905 // better way of collecting these comments... | |
1906 if (! (reading_fcn_file || reading_script_file)) | |
1907 goto done; | |
1908 } | |
1909 } | |
1910 else | |
1911 { | |
1912 switch (c) | |
1913 { | |
1914 case ' ': | |
1915 case '\t': | |
1916 break; | |
1917 | |
1918 case '#': | |
1919 if (! warned_incompatible) | |
1920 { | |
1921 warned_incompatible = true; | |
1922 maybe_gripe_matlab_incompatible_comment (c); | |
1923 } | |
1924 // fall through... | |
1925 | |
1926 case '%': | |
1927 in_comment = true; | |
1928 begin_comment = true; | |
1929 break; | |
1930 | |
1931 default: | |
1932 current_input_column--; | |
1933 reader.ungetc (c); | |
1934 goto done; | |
1935 } | |
1936 } | |
1937 } | |
1938 | |
1939 done: | |
1940 | |
1941 if (c == EOF) | |
1942 eof = true; | |
1943 | |
1944 return buf; | |
1945 } | |
1946 | |
1947 class | |
1948 flex_stream_reader : public stream_reader | |
1949 { | |
1950 public: | |
1951 flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { } | |
1952 | |
1953 int getc (void) { return ::text_yyinput (); } | |
1954 int ungetc (int c) { ::xunput (c, buf); return 0; } | |
1955 | |
1956 private: | |
1957 | |
1958 // No copying! | |
1959 | |
1960 flex_stream_reader (const flex_stream_reader&); | |
1961 | |
1962 flex_stream_reader& operator = (const flex_stream_reader&); | |
1963 | |
1964 char *buf; | |
1965 }; | |
1966 | |
1967 static int | |
1968 process_comment (bool start_in_block, bool& eof) | |
1969 { | |
1970 eof = false; | |
1971 | |
1972 std::string help_txt; | |
1973 | |
1974 if (! help_buf.empty ()) | |
1975 help_txt = help_buf.top (); | |
1976 | |
1977 flex_stream_reader flex_reader (yytext); | |
1978 | |
1979 // process_comment is only supposed to be called when we are not | |
1980 // initially looking at a block comment. | |
1981 | |
1982 std::string txt = start_in_block | |
1983 ? grab_block_comment (flex_reader, eof) | |
1984 : grab_comment_block (flex_reader, false, eof); | |
1985 | |
1986 if (lexer_debug_flag) | |
1987 std::cerr << "C: " << txt << std::endl; | |
1988 | |
1989 if (help_txt.empty () && nesting_level.none ()) | |
1990 { | |
1991 if (! help_buf.empty ()) | |
1992 help_buf.pop (); | |
1993 | |
1994 help_buf.push (txt); | |
1995 } | |
1996 | |
1997 octave_comment_buffer::append (txt); | |
1998 | |
1999 current_input_column = 1; | |
2000 lexer_flags.quote_is_transpose = false; | |
2001 lexer_flags.convert_spaces_to_comma = true; | |
2002 lexer_flags.at_beginning_of_statement = true; | |
2003 | |
2004 if (YY_START == COMMAND_START) | |
2005 BEGIN (INITIAL); | |
2006 | |
2007 if (nesting_level.none ()) | |
2008 return '\n'; | |
2009 else if (nesting_level.is_bracket_or_brace ()) | |
2010 return ';'; | |
2011 else | |
2012 return 0; | |
2013 } | |
2014 | |
2015 // Return 1 if the given character matches any character in the given | |
2016 // string. | |
2017 | |
// Return true if C equals one of the characters of the NUL-terminated
// string S.  The terminating NUL itself is never matched.

static bool
match_any (char c, const char *s)
{
  for (const char *p = s; *p != '\0'; ++p)
    if (*p == c)
      return true;

  return false;
}
2029 | |
2030 // Given information about the spacing surrounding an operator, | |
2031 // return 1 if it looks like it should be treated as a binary | |
2032 // operator. For example, | |
2033 // | |
2034 // [ 1 + 2 ] or [ 1+ 2] or [ 1+2 ] ==> binary | |
2035 // | |
2036 // [ 1 +2 ] ==> unary | |
2037 | |
// Decide from the surrounding spacing whether an operator looks
// binary.  SPC_PREV tells whether whitespace preceded the operator and
// NEXT_CHAR is the character following it.  The operator is binary
// unless it is preceded by whitespace but not followed by a blank or
// tab.

static bool
looks_like_bin_op (bool spc_prev, int next_char)
{
  // No space in front: always binary.
  if (! spc_prev)
    return true;

  // Space in front: binary only if space follows, too.
  return (next_char == ' ' || next_char == '\t');
}
2045 | |
2046 // Recognize separators. If the separator is a CRLF pair, it is | |
2047 // replaced by a single LF. | |
2048 | |
2049 static bool | |
2050 next_token_is_sep_op (void) | |
2051 { | |
2052 bool retval = false; | |
2053 | |
2054 int c = text_yyinput (); | |
2055 | |
2056 retval = match_any (c, ",;\n]"); | |
2057 | |
2058 xunput (c, yytext); | |
2059 | |
2060 return retval; | |
2061 } | |
2062 | |
2063 // Try to determine if the next token should be treated as a postfix | |
2064 // unary operator. This is ugly, but it seems to do the right thing. | |
2065 | |
2066 static bool | |
2067 next_token_is_postfix_unary_op (bool spc_prev) | |
2068 { | |
2069 bool un_op = false; | |
2070 | |
2071 int c0 = text_yyinput (); | |
2072 | |
2073 if (c0 == '\'' && ! spc_prev) | |
2074 { | |
2075 un_op = true; | |
2076 } | |
2077 else if (c0 == '.') | |
2078 { | |
2079 int c1 = text_yyinput (); | |
2080 un_op = (c1 == '\''); | |
2081 xunput (c1, yytext); | |
2082 } | |
2083 else if (c0 == '+') | |
2084 { | |
2085 int c1 = text_yyinput (); | |
2086 un_op = (c1 == '+'); | |
2087 xunput (c1, yytext); | |
2088 } | |
2089 else if (c0 == '-') | |
2090 { | |
2091 int c1 = text_yyinput (); | |
2092 un_op = (c1 == '-'); | |
2093 xunput (c1, yytext); | |
2094 } | |
2095 | |
2096 xunput (c0, yytext); | |
2097 | |
2098 return un_op; | |
2099 } | |
2100 | |
2101 // Try to determine if the next token should be treated as a binary | |
2102 // operator. | |
2103 // | |
2104 // This kluge exists because whitespace is not always ignored inside | |
2105 // the square brackets that are used to create matrix objects (though | |
2106 // spacing only really matters in the cases that can be interpreted | |
2107 // either as binary ops or prefix unary ops: currently just +, -). | |
2108 // | |
2109 // Note that a line continuation directly following a + or - operator | |
2110 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
2111 // parsed as a binary operator. | |
2112 | |
2113 static bool | |
2114 next_token_is_bin_op (bool spc_prev) | |
2115 { | |
2116 bool bin_op = false; | |
2117 | |
2118 int c0 = text_yyinput (); | |
2119 | |
2120 switch (c0) | |
2121 { | |
2122 case '+': | |
2123 case '-': | |
2124 { | |
2125 int c1 = text_yyinput (); | |
2126 | |
2127 switch (c1) | |
2128 { | |
2129 case '+': | |
2130 case '-': | |
2131 // Unary ops, spacing doesn't matter. | |
2132 break; | |
2133 | |
2134 case '=': | |
2135 // Binary ops, spacing doesn't matter. | |
2136 bin_op = true; | |
2137 break; | |
2138 | |
2139 default: | |
2140 // Could be either, spacing matters. | |
2141 bin_op = looks_like_bin_op (spc_prev, c1); | |
2142 break; | |
2143 } | |
2144 | |
2145 xunput (c1, yytext); | |
2146 } | |
2147 break; | |
2148 | |
2149 case ':': | |
2150 case '/': | |
2151 case '\\': | |
2152 case '^': | |
2153 // Always a binary op (may also include /=, \=, and ^=). | |
2154 bin_op = true; | |
2155 break; | |
2156 | |
2157 // .+ .- ./ .\ .^ .* .** | |
2158 case '.': | |
2159 { | |
2160 int c1 = text_yyinput (); | |
2161 | |
2162 if (match_any (c1, "+-/\\^*")) | |
2163 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
2164 bin_op = true; | |
2165 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') | |
2166 // A structure element reference is a binary op. | |
2167 bin_op = true; | |
2168 | |
2169 xunput (c1, yytext); | |
2170 } | |
2171 break; | |
2172 | |
2173 // = == & && | || * ** | |
2174 case '=': | |
2175 case '&': | |
2176 case '|': | |
2177 case '*': | |
2178 // Always a binary op (may also include ==, &&, ||, **). | |
2179 bin_op = true; | |
2180 break; | |
2181 | |
2182 // < <= <> > >= | |
2183 case '<': | |
2184 case '>': | |
2185 // Always a binary op (may also include <=, <>, >=). | |
2186 bin_op = true; | |
2187 break; | |
2188 | |
2189 // ~= != | |
2190 case '~': | |
2191 case '!': | |
2192 { | |
2193 int c1 = text_yyinput (); | |
2194 | |
2195 // ~ and ! can be unary ops, so require following =. | |
2196 if (c1 == '=') | |
2197 bin_op = true; | |
2198 | |
2199 xunput (c1, yytext); | |
2200 } | |
2201 break; | |
2202 | |
2203 default: | |
2204 break; | |
2205 } | |
2206 | |
2207 xunput (c0, yytext); | |
2208 | |
2209 return bin_op; | |
2210 } | |
2211 | |
// Return a copy of S cut off at its first space or tab character (the
// whole string if it contains neither).  Used to drop the blanks that
// trail a token.

static std::string
strip_trailing_whitespace (char *s)
{
  std::string token_text (s);

  const size_t first_blank = token_text.find_first_of (" \t");

  if (first_blank == std::string::npos)
    return token_text;

  token_text.erase (first_blank);

  return token_text;
}
2226 | |
2227 // FIXME -- we need to handle block comments here. | |
2228 | |
2229 static void | |
2230 scan_for_comments (const char *text) | |
2231 { | |
2232 std::string comment_buf; | |
2233 | |
2234 bool in_comment = false; | |
2235 bool beginning_of_comment = false; | |
2236 | |
2237 int len = strlen (text); | |
2238 int i = 0; | |
2239 | |
2240 while (i < len) | |
2241 { | |
2242 char c = text[i++]; | |
2243 | |
2244 switch (c) | |
2245 { | |
2246 case '%': | |
2247 case '#': | |
2248 if (in_comment) | |
2249 { | |
2250 if (! beginning_of_comment) | |
2251 comment_buf += static_cast<char> (c); | |
2252 } | |
2253 else | |
2254 { | |
2255 maybe_gripe_matlab_incompatible_comment (c); | |
2256 in_comment = true; | |
2257 beginning_of_comment = true; | |
2258 } | |
2259 break; | |
2260 | |
2261 case '\n': | |
2262 if (in_comment) | |
2263 { | |
2264 comment_buf += static_cast<char> (c); | |
2265 octave_comment_buffer::append (comment_buf); | |
2266 comment_buf.resize (0); | |
2267 in_comment = false; | |
2268 beginning_of_comment = false; | |
2269 } | |
2270 break; | |
2271 | |
2272 default: | |
2273 if (in_comment) | |
2274 { | |
2275 comment_buf += static_cast<char> (c); | |
2276 beginning_of_comment = false; | |
2277 } | |
2278 break; | |
2279 } | |
2280 } | |
2281 | |
2282 if (! comment_buf.empty ()) | |
2283 octave_comment_buffer::append (comment_buf); | |
2284 } | |
2285 | |
2286 // Discard whitespace, including comments and continuations. | |
2287 // | |
2288 // Return value is logical OR of the following values: | |
2289 // | |
2290 // ATE_NOTHING : no spaces to eat | |
2291 // ATE_SPACE_OR_TAB : space or tab in input | |
2292 // ATE_NEWLINE : bare new line in input | |
2293 | |
2294 // FIXME -- we need to handle block comments here. | |
2295 | |
2296 static yum_yum | |
2297 eat_whitespace (void) | |
2298 { | |
2299 yum_yum retval = ATE_NOTHING; | |
2300 | |
2301 std::string comment_buf; | |
2302 | |
2303 bool in_comment = false; | |
2304 bool beginning_of_comment = false; | |
2305 | |
2306 int c = 0; | |
2307 | |
2308 while ((c = text_yyinput ()) != EOF) | |
2309 { | |
2310 current_input_column++; | |
2311 | |
2312 switch (c) | |
2313 { | |
2314 case ' ': | |
2315 case '\t': | |
2316 if (in_comment) | |
2317 { | |
2318 comment_buf += static_cast<char> (c); | |
2319 beginning_of_comment = false; | |
2320 } | |
2321 retval |= ATE_SPACE_OR_TAB; | |
2322 break; | |
2323 | |
2324 case '\n': | |
2325 retval |= ATE_NEWLINE; | |
2326 if (in_comment) | |
2327 { | |
2328 comment_buf += static_cast<char> (c); | |
2329 octave_comment_buffer::append (comment_buf); | |
2330 comment_buf.resize (0); | |
2331 in_comment = false; | |
2332 beginning_of_comment = false; | |
2333 } | |
2334 current_input_column = 0; | |
2335 break; | |
2336 | |
2337 case '#': | |
2338 case '%': | |
2339 if (in_comment) | |
2340 { | |
2341 if (! beginning_of_comment) | |
2342 comment_buf += static_cast<char> (c); | |
2343 } | |
2344 else | |
2345 { | |
2346 maybe_gripe_matlab_incompatible_comment (c); | |
2347 in_comment = true; | |
2348 beginning_of_comment = true; | |
2349 } | |
2350 break; | |
2351 | |
2352 case '.': | |
2353 if (in_comment) | |
2354 { | |
2355 comment_buf += static_cast<char> (c); | |
2356 beginning_of_comment = false; | |
2357 break; | |
2358 } | |
2359 else | |
2360 { | |
2361 if (have_ellipsis_continuation ()) | |
2362 break; | |
2363 else | |
2364 goto done; | |
2365 } | |
2366 | |
2367 case '\\': | |
2368 if (in_comment) | |
2369 { | |
2370 comment_buf += static_cast<char> (c); | |
2371 beginning_of_comment = false; | |
2372 break; | |
2373 } | |
2374 else | |
2375 { | |
2376 if (have_continuation ()) | |
2377 break; | |
2378 else | |
2379 goto done; | |
2380 } | |
2381 | |
2382 default: | |
2383 if (in_comment) | |
2384 { | |
2385 comment_buf += static_cast<char> (c); | |
2386 beginning_of_comment = false; | |
2387 break; | |
2388 } | |
2389 else | |
2390 goto done; | |
2391 } | |
2392 } | |
2393 | |
2394 if (! comment_buf.empty ()) | |
2395 octave_comment_buffer::append (comment_buf); | |
2396 | |
2397 done: | |
2398 xunput (c, yytext); | |
2399 current_input_column--; | |
2400 return retval; | |
2401 } | |
2402 | |
// True if the LEN characters at S start with "0x" or "0X" and at
// least one more character follows the prefix.

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2408 | |
2409 static void | |
2410 handle_number (void) | |
2411 { | |
2412 double value = 0.0; | |
2413 int nread = 0; | |
2414 | |
2415 if (looks_like_hex (yytext, strlen (yytext))) | |
2416 { | |
2417 unsigned long ival; | |
2418 | |
2419 nread = sscanf (yytext, "%lx", &ival); | |
2420 | |
2421 value = static_cast<double> (ival); | |
2422 } | |
2423 else | |
2424 { | |
2425 char *tmp = strsave (yytext); | |
2426 | |
2427 char *idx = strpbrk (tmp, "Dd"); | |
2428 | |
2429 if (idx) | |
2430 *idx = 'e'; | |
2431 | |
2432 nread = sscanf (tmp, "%lf", &value); | |
2433 | |
2434 delete [] tmp; | |
2435 } | |
2436 | |
2437 // If yytext doesn't contain a valid number, we are in deep doo doo. | |
2438 | |
2439 assert (nread == 1); | |
2440 | |
2441 lexer_flags.quote_is_transpose = true; | |
2442 lexer_flags.convert_spaces_to_comma = true; | |
2443 lexer_flags.looking_for_object_index = false; | |
2444 lexer_flags.at_beginning_of_statement = false; | |
2445 | |
2446 yylval.tok_val = new token (value, yytext, input_line_number, | |
2447 current_input_column); | |
2448 | |
2449 token_stack.push (yylval.tok_val); | |
2450 | |
2451 current_input_column += yyleng; | |
2452 | |
2453 do_comma_insert_check (); | |
2454 } | |
2455 | |
2456 // We have seen a backslash and need to find out if it should be | |
2457 // treated as a continuation character. If so, this eats it, up to | |
2458 // and including the new line character. | |
2459 // | |
2460 // Match whitespace only, followed by a comment character or newline. | |
2461 // Once a comment character is found, discard all input until newline. | |
2462 // If non-whitespace characters are found before comment | |
2463 // characters, return 0. Otherwise, return 1. | |
2464 | |
2465 // FIXME -- we need to handle block comments here. | |
2466 | |
2467 static bool | |
2468 have_continuation (bool trailing_comments_ok) | |
2469 { | |
2470 std::ostringstream buf; | |
2471 | |
2472 std::string comment_buf; | |
2473 | |
2474 bool in_comment = false; | |
2475 bool beginning_of_comment = false; | |
2476 | |
2477 int c = 0; | |
2478 | |
2479 while ((c = text_yyinput ()) != EOF) | |
2480 { | |
2481 buf << static_cast<char> (c); | |
2482 | |
2483 switch (c) | |
2484 { | |
2485 case ' ': | |
2486 case '\t': | |
2487 if (in_comment) | |
2488 { | |
2489 comment_buf += static_cast<char> (c); | |
2490 beginning_of_comment = false; | |
2491 } | |
2492 break; | |
2493 | |
2494 case '%': | |
2495 case '#': | |
2496 if (trailing_comments_ok) | |
2497 { | |
2498 if (in_comment) | |
2499 { | |
2500 if (! beginning_of_comment) | |
2501 comment_buf += static_cast<char> (c); | |
2502 } | |
2503 else | |
2504 { | |
2505 maybe_gripe_matlab_incompatible_comment (c); | |
2506 in_comment = true; | |
2507 beginning_of_comment = true; | |
2508 } | |
2509 } | |
2510 else | |
2511 goto cleanup; | |
2512 break; | |
2513 | |
2514 case '\n': | |
2515 if (in_comment) | |
2516 { | |
2517 comment_buf += static_cast<char> (c); | |
2518 octave_comment_buffer::append (comment_buf); | |
2519 } | |
2520 current_input_column = 0; | |
2521 promptflag--; | |
2522 gripe_matlab_incompatible_continuation (); | |
2523 return true; | |
2524 | |
2525 default: | |
2526 if (in_comment) | |
2527 { | |
2528 comment_buf += static_cast<char> (c); | |
2529 beginning_of_comment = false; | |
2530 } | |
2531 else | |
2532 goto cleanup; | |
2533 break; | |
2534 } | |
2535 } | |
2536 | |
2537 xunput (c, yytext); | |
2538 return false; | |
2539 | |
2540 cleanup: | |
2541 | |
2542 std::string s = buf.str (); | |
2543 | |
2544 int len = s.length (); | |
2545 while (len--) | |
2546 xunput (s[len], yytext); | |
2547 | |
2548 return false; | |
2549 } | |
2550 | |
2551 // We have seen a `.' and need to see if it is the start of a | |
2552 // continuation. If so, this eats it, up to and including the new | |
2553 // line character. | |
2554 | |
2555 static bool | |
2556 have_ellipsis_continuation (bool trailing_comments_ok) | |
2557 { | |
2558 char c1 = text_yyinput (); | |
2559 if (c1 == '.') | |
2560 { | |
2561 char c2 = text_yyinput (); | |
2562 if (c2 == '.' && have_continuation (trailing_comments_ok)) | |
2563 return true; | |
2564 else | |
2565 { | |
2566 xunput (c2, yytext); | |
2567 xunput (c1, yytext); | |
2568 } | |
2569 } | |
2570 else | |
2571 xunput (c1, yytext); | |
2572 | |
2573 return false; | |
2574 } | |
2575 | |
2576 // See if we have a continuation line. If so, eat it and the leading | |
2577 // whitespace on the next line. | |
2578 // | |
2579 // Return value is the same as described for eat_whitespace(). | |
2580 | |
2581 static yum_yum | |
2582 eat_continuation (void) | |
2583 { | |
2584 int retval = ATE_NOTHING; | |
2585 | |
2586 int c = text_yyinput (); | |
2587 | |
2588 if ((c == '.' && have_ellipsis_continuation ()) | |
2589 || (c == '\\' && have_continuation ())) | |
2590 retval = eat_whitespace (); | |
2591 else | |
2592 xunput (c, yytext); | |
2593 | |
2594 return retval; | |
2595 } | |
2596 | |
2597 static int | |
2598 handle_string (char delim) | |
2599 { | |
2600 std::ostringstream buf; | |
2601 | |
2602 int bos_line = input_line_number; | |
2603 int bos_col = current_input_column; | |
2604 | |
2605 int c; | |
2606 int escape_pending = 0; | |
2607 | |
2608 while ((c = text_yyinput ()) != EOF) | |
2609 { | |
2610 current_input_column++; | |
2611 | |
2612 if (c == '\\') | |
2613 { | |
2614 if (delim == '\'' || escape_pending) | |
2615 { | |
2616 buf << static_cast<char> (c); | |
2617 escape_pending = 0; | |
2618 } | |
2619 else | |
2620 { | |
2621 if (have_continuation (false)) | |
2622 escape_pending = 0; | |
2623 else | |
2624 { | |
2625 buf << static_cast<char> (c); | |
2626 escape_pending = 1; | |
2627 } | |
2628 } | |
2629 continue; | |
2630 } | |
2631 else if (c == '.') | |
2632 { | |
2633 if (delim == '\'' || ! have_ellipsis_continuation (false)) | |
2634 buf << static_cast<char> (c); | |
2635 } | |
2636 else if (c == '\n') | |
2637 { | |
2638 error ("unterminated string constant"); | |
2639 break; | |
2640 } | |
2641 else if (c == delim) | |
2642 { | |
2643 if (escape_pending) | |
2644 buf << static_cast<char> (c); | |
2645 else | |
2646 { | |
2647 c = text_yyinput (); | |
2648 if (c == delim) | |
2649 { | |
2650 buf << static_cast<char> (c); | |
2651 } | |
2652 else | |
2653 { | |
2654 std::string s; | |
2655 xunput (c, yytext); | |
2656 | |
2657 if (delim == '\'') | |
2658 s = buf.str (); | |
2659 else | |
2660 s = do_string_escapes (buf.str ()); | |
2661 | |
2662 lexer_flags.quote_is_transpose = true; | |
2663 lexer_flags.convert_spaces_to_comma = true; | |
2664 | |
2665 yylval.tok_val = new token (s, bos_line, bos_col); | |
2666 token_stack.push (yylval.tok_val); | |
2667 | |
2668 if (delim == '"') | |
2669 gripe_matlab_incompatible ("\" used as string delimiter"); | |
2670 else if (delim == '\'') | |
2671 gripe_single_quote_string (); | |
2672 | |
2673 lexer_flags.looking_for_object_index = true; | |
2674 lexer_flags.at_beginning_of_statement = false; | |
2675 | |
2676 return delim == '"' ? DQ_STRING : SQ_STRING; | |
2677 } | |
2678 } | |
2679 } | |
2680 else | |
2681 { | |
2682 buf << static_cast<char> (c); | |
2683 } | |
2684 | |
2685 escape_pending = 0; | |
2686 } | |
2687 | |
2688 return LEXICAL_ERROR; | |
2689 } | |
2690 | |
2691 static bool | |
2692 next_token_is_assign_op (void) | |
2693 { | |
2694 bool retval = false; | |
2695 | |
2696 int c0 = text_yyinput (); | |
2697 | |
2698 switch (c0) | |
2699 { | |
2700 case '=': | |
2701 { | |
2702 int c1 = text_yyinput (); | |
2703 xunput (c1, yytext); | |
2704 if (c1 != '=') | |
2705 retval = true; | |
2706 } | |
2707 break; | |
2708 | |
2709 case '+': | |
2710 case '-': | |
2711 case '*': | |
2712 case '/': | |
2713 case '\\': | |
2714 case '&': | |
2715 case '|': | |
2716 { | |
2717 int c1 = text_yyinput (); | |
2718 xunput (c1, yytext); | |
2719 if (c1 == '=') | |
2720 retval = true; | |
2721 } | |
2722 break; | |
2723 | |
2724 case '.': | |
2725 { | |
2726 int c1 = text_yyinput (); | |
2727 if (match_any (c1, "+-*/\\")) | |
2728 { | |
2729 int c2 = text_yyinput (); | |
2730 xunput (c2, yytext); | |
2731 if (c2 == '=') | |
2732 retval = true; | |
2733 } | |
2734 xunput (c1, yytext); | |
2735 } | |
2736 break; | |
2737 | |
2738 case '>': | |
2739 { | |
2740 int c1 = text_yyinput (); | |
2741 if (c1 == '>') | |
2742 { | |
2743 int c2 = text_yyinput (); | |
2744 xunput (c2, yytext); | |
2745 if (c2 == '=') | |
2746 retval = true; | |
2747 } | |
2748 xunput (c1, yytext); | |
2749 } | |
2750 break; | |
2751 | |
2752 case '<': | |
2753 { | |
2754 int c1 = text_yyinput (); | |
2755 if (c1 == '<') | |
2756 { | |
2757 int c2 = text_yyinput (); | |
2758 xunput (c2, yytext); | |
2759 if (c2 == '=') | |
2760 retval = true; | |
2761 } | |
2762 xunput (c1, yytext); | |
2763 } | |
2764 break; | |
2765 | |
2766 default: | |
2767 break; | |
2768 } | |
2769 | |
2770 xunput (c0, yytext); | |
2771 | |
2772 return retval; | |
2773 } | |
2774 | |
2775 static bool | |
2776 next_token_is_index_op (void) | |
2777 { | |
2778 int c = text_yyinput (); | |
2779 xunput (c, yytext); | |
2780 return c == '(' || c == '{'; | |
2781 } | |
2782 | |
2783 static int | |
2784 handle_close_bracket (bool spc_gobbled, int bracket_type) | |
2785 { | |
2786 int retval = bracket_type; | |
2787 | |
2788 if (! nesting_level.none ()) | |
2789 { | |
2790 nesting_level.remove (); | |
2791 | |
2792 if (bracket_type == ']') | |
2793 lexer_flags.bracketflag--; | |
2794 else if (bracket_type == '}') | |
2795 lexer_flags.braceflag--; | |
2796 else | |
2797 panic_impossible (); | |
2798 } | |
2799 | |
2800 if (lexer_flags.bracketflag == 0 && lexer_flags.braceflag == 0) | |
2801 BEGIN (INITIAL); | |
2802 | |
2803 if (bracket_type == ']' | |
2804 && next_token_is_assign_op () | |
2805 && ! lexer_flags.looking_at_return_list) | |
2806 { | |
2807 retval = CLOSE_BRACE; | |
2808 } | |
2809 else if ((lexer_flags.bracketflag || lexer_flags.braceflag) | |
2810 && lexer_flags.convert_spaces_to_comma | |
2811 && (nesting_level.is_bracket () | |
2812 || (nesting_level.is_brace () | |
2813 && ! lexer_flags.looking_at_object_index.front ()))) | |
2814 { | |
2815 bool index_op = next_token_is_index_op (); | |
2816 | |
2817 // Don't insert comma if we are looking at something like | |
2818 // | |
2819 // [x{i}{j}] or [x{i}(j)] | |
2820 // | |
2821 // but do if we are looking at | |
2822 // | |
2823 // [x{i} {j}] or [x{i} (j)] | |
2824 | |
2825 if (spc_gobbled || ! (bracket_type == '}' && index_op)) | |
2826 { | |
2827 bool bin_op = next_token_is_bin_op (spc_gobbled); | |
2828 | |
2829 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2830 | |
2831 bool sep_op = next_token_is_sep_op (); | |
2832 | |
2833 if (! (postfix_un_op || bin_op || sep_op)) | |
2834 { | |
2835 maybe_warn_separator_insert (','); | |
2836 | |
2837 xunput (',', yytext); | |
2838 return retval; | |
2839 } | |
2840 } | |
2841 } | |
2842 | |
2843 lexer_flags.quote_is_transpose = true; | |
2844 lexer_flags.convert_spaces_to_comma = true; | |
2845 | |
2846 return retval; | |
2847 } | |
2848 | |
2849 static void | |
2850 maybe_unput_comma (int spc_gobbled) | |
2851 { | |
2852 if (nesting_level.is_bracket () | |
2853 || (nesting_level.is_brace () | |
2854 && ! lexer_flags.looking_at_object_index.front ())) | |
2855 { | |
2856 int bin_op = next_token_is_bin_op (spc_gobbled); | |
2857 | |
2858 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2859 | |
2860 int c1 = text_yyinput (); | |
2861 int c2 = text_yyinput (); | |
2862 | |
2863 xunput (c2, yytext); | |
2864 xunput (c1, yytext); | |
2865 | |
2866 int sep_op = next_token_is_sep_op (); | |
2867 | |
2868 int dot_op = (c1 == '.' | |
2869 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
2870 | |
2871 if (postfix_un_op || bin_op || sep_op || dot_op) | |
2872 return; | |
2873 | |
2874 int index_op = (c1 == '(' || c1 == '{'); | |
2875 | |
2876 // If there is no space before the indexing op, we don't insert | |
2877 // a comma. | |
2878 | |
2879 if (index_op && ! spc_gobbled) | |
2880 return; | |
2881 | |
2882 maybe_warn_separator_insert (','); | |
2883 | |
2884 xunput (',', yytext); | |
2885 } | |
2886 } | |
2887 | |
2888 static bool | |
2889 next_token_can_follow_bin_op (void) | |
2890 { | |
2891 std::stack<char> buf; | |
2892 | |
2893 int c = EOF; | |
2894 | |
2895 // Skip whitespace in current statement on current line | |
2896 while (true) | |
2897 { | |
2898 c = text_yyinput (); | |
2899 | |
2900 buf.push (c); | |
2901 | |
2902 if (match_any (c, ",;\n") || (c != ' ' && c != '\t')) | |
2903 break; | |
2904 } | |
2905 | |
2906 // Restore input. | |
2907 while (! buf.empty ()) | |
2908 { | |
2909 xunput (buf.top (), yytext); | |
2910 | |
2911 buf.pop (); | |
2912 } | |
2913 | |
2914 return (isalnum (c) || match_any (c, "!\"'(-[_{~")); | |
2915 } | |
2916 | |
// False for the names of the built-in constants listed below, true
// for anything else.  These names are never treated as commands to
// avoid surprises when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char * const constant_names[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  const size_t count = sizeof (constant_names) / sizeof (constant_names[0]);

  for (size_t k = 0; k < count; k++)
    {
      if (tok == constant_names[k])
        return false;
    }

  return true;
}
2929 | |
2930 static bool | |
2931 looks_like_command_arg (void) | |
2932 { | |
2933 bool retval = true; | |
2934 | |
2935 int c0 = text_yyinput (); | |
2936 | |
2937 switch (c0) | |
2938 { | |
2939 // = == | |
2940 case '=': | |
2941 { | |
2942 int c1 = text_yyinput (); | |
2943 | |
2944 if (c1 == '=') | |
2945 { | |
2946 int c2 = text_yyinput (); | |
2947 | |
2948 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2949 && next_token_can_follow_bin_op ()) | |
2950 retval = false; | |
2951 | |
2952 xunput (c2, yytext); | |
2953 } | |
2954 else | |
2955 retval = false; | |
2956 | |
2957 xunput (c1, yytext); | |
2958 } | |
2959 break; | |
2960 | |
2961 case '(': | |
2962 case '{': | |
2963 // Indexing. | |
2964 retval = false; | |
2965 break; | |
2966 | |
2967 case '\n': | |
2968 // EOL. | |
2969 break; | |
2970 | |
2971 case '\'': | |
2972 case '"': | |
2973 // Beginning of a character string. | |
2974 break; | |
2975 | |
2976 // + - ++ -- += -= | |
2977 case '+': | |
2978 case '-': | |
2979 { | |
2980 int c1 = text_yyinput (); | |
2981 | |
2982 switch (c1) | |
2983 { | |
2984 case '\n': | |
2985 // EOL. | |
2986 case '+': | |
2987 case '-': | |
2988 // Unary ops, spacing doesn't matter. | |
2989 break; | |
2990 | |
2991 case '\t': | |
2992 case ' ': | |
2993 { | |
2994 if (next_token_can_follow_bin_op ()) | |
2995 retval = false; | |
2996 } | |
2997 break; | |
2998 | |
2999 case '=': | |
3000 { | |
3001 int c2 = text_yyinput (); | |
3002 | |
3003 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3004 && next_token_can_follow_bin_op ()) | |
3005 retval = false; | |
3006 | |
3007 xunput (c2, yytext); | |
3008 } | |
3009 break; | |
3010 } | |
3011 | |
3012 xunput (c1, yytext); | |
3013 } | |
3014 break; | |
3015 | |
3016 case ':': | |
3017 case '/': | |
3018 case '\\': | |
3019 case '^': | |
3020 { | |
3021 int c1 = text_yyinput (); | |
3022 | |
3023 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3024 && next_token_can_follow_bin_op ()) | |
3025 retval = false; | |
3026 | |
3027 xunput (c1, yytext); | |
3028 } | |
3029 break; | |
3030 | |
3031 // .+ .- ./ .\ .^ .* .** | |
3032 case '.': | |
3033 { | |
3034 int c1 = text_yyinput (); | |
3035 | |
3036 if (match_any (c1, "+-/\\^*")) | |
3037 { | |
3038 int c2 = text_yyinput (); | |
3039 | |
3040 if (c2 == '=') | |
3041 { | |
3042 int c3 = text_yyinput (); | |
3043 | |
3044 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t') | |
3045 && next_token_can_follow_bin_op ()) | |
3046 retval = false; | |
3047 | |
3048 xunput (c3, yytext); | |
3049 } | |
3050 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3051 && next_token_can_follow_bin_op ()) | |
3052 retval = false; | |
3053 | |
3054 xunput (c2, yytext); | |
3055 } | |
3056 else if (! match_any (c1, ",;\n") | |
3057 && (! isdigit (c1) && c1 != ' ' && c1 != '\t' | |
3058 && c1 != '.')) | |
3059 { | |
3060 // Structure reference. FIXME -- is this a complete check? | |
3061 | |
3062 retval = false; | |
3063 } | |
3064 | |
3065 xunput (c1, yytext); | |
3066 } | |
3067 break; | |
3068 | |
3069 // & && | || * ** | |
3070 case '&': | |
3071 case '|': | |
3072 case '*': | |
3073 { | |
3074 int c1 = text_yyinput (); | |
3075 | |
3076 if (c1 == c0) | |
3077 { | |
3078 int c2 = text_yyinput (); | |
3079 | |
3080 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3081 && next_token_can_follow_bin_op ()) | |
3082 retval = false; | |
3083 | |
3084 xunput (c2, yytext); | |
3085 } | |
3086 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3087 && next_token_can_follow_bin_op ()) | |
3088 retval = false; | |
3089 | |
3090 xunput (c1, yytext); | |
3091 } | |
3092 break; | |
3093 | |
3094 // < <= > >= | |
3095 case '<': | |
3096 case '>': | |
3097 { | |
3098 int c1 = text_yyinput (); | |
3099 | |
3100 if (c1 == '=') | |
3101 { | |
3102 int c2 = text_yyinput (); | |
3103 | |
3104 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3105 && next_token_can_follow_bin_op ()) | |
3106 retval = false; | |
3107 | |
3108 xunput (c2, yytext); | |
3109 } | |
3110 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3111 && next_token_can_follow_bin_op ()) | |
3112 retval = false; | |
3113 | |
3114 xunput (c1, yytext); | |
3115 } | |
3116 break; | |
3117 | |
3118 // ~= != | |
3119 case '~': | |
3120 case '!': | |
3121 { | |
3122 int c1 = text_yyinput (); | |
3123 | |
3124 // ~ and ! can be unary ops, so require following =. | |
3125 if (c1 == '=') | |
3126 { | |
3127 int c2 = text_yyinput (); | |
3128 | |
3129 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3130 && next_token_can_follow_bin_op ()) | |
3131 retval = false; | |
3132 | |
3133 xunput (c2, yytext); | |
3134 } | |
3135 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3136 && next_token_can_follow_bin_op ()) | |
3137 retval = false; | |
3138 | |
3139 xunput (c1, yytext); | |
3140 } | |
3141 break; | |
3142 | |
3143 default: | |
3144 break; | |
3145 } | |
3146 | |
3147 xunput (c0, yytext); | |
3148 | |
3149 return retval; | |
3150 } | |
3151 | |
3152 static int | |
3153 handle_superclass_identifier (void) | |
3154 { | |
3155 eat_continuation (); | |
3156 | |
3157 std::string pkg; | |
3158 std::string meth = strip_trailing_whitespace (yytext); | |
3159 size_t pos = meth.find ("@"); | |
3160 std::string cls = meth.substr (pos).substr (1); | |
3161 meth = meth.substr (0, pos - 1); | |
3162 | |
3163 pos = cls.find ("."); | |
3164 if (pos != std::string::npos) | |
3165 { | |
3166 pkg = cls.substr (pos).substr (1); | |
3167 cls = cls.substr (0, pos - 1); | |
3168 } | |
3169 | |
3170 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls) | |
3171 || is_keyword_token (pkg)); | |
3172 if (kw_token) | |
3173 { | |
3174 error ("method, class and package names may not be keywords"); | |
3175 return LEXICAL_ERROR; | |
3176 } | |
3177 | |
3178 yylval.tok_val | |
3179 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), | |
3180 cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3181 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3182 input_line_number, current_input_column); | |
3183 token_stack.push (yylval.tok_val); | |
3184 | |
3185 lexer_flags.convert_spaces_to_comma = true; | |
3186 current_input_column += yyleng; | |
3187 | |
3188 return SUPERCLASSREF; | |
3189 } | |
3190 | |
3191 static int | |
3192 handle_meta_identifier (void) | |
3193 { | |
3194 eat_continuation (); | |
3195 | |
3196 std::string pkg; | |
3197 std::string cls = strip_trailing_whitespace (yytext).substr (1); | |
3198 size_t pos = cls.find ("."); | |
3199 | |
3200 if (pos != std::string::npos) | |
3201 { | |
3202 pkg = cls.substr (pos).substr (1); | |
3203 cls = cls.substr (0, pos - 1); | |
3204 } | |
3205 | |
3206 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg); | |
3207 if (kw_token) | |
3208 { | |
3209 error ("class and package names may not be keywords"); | |
3210 return LEXICAL_ERROR; | |
3211 } | |
3212 | |
3213 yylval.tok_val | |
3214 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3215 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3216 input_line_number, current_input_column); | |
3217 | |
3218 token_stack.push (yylval.tok_val); | |
3219 | |
3220 lexer_flags.convert_spaces_to_comma = true; | |
3221 current_input_column += yyleng; | |
3222 | |
3223 return METAQUERY; | |
3224 } | |
3225 | |
3226 // Figure out exactly what kind of token to return when we have seen | |
3227 // an identifier. Handles keywords. Return -1 if the identifier | |
3228 // should be ignored. | |
3229 | |
3230 static int | |
3231 handle_identifier (void) | |
3232 { | |
3233 bool at_bos = lexer_flags.at_beginning_of_statement; | |
3234 | |
3235 std::string tok = strip_trailing_whitespace (yytext); | |
3236 | |
3237 int c = yytext[yyleng-1]; | |
3238 | |
3239 int cont_is_spc = eat_continuation (); | |
3240 | |
3241 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
3242 | |
3243 // If we are expecting a structure element, avoid recognizing | |
3244 // keywords and other special names and return STRUCT_ELT, which is | |
3245 // a string that is also a valid identifier. But first, we have to | |
3246 // decide whether to insert a comma. | |
3247 | |
3248 if (lexer_flags.looking_at_indirect_ref) | |
3249 { | |
3250 do_comma_insert_check (); | |
3251 | |
3252 maybe_unput_comma (spc_gobbled); | |
3253 | |
3254 yylval.tok_val = new token (tok, input_line_number, | |
3255 current_input_column); | |
3256 | |
3257 token_stack.push (yylval.tok_val); | |
3258 | |
3259 lexer_flags.quote_is_transpose = true; | |
3260 lexer_flags.convert_spaces_to_comma = true; | |
3261 lexer_flags.looking_for_object_index = true; | |
3262 | |
3263 current_input_column += yyleng; | |
3264 | |
3265 return STRUCT_ELT; | |
3266 } | |
3267 | |
3268 lexer_flags.at_beginning_of_statement = false; | |
3269 | |
3270 // The is_keyword_token may reset | |
3271 // lexer_flags.at_beginning_of_statement. For example, if it sees | |
3272 // an else token, then the next token is at the beginning of a | |
3273 // statement. | |
3274 | |
3275 int kw_token = is_keyword_token (tok); | |
3276 | |
3277 // If we found a keyword token, then the beginning_of_statement flag | |
3278 // is already set. Otherwise, we won't be at the beginning of a | |
3279 // statement. | |
3280 | |
3281 if (lexer_flags.looking_at_function_handle) | |
3282 { | |
3283 if (kw_token) | |
3284 { | |
3285 error ("function handles may not refer to keywords"); | |
3286 | |
3287 return LEXICAL_ERROR; | |
3288 } | |
3289 else | |
3290 { | |
3291 yylval.tok_val = new token (tok, input_line_number, | |
3292 current_input_column); | |
3293 | |
3294 token_stack.push (yylval.tok_val); | |
3295 | |
3296 current_input_column += yyleng; | |
3297 lexer_flags.quote_is_transpose = false; | |
3298 lexer_flags.convert_spaces_to_comma = true; | |
3299 lexer_flags.looking_for_object_index = true; | |
3300 | |
3301 return FCN_HANDLE; | |
3302 } | |
3303 } | |
3304 | |
3305 // If we have a regular keyword, return it. | |
3306 // Keywords can be followed by identifiers. | |
3307 | |
3308 if (kw_token) | |
3309 { | |
3310 if (kw_token >= 0) | |
3311 { | |
3312 current_input_column += yyleng; | |
3313 lexer_flags.quote_is_transpose = false; | |
3314 lexer_flags.convert_spaces_to_comma = true; | |
3315 lexer_flags.looking_for_object_index = false; | |
3316 } | |
3317 | |
3318 return kw_token; | |
3319 } | |
3320 | |
3321 // See if we have a plot keyword (title, using, with, or clear). | |
3322 | |
3323 int c1 = text_yyinput (); | |
3324 | |
3325 bool next_tok_is_eq = false; | |
3326 if (c1 == '=') | |
3327 { | |
3328 int c2 = text_yyinput (); | |
3329 xunput (c2, yytext); | |
3330 | |
3331 if (c2 != '=') | |
3332 next_tok_is_eq = true; | |
3333 } | |
3334 | |
3335 xunput (c1, yytext); | |
3336 | |
3337 // Kluge alert. | |
3338 // | |
3339 // If we are looking at a text style function, set up to gobble its | |
3340 // arguments. | |
3341 // | |
3342 // If the following token is `=', or if we are parsing a function | |
3343 // return list or function parameter list, or if we are looking at | |
3344 // something like [ab,cd] = foo (), force the symbol to be inserted | |
3345 // as a variable in the current symbol table. | |
3346 | |
3347 if (! is_variable (tok)) | |
3348 { | |
3349 if (at_bos && spc_gobbled && can_be_command (tok) | |
3350 && looks_like_command_arg ()) | |
3351 { | |
3352 BEGIN (COMMAND_START); | |
3353 } | |
3354 else if (next_tok_is_eq | |
3355 || lexer_flags.looking_at_decl_list | |
3356 || lexer_flags.looking_at_return_list | |
3357 || (lexer_flags.looking_at_parameter_list | |
3358 && ! lexer_flags.looking_at_initializer_expression)) | |
3359 { | |
3360 symbol_table::force_variable (tok); | |
3361 } | |
3362 else if (lexer_flags.looking_at_matrix_or_assign_lhs) | |
3363 { | |
3364 lexer_flags.pending_local_variables.insert (tok); | |
3365 } | |
3366 } | |
3367 | |
3368 // Find the token in the symbol table. Beware the magic | |
3369 // transformation of the end keyword... | |
3370 | |
3371 if (tok == "end") | |
3372 tok = "__end__"; | |
3373 | |
3374 yylval.tok_val = new token (&(symbol_table::insert (tok)), | |
3375 input_line_number, current_input_column); | |
3376 | |
3377 token_stack.push (yylval.tok_val); | |
3378 | |
3379 // After seeing an identifer, it is ok to convert spaces to a comma | |
3380 // (if needed). | |
3381 | |
3382 lexer_flags.convert_spaces_to_comma = true; | |
3383 | |
3384 if (! (next_tok_is_eq || YY_START == COMMAND_START)) | |
3385 { | |
3386 lexer_flags.quote_is_transpose = true; | |
3387 | |
3388 do_comma_insert_check (); | |
3389 | |
3390 maybe_unput_comma (spc_gobbled); | |
3391 } | |
3392 | |
3393 current_input_column += yyleng; | |
3394 | |
3395 if (tok != "__end__") | |
3396 lexer_flags.looking_for_object_index = true; | |
3397 | |
3398 return NAME; | |
3399 } | |
3400 | |
3401 void | |
3402 lexical_feedback::init (void) | |
3403 { | |
3404 // Not initially defining a matrix list. | |
3405 bracketflag = 0; | |
3406 | |
3407 // Not initially defining a cell array list. | |
3408 braceflag = 0; | |
3409 | |
3410 // Not initially inside a loop or if statement. | |
3411 looping = 0; | |
3412 | |
3413 // Not initially defining a function. | |
3414 defining_func = 0; | |
3415 | |
3416 // Not parsing an object index. | |
3417 while (! parsed_function_name.empty ()) | |
3418 parsed_function_name.pop (); | |
3419 | |
3420 parsing_class_method = false; | |
3421 | |
3422 // Not initially defining a class with classdef. | |
3423 maybe_classdef_get_set_method = false; | |
3424 parsing_classdef = false; | |
3425 | |
3426 // Not initiallly looking at a function handle. | |
3427 looking_at_function_handle = 0; | |
3428 | |
3429 // Not initiallly looking at an anonymous function argument list. | |
3430 looking_at_anon_fcn_args = 0; | |
3431 | |
3432 // Not parsing a function return, parameter, or declaration list. | |
3433 looking_at_return_list = false; | |
3434 looking_at_parameter_list = false; | |
3435 looking_at_decl_list = false; | |
3436 | |
3437 // Not looking at an argument list initializer expression. | |
3438 looking_at_initializer_expression = false; | |
3439 | |
3440 // Not parsing a matrix or the left hand side of multi-value | |
3441 // assignment statement. | |
3442 looking_at_matrix_or_assign_lhs = false; | |
3443 | |
3444 // Not parsing an object index. | |
3445 while (! looking_at_object_index.empty ()) | |
3446 looking_at_object_index.pop_front (); | |
3447 | |
3448 looking_at_object_index.push_front (false); | |
3449 | |
3450 // Object index not possible until we've seen something. | |
3451 looking_for_object_index = false; | |
3452 | |
3453 // Yes, we are at the beginning of a statement. | |
3454 at_beginning_of_statement = true; | |
3455 | |
3456 // No need to do comma insert or convert spaces to comma at | |
3457 // beginning of input. | |
3458 convert_spaces_to_comma = true; | |
3459 do_comma_insert = false; | |
3460 | |
3461 // Not initially looking at indirect references. | |
3462 looking_at_indirect_ref = false; | |
3463 | |
3464 // Quote marks strings intially. | |
3465 quote_is_transpose = false; | |
3466 | |
3467 // Set of identifiers that might be local variable names is empty. | |
3468 pending_local_variables.clear (); | |
3469 } | |
3470 | |
3471 bool | |
3472 is_keyword (const std::string& s) | |
3473 { | |
3474 // Parsing function names like "set.property_name" inside | |
3475 // classdef-style class definitions is simplified by handling the | |
3476 // "set" and "get" portions of the names using the same mechanism as | |
3477 // is used for keywords. However, they are not really keywords in | |
3478 // the language, so omit them from the list of possible keywords. | |
3479 | |
3480 return (octave_kw_hash::in_word_set (s.c_str (), s.length ()) != 0 | |
3481 && ! (s == "set" || s == "get")); | |
3482 } | |
3483 | |
DEFUN (iskeyword, args, ,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {} iskeyword ()\n\
@deftypefnx {Built-in Function} {} iskeyword (@var{name})\n\
Return true if @var{name} is an Octave keyword.  If @var{name}\n\
is omitted, return a list of keywords.\n\
@seealso{isvarname, exist}\n\
@end deftypefn")
{
  octave_value retval;

  // Count the function name itself, argv-style.
  int argc = args.length () + 1;

  string_vector argv = args.make_argv ("iskeyword");

  if (error_state)
    return retval;

  switch (argc)
    {
    case 1:
      {
        // No argument: return the sorted list of keywords.  Neither
        // set nor get is a keyword (see is_keyword), so both are
        // left out of the list.

        string_vector keywords (TOTAL_KEYWORDS);

        int n_kept = 0;

        for (int i = 0; i < TOTAL_KEYWORDS; i++)
          {
            std::string word = wordlist[i].name;

            if (word == "set" || word == "get")
              continue;

            keywords[n_kept++] = word;
          }

        // Drop the slots left unused by the skipped entries.
        keywords.resize (n_kept);

        retval = Cell (keywords.sort ());
      }
      break;

    case 2:
      retval = is_keyword (argv[1]);
      break;

    default:
      print_usage ();
      break;
    }

  return retval;
}
3532 | |
3533 /* | |
3534 | |
3535 %!assert (iskeyword ("for")) | |
3536 %!assert (iskeyword ("fort"), false) | |
3537 %!assert (iskeyword ("fft"), false) | |
3538 | |
3539 */ | |
3540 | |
3541 void | |
3542 prep_lexer_for_script_file (void) | |
3543 { | |
3544 BEGIN (SCRIPT_FILE_BEGIN); | |
3545 } | |
3546 | |
3547 void | |
3548 prep_lexer_for_function_file (void) | |
3549 { | |
3550 BEGIN (FUNCTION_FILE_BEGIN); | |
3551 } | |
3552 | |
3553 static void | |
3554 maybe_warn_separator_insert (char sep) | |
3555 { | |
3556 std::string nm = curr_fcn_file_full_name; | |
3557 | |
3558 if (nm.empty ()) | |
3559 warning_with_id ("Octave:separator-insert", | |
3560 "potential auto-insertion of `%c' near line %d", | |
3561 sep, input_line_number); | |
3562 else | |
3563 warning_with_id ("Octave:separator-insert", | |
3564 "potential auto-insertion of `%c' near line %d of file %s", | |
3565 sep, input_line_number, nm.c_str ()); | |
3566 } | |
3567 | |
3568 static void | |
3569 gripe_single_quote_string (void) | |
3570 { | |
3571 std::string nm = curr_fcn_file_full_name; | |
3572 | |
3573 if (nm.empty ()) | |
3574 warning_with_id ("Octave:single-quote-string", | |
3575 "single quote delimited string near line %d", | |
3576 input_line_number); | |
3577 else | |
3578 warning_with_id ("Octave:single-quote-string", | |
3579 "single quote delimited string near line %d of file %s", | |
3580 input_line_number, nm.c_str ()); | |
3581 } | |
3582 | |
3583 static void | |
3584 gripe_matlab_incompatible (const std::string& msg) | |
3585 { | |
3586 std::string nm = curr_fcn_file_full_name; | |
3587 | |
3588 if (nm.empty ()) | |
3589 warning_with_id ("Octave:matlab-incompatible", | |
3590 "potential Matlab compatibility problem: %s", | |
3591 msg.c_str ()); | |
3592 else | |
3593 warning_with_id ("Octave:matlab-incompatible", | |
3594 "potential Matlab compatibility problem: %s near line %d offile %s", | |
3595 msg.c_str (), input_line_number, nm.c_str ()); | |
3596 } | |
3597 | |
3598 static void | |
3599 maybe_gripe_matlab_incompatible_comment (char c) | |
3600 { | |
3601 if (c == '#') | |
3602 gripe_matlab_incompatible ("# used as comment character"); | |
3603 } | |
3604 | |
3605 static void | |
3606 gripe_matlab_incompatible_continuation (void) | |
3607 { | |
3608 gripe_matlab_incompatible ("\\ used as line continuation marker"); | |
3609 } | |
3610 | |
3611 static void | |
3612 gripe_matlab_incompatible_operator (const std::string& op) | |
3613 { | |
3614 std::string t = op; | |
3615 int n = t.length (); | |
3616 if (t[n-1] == '\n') | |
3617 t.resize (n-1); | |
3618 gripe_matlab_incompatible (t + " used as operator"); | |
3619 } | |
3620 | |
3621 static void | |
3622 display_token (int tok) | |
3623 { | |
3624 switch (tok) | |
3625 { | |
3626 case '=': std::cerr << "'='\n"; break; | |
3627 case ':': std::cerr << "':'\n"; break; | |
3628 case '-': std::cerr << "'-'\n"; break; | |
3629 case '+': std::cerr << "'+'\n"; break; | |
3630 case '*': std::cerr << "'*'\n"; break; | |
3631 case '/': std::cerr << "'/'\n"; break; | |
3632 case ADD_EQ: std::cerr << "ADD_EQ\n"; break; | |
3633 case SUB_EQ: std::cerr << "SUB_EQ\n"; break; | |
3634 case MUL_EQ: std::cerr << "MUL_EQ\n"; break; | |
3635 case DIV_EQ: std::cerr << "DIV_EQ\n"; break; | |
3636 case LEFTDIV_EQ: std::cerr << "LEFTDIV_EQ\n"; break; | |
3637 case POW_EQ: std::cerr << "POW_EQ\n"; break; | |
3638 case EMUL_EQ: std::cerr << "EMUL_EQ\n"; break; | |
3639 case EDIV_EQ: std::cerr << "EDIV_EQ\n"; break; | |
3640 case ELEFTDIV_EQ: std::cerr << "ELEFTDIV_EQ\n"; break; | |
3641 case EPOW_EQ: std::cerr << "EPOW_EQ\n"; break; | |
3642 case AND_EQ: std::cerr << "AND_EQ\n"; break; | |
3643 case OR_EQ: std::cerr << "OR_EQ\n"; break; | |
3644 case LSHIFT_EQ: std::cerr << "LSHIFT_EQ\n"; break; | |
3645 case RSHIFT_EQ: std::cerr << "RSHIFT_EQ\n"; break; | |
3646 case LSHIFT: std::cerr << "LSHIFT\n"; break; | |
3647 case RSHIFT: std::cerr << "RSHIFT\n"; break; | |
3648 case EXPR_AND_AND: std::cerr << "EXPR_AND_AND\n"; break; | |
3649 case EXPR_OR_OR: std::cerr << "EXPR_OR_OR\n"; break; | |
3650 case EXPR_AND: std::cerr << "EXPR_AND\n"; break; | |
3651 case EXPR_OR: std::cerr << "EXPR_OR\n"; break; | |
3652 case EXPR_NOT: std::cerr << "EXPR_NOT\n"; break; | |
3653 case EXPR_LT: std::cerr << "EXPR_LT\n"; break; | |
3654 case EXPR_LE: std::cerr << "EXPR_LE\n"; break; | |
3655 case EXPR_EQ: std::cerr << "EXPR_EQ\n"; break; | |
3656 case EXPR_NE: std::cerr << "EXPR_NE\n"; break; | |
3657 case EXPR_GE: std::cerr << "EXPR_GE\n"; break; | |
3658 case EXPR_GT: std::cerr << "EXPR_GT\n"; break; | |
3659 case LEFTDIV: std::cerr << "LEFTDIV\n"; break; | |
3660 case EMUL: std::cerr << "EMUL\n"; break; | |
3661 case EDIV: std::cerr << "EDIV\n"; break; | |
3662 case ELEFTDIV: std::cerr << "ELEFTDIV\n"; break; | |
3663 case EPLUS: std::cerr << "EPLUS\n"; break; | |
3664 case EMINUS: std::cerr << "EMINUS\n"; break; | |
3665 case QUOTE: std::cerr << "QUOTE\n"; break; | |
3666 case TRANSPOSE: std::cerr << "TRANSPOSE\n"; break; | |
3667 case PLUS_PLUS: std::cerr << "PLUS_PLUS\n"; break; | |
3668 case MINUS_MINUS: std::cerr << "MINUS_MINUS\n"; break; | |
3669 case POW: std::cerr << "POW\n"; break; | |
3670 case EPOW: std::cerr << "EPOW\n"; break; | |
3671 | |
3672 case NUM: | |
3673 case IMAG_NUM: | |
3674 std::cerr << (tok == NUM ? "NUM" : "IMAG_NUM") | |
3675 << " [" << yylval.tok_val->number () << "]\n"; | |
3676 break; | |
3677 | |
3678 case STRUCT_ELT: | |
3679 std::cerr << "STRUCT_ELT [" << yylval.tok_val->text () << "]\n"; break; | |
3680 | |
3681 case NAME: | |
3682 { | |
3683 symbol_table::symbol_record *sr = yylval.tok_val->sym_rec (); | |
3684 std::cerr << "NAME"; | |
3685 if (sr) | |
3686 std::cerr << " [" << sr->name () << "]"; | |
3687 std::cerr << "\n"; | |
3688 } | |
3689 break; | |
3690 | |
3691 case END: std::cerr << "END\n"; break; | |
3692 | |
3693 case DQ_STRING: | |
3694 case SQ_STRING: | |
3695 std::cerr << (tok == DQ_STRING ? "DQ_STRING" : "SQ_STRING") | |
3696 << " [" << yylval.tok_val->text () << "]\n"; | |
3697 break; | |
3698 | |
3699 case FOR: std::cerr << "FOR\n"; break; | |
3700 case WHILE: std::cerr << "WHILE\n"; break; | |
3701 case DO: std::cerr << "DO\n"; break; | |
3702 case UNTIL: std::cerr << "UNTIL\n"; break; | |
3703 case IF: std::cerr << "IF\n"; break; | |
3704 case ELSEIF: std::cerr << "ELSEIF\n"; break; | |
3705 case ELSE: std::cerr << "ELSE\n"; break; | |
3706 case SWITCH: std::cerr << "SWITCH\n"; break; | |
3707 case CASE: std::cerr << "CASE\n"; break; | |
3708 case OTHERWISE: std::cerr << "OTHERWISE\n"; break; | |
3709 case BREAK: std::cerr << "BREAK\n"; break; | |
3710 case CONTINUE: std::cerr << "CONTINUE\n"; break; | |
3711 case FUNC_RET: std::cerr << "FUNC_RET\n"; break; | |
3712 case UNWIND: std::cerr << "UNWIND\n"; break; | |
3713 case CLEANUP: std::cerr << "CLEANUP\n"; break; | |
3714 case TRY: std::cerr << "TRY\n"; break; | |
3715 case CATCH: std::cerr << "CATCH\n"; break; | |
3716 case GLOBAL: std::cerr << "GLOBAL\n"; break; | |
3717 case PERSISTENT: std::cerr << "PERSISTENT\n"; break; | |
3718 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; | |
3719 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; | |
3720 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; | |
3721 case FCN: std::cerr << "FCN\n"; break; | |
3722 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break; | |
3723 case SCRIPT_FILE: std::cerr << "SCRIPT_FILE\n"; break; | |
3724 case FUNCTION_FILE: std::cerr << "FUNCTION_FILE\n"; break; | |
3725 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break; | |
3726 case METAQUERY: std::cerr << "METAQUERY\n"; break; | |
3727 case GET: std::cerr << "GET\n"; break; | |
3728 case SET: std::cerr << "SET\n"; break; | |
3729 case PROPERTIES: std::cerr << "PROPERTIES\n"; break; | |
3730 case METHODS: std::cerr << "METHODS\n"; break; | |
3731 case EVENTS: std::cerr << "EVENTS\n"; break; | |
3732 case CLASSDEF: std::cerr << "CLASSDEF\n"; break; | |
3733 case '\n': std::cerr << "\\n\n"; break; | |
3734 case '\r': std::cerr << "\\r\n"; break; | |
3735 case '\t': std::cerr << "TAB\n"; break; | |
3736 default: | |
3737 { | |
3738 if (tok < 256) | |
3739 std::cerr << static_cast<char> (tok) << "\n"; | |
3740 else | |
3741 std::cerr << "UNKNOWN(" << tok << ")\n"; | |
3742 } | |
3743 break; | |
3744 } | |
3745 } | |
3746 | |
3747 static void | |
3748 display_state (void) | |
3749 { | |
3750 std::cerr << "S: "; | |
3751 | |
3752 switch (YY_START) | |
3753 { | |
3754 case INITIAL: | |
3755 std::cerr << "INITIAL" << std::endl; | |
3756 break; | |
3757 | |
3758 case COMMAND_START: | |
3759 std::cerr << "COMMAND_START" << std::endl; | |
3760 break; | |
3761 | |
3762 case MATRIX_START: | |
3763 std::cerr << "MATRIX_START" << std::endl; | |
3764 break; | |
3765 | |
3766 case SCRIPT_FILE_BEGIN: | |
3767 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl; | |
3768 break; | |
3769 | |
3770 case FUNCTION_FILE_BEGIN: | |
3771 std::cerr << "FUNCTION_FILE_BEGIN" << std::endl; | |
3772 break; | |
3773 | |
3774 default: | |
3775 std::cerr << "UNKNOWN START STATE!" << std::endl; | |
3776 break; | |
3777 } | |
3778 } | |
3779 | |
3780 static void | |
3781 lexer_debug (const char *pattern, const char *text) | |
3782 { | |
3783 std::cerr << std::endl; | |
3784 | |
3785 display_state (); | |
3786 | |
3787 std::cerr << "P: " << pattern << std::endl; | |
3788 std::cerr << "T: " << text << std::endl; | |
3789 } | |
3790 | |
3791 DEFUN (__display_tokens__, args, nargout, | |
3792 "-*- texinfo -*-\n\ | |
3793 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\ | |
3794 Query or set the internal variable that determines whether Octave's\n\ | |
3795 lexer displays tokens as they are read.\n\ | |
3796 @end deftypefn") | |
3797 { | |
3798 return SET_INTERNAL_VARIABLE (display_tokens); | |
3799 } | |
3800 | |
3801 DEFUN (__token_count__, , , | |
3802 "-*- texinfo -*-\n\ | |
3803 @deftypefn {Built-in Function} {} __token_count__ ()\n\ | |
3804 Number of language tokens processed since Octave startup.\n\ | |
3805 @end deftypefn") | |
3806 { | |
3807 return octave_value (Vtoken_count); | |
3808 } | |
3809 | |
// Query or set lexer_debug_flag through set_internal_variable.  The
// signature line of the doc string previously ended with an
// unbalanced extra ")".
DEFUN (__lexer_debug_flag__, args, nargout,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val})\n\
Undocumented internal function.\n\
@end deftypefn")
{
  octave_value retval;

  retval = set_internal_variable (lexer_debug_flag, args, nargout,
                                  "__lexer_debug_flag__");

  return retval;
}