comparison libinterp/parse-tree/lex.ll @ 16123:a484e39d1f22

maint: move function definition in lex.ll
author John W. Eaton <jwe@octave.org>
date Tue, 26 Feb 2013 12:43:36 -0500
parents 6884401b2fbb
children 3be725cd195b
comparison
equal deleted inserted replaced
16122:6884401b2fbb 16123:a484e39d1f22
974 TOK_RETURN (END_OF_INPUT); 974 TOK_RETURN (END_OF_INPUT);
975 } 975 }
976 976
977 %% 977 %%
978 978
// Write a readable representation of the character C to std::cerr.
//
// Characters for which isgraph is true are written unchanged.  The
// character codes 0 through 32 are written as their conventional ASCII
// abbreviations ("NUL", "ESC", "SPACE", ...) or, for the codes 7
// through 13, as the matching C escape sequence ("\a", "\b", "\t",
// "\n", "\v", "\f", "\r").  Code 127 is written as "DEL".  Any other
// non-graphic character produces no output.

static void
display_character (char c)
{
  // Names for the character codes 0 through 32, indexed by code.
  static const char * const low_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  static const unsigned int n_low_names
    = sizeof (low_names) / sizeof (low_names[0]);

  // The <cctype> classification functions are only defined for values
  // representable as unsigned char (or EOF).  Plain char may be
  // negative, so convert before calling isgraph.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc < n_low_names)
    std::cerr << low_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}
1124
// Define yywrap here so that we don't have to link to libfl.a.
// It always returns 1.

int
yywrap (void)
{
  const int result = 1;

  return result;
}
1132
1133 // Tell us all what the current buffer is.
1134
1135 YY_BUFFER_STATE
1136 current_buffer (void)
1137 {
1138 return YY_CURRENT_BUFFER;
1139 }
1140
1141 // Create a new buffer.
1142
1143 YY_BUFFER_STATE
1144 create_buffer (FILE *f)
1145 {
1146 return yy_create_buffer (f, YY_BUF_SIZE);
1147 }
1148
1149 // Start reading a new buffer.
1150
1151 void
1152 switch_to_buffer (YY_BUFFER_STATE buf)
1153 {
1154 yy_switch_to_buffer (buf);
1155 }
1156
1157 // Delete a buffer.
1158
1159 void
1160 delete_buffer (YY_BUFFER_STATE buf)
1161 {
1162 yy_delete_buffer (buf);
1163
1164 // Prevent invalid yyin from being used by yyrestart.
1165 if (! current_buffer ())
1166 yyin = 0;
1167 }
1168
1169 // Delete all buffers from the stack.
1170 void
1171 clear_all_buffers (void)
1172 {
1173 while (current_buffer ())
1174 octave_pop_buffer_state ();
1175 }
1176
1177 void
1178 cleanup_parser (void)
1179 {
1180 clear_all_buffers ();
1181 }
1182
1183 // Restore a buffer (for unwind-prot).
1184
1185 void
1186 restore_input_buffer (void *buf)
1187 {
1188 switch_to_buffer (static_cast<YY_BUFFER_STATE> (buf));
1189 }
1190
1191 // Delete a buffer (for unwind-prot).
1192
1193 void
1194 delete_input_buffer (void *buf)
1195 {
1196 delete_buffer (static_cast<YY_BUFFER_STATE> (buf));
1197 }
1198
// Return true if the character C is equal to any character in the
// NUL-terminated string S, and false otherwise.  The terminating NUL
// itself is never considered a match.

static bool
match_any (char c, const char *s)
{
  for (const char *p = s; *p != '\0'; ++p)
    {
      if (*p == c)
        return true;
    }

  return false;
}
1213
// Given information about the spacing surrounding an operator, return
// true if it looks like it should be treated as a binary operator.
// SPC_PREV says whether whitespace precedes the operator and NEXT_CHAR
// is the character that follows it.  For example,
//
//   [ 1 + 2 ]  or  [ 1+ 2]  or  [ 1+2 ]  ==>  binary
//
//   [ 1 +2 ]  ==>  unary

static bool
looks_like_bin_op (bool spc_prev, int next_char)
{
  // With no whitespace in front, the operator is always binary.
  if (! spc_prev)
    return true;

  // With whitespace in front, it is binary only when a space or tab
  // follows as well.
  switch (next_char)
    {
    case ' ':
    case '\t':
      return true;

    default:
      return false;
    }
}
1229
1230 bool
1231 is_keyword (const std::string& s)
1232 {
1233 // Parsing function names like "set.property_name" inside
1234 // classdef-style class definitions is simplified by handling the
1235 // "set" and "get" portions of the names using the same mechanism as
1236 // is used for keywords. However, they are not really keywords in
1237 // the language, so omit them from the list of possible keywords.
1238
1239 return (octave_kw_hash::in_word_set (s.c_str (), s.length ()) != 0
1240 && ! (s == "set" || s == "get"));
1241 }
1242
1243 DEFUN (iskeyword, args, ,
1244 "-*- texinfo -*-\n\
1245 @deftypefn {Built-in Function} {} iskeyword ()\n\
1246 @deftypefnx {Built-in Function} {} iskeyword (@var{name})\n\
1247 Return true if @var{name} is an Octave keyword. If @var{name}\n\
1248 is omitted, return a list of keywords.\n\
1249 @seealso{isvarname, exist}\n\
1250 @end deftypefn")
1251 {
1252 octave_value retval;
1253
1254 int argc = args.length () + 1;
1255
1256 string_vector argv = args.make_argv ("iskeyword");
1257
1258 if (error_state)
1259 return retval;
1260
1261 if (argc == 1)
1262 {
1263 // Neither set and get are keywords. See the note in the
1264 // is_keyword function for additional details.
1265
1266 string_vector lst (TOTAL_KEYWORDS);
1267
1268 int j = 0;
1269
1270 for (int i = 0; i < TOTAL_KEYWORDS; i++)
1271 {
1272 std::string tmp = wordlist[i].name;
1273
1274 if (! (tmp == "set" || tmp == "get"))
1275 lst[j++] = tmp;
1276 }
1277
1278 lst.resize (j);
1279
1280 retval = Cell (lst.sort ());
1281 }
1282 else if (argc == 2)
1283 {
1284 retval = is_keyword (argv[1]);
1285 }
1286 else
1287 print_usage ();
1288
1289 return retval;
1290 }
1291
1292 /*
1293
1294 %!assert (iskeyword ("for"))
1295 %!assert (iskeyword ("fort"), false)
1296 %!assert (iskeyword ("fft"), false)
1297
1298 */
1299
1300 void
1301 prep_lexer_for_script_file (void)
1302 {
1303 BEGIN (SCRIPT_FILE_BEGIN);
1304 }
1305
1306 void
1307 prep_lexer_for_function_file (void)
1308 {
1309 BEGIN (FUNCTION_FILE_BEGIN);
1310 }
1311
// Used to delete trailing white space from tokens.
//
// Return a copy of S cut off at its first space or tab character.  If
// S contains neither, the copy is returned unchanged.  Note that
// everything from the first blank onward is dropped, not only blanks
// at the very end.

static std::string
strip_trailing_whitespace (char *s)
{
  const std::string text (s);

  const std::string::size_type cut = text.find_first_of (" \t");

  return cut == std::string::npos ? text : text.substr (0, cut);
}
1326
1327 DEFUN (__display_tokens__, args, nargout,
1328 "-*- texinfo -*-\n\
1329 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\
1330 Query or set the internal variable that determines whether Octave's\n\
1331 lexer displays tokens as they are read.\n\
1332 @end deftypefn")
1333 {
1334 return SET_INTERNAL_VARIABLE (display_tokens);
1335 }
1336
1337 DEFUN (__token_count__, , ,
1338 "-*- texinfo -*-\n\
1339 @deftypefn {Built-in Function} {} __token_count__ ()\n\
1340 Number of language tokens processed since Octave startup.\n\
1341 @end deftypefn")
1342 {
1343 return octave_value (Vtoken_count);
1344 }
1345
1346 DEFUN (__lexer_debug_flag__, args, nargout,
1347 "-*- texinfo -*-\n\
1348 @deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val}))\n\
1349 Undocumented internal function.\n\
1350 @end deftypefn")
1351 {
1352 octave_value retval;
1353
1354 retval = set_internal_variable (lexer_debug_flag, args, nargout,
1355 "__lexer_debug_flag__");
1356
1357 return retval;
1358 }
1359
1360 class
1361 flex_stream_reader : public stream_reader
1362 {
1363 public:
1364 flex_stream_reader (lexical_feedback *l, char *buf_arg)
1365 : stream_reader (), lexer (l), buf (buf_arg)
1366 { }
1367
1368 int getc (void) { return lexer->text_yyinput (); }
1369 int ungetc (int c) { lexer->xunput (c, buf); return 0; }
1370
1371 private:
1372
1373 // No copying!
1374
1375 flex_stream_reader (const flex_stream_reader&);
1376
1377 flex_stream_reader& operator = (const flex_stream_reader&);
1378
1379 lexical_feedback *lexer;
1380
1381 char *buf;
1382 };
1383
1384 lexical_feedback::~lexical_feedback (void)
1385 {
1386 // Clear out the stack of token info used to track line and
1387 // column numbers.
1388
1389 while (! token_stack.empty ())
1390 {
1391 delete token_stack.top ();
1392 token_stack.pop ();
1393 }
1394 }
1395
979 void 1396 void
980 lexical_feedback::reset_parser (void) 1397 lexical_feedback::reset_parser (void)
981 { 1398 {
982 // Start off on the right foot. 1399 // Start off on the right foot.
983 BEGIN (INITIAL); 1400 BEGIN (INITIAL);
1000 yyrestart (stdin); 1417 yyrestart (stdin);
1001 1418
1002 // Clear the buffer for help text. 1419 // Clear the buffer for help text.
1003 while (! help_buf.empty ()) 1420 while (! help_buf.empty ())
1004 help_buf.pop (); 1421 help_buf.pop ();
1005 }
1006
// Write a readable representation of the character C to std::cerr.
//
// Characters for which isgraph is true are written unchanged.  The
// character codes 0 through 32 are written as their conventional ASCII
// abbreviations ("NUL", "ESC", "SPACE", ...) or, for the codes 7
// through 13, as the matching C escape sequence ("\a", "\b", "\t",
// "\n", "\v", "\f", "\r").  Code 127 is written as "DEL".  Any other
// non-graphic character produces no output.

static void
display_character (char c)
{
  // Names for the character codes 0 through 32, indexed by code.
  static const char * const low_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  static const unsigned int n_low_names
    = sizeof (low_names) / sizeof (low_names[0]);

  // The <cctype> classification functions are only defined for values
  // representable as unsigned char (or EOF).  Plain char may be
  // negative, so convert before calling isgraph.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc < n_low_names)
    std::cerr << low_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}
1152
// Define yywrap here so that we don't have to link to libfl.a.
// It always returns 1.

int
yywrap (void)
{
  const int result = 1;

  return result;
}
1160
1161 // Tell us all what the current buffer is.
1162
1163 YY_BUFFER_STATE
1164 current_buffer (void)
1165 {
1166 return YY_CURRENT_BUFFER;
1167 }
1168
1169 // Create a new buffer.
1170
1171 YY_BUFFER_STATE
1172 create_buffer (FILE *f)
1173 {
1174 return yy_create_buffer (f, YY_BUF_SIZE);
1175 }
1176
1177 // Start reading a new buffer.
1178
1179 void
1180 switch_to_buffer (YY_BUFFER_STATE buf)
1181 {
1182 yy_switch_to_buffer (buf);
1183 }
1184
1185 // Delete a buffer.
1186
1187 void
1188 delete_buffer (YY_BUFFER_STATE buf)
1189 {
1190 yy_delete_buffer (buf);
1191
1192 // Prevent invalid yyin from being used by yyrestart.
1193 if (! current_buffer ())
1194 yyin = 0;
1195 }
1196
1197 // Delete all buffers from the stack.
1198 void
1199 clear_all_buffers (void)
1200 {
1201 while (current_buffer ())
1202 octave_pop_buffer_state ();
1203 }
1204
1205 void
1206 cleanup_parser (void)
1207 {
1208 clear_all_buffers ();
1209 }
1210
1211 // Restore a buffer (for unwind-prot).
1212
1213 void
1214 restore_input_buffer (void *buf)
1215 {
1216 switch_to_buffer (static_cast<YY_BUFFER_STATE> (buf));
1217 }
1218
1219 // Delete a buffer (for unwind-prot).
1220
1221 void
1222 delete_input_buffer (void *buf)
1223 {
1224 delete_buffer (static_cast<YY_BUFFER_STATE> (buf));
1225 }
1226
// Return true if the character C is equal to any character in the
// NUL-terminated string S, and false otherwise.  The terminating NUL
// itself is never considered a match.

static bool
match_any (char c, const char *s)
{
  for (const char *p = s; *p != '\0'; ++p)
    {
      if (*p == c)
        return true;
    }

  return false;
}
1241
// Given information about the spacing surrounding an operator, return
// true if it looks like it should be treated as a binary operator.
// SPC_PREV says whether whitespace precedes the operator and NEXT_CHAR
// is the character that follows it.  For example,
//
//   [ 1 + 2 ]  or  [ 1+ 2]  or  [ 1+2 ]  ==>  binary
//
//   [ 1 +2 ]  ==>  unary

static bool
looks_like_bin_op (bool spc_prev, int next_char)
{
  // With no whitespace in front, the operator is always binary.
  if (! spc_prev)
    return true;

  // With whitespace in front, it is binary only when a space or tab
  // follows as well.
  switch (next_char)
    {
    case ' ':
    case '\t':
      return true;

    default:
      return false;
    }
}
1257
1258 bool
1259 is_keyword (const std::string& s)
1260 {
1261 // Parsing function names like "set.property_name" inside
1262 // classdef-style class definitions is simplified by handling the
1263 // "set" and "get" portions of the names using the same mechanism as
1264 // is used for keywords. However, they are not really keywords in
1265 // the language, so omit them from the list of possible keywords.
1266
1267 return (octave_kw_hash::in_word_set (s.c_str (), s.length ()) != 0
1268 && ! (s == "set" || s == "get"));
1269 }
1270
1271 DEFUN (iskeyword, args, ,
1272 "-*- texinfo -*-\n\
1273 @deftypefn {Built-in Function} {} iskeyword ()\n\
1274 @deftypefnx {Built-in Function} {} iskeyword (@var{name})\n\
1275 Return true if @var{name} is an Octave keyword. If @var{name}\n\
1276 is omitted, return a list of keywords.\n\
1277 @seealso{isvarname, exist}\n\
1278 @end deftypefn")
1279 {
1280 octave_value retval;
1281
1282 int argc = args.length () + 1;
1283
1284 string_vector argv = args.make_argv ("iskeyword");
1285
1286 if (error_state)
1287 return retval;
1288
1289 if (argc == 1)
1290 {
1291 // Neither set and get are keywords. See the note in the
1292 // is_keyword function for additional details.
1293
1294 string_vector lst (TOTAL_KEYWORDS);
1295
1296 int j = 0;
1297
1298 for (int i = 0; i < TOTAL_KEYWORDS; i++)
1299 {
1300 std::string tmp = wordlist[i].name;
1301
1302 if (! (tmp == "set" || tmp == "get"))
1303 lst[j++] = tmp;
1304 }
1305
1306 lst.resize (j);
1307
1308 retval = Cell (lst.sort ());
1309 }
1310 else if (argc == 2)
1311 {
1312 retval = is_keyword (argv[1]);
1313 }
1314 else
1315 print_usage ();
1316
1317 return retval;
1318 }
1319
1320 /*
1321
1322 %!assert (iskeyword ("for"))
1323 %!assert (iskeyword ("fort"), false)
1324 %!assert (iskeyword ("fft"), false)
1325
1326 */
1327
1328 void
1329 prep_lexer_for_script_file (void)
1330 {
1331 BEGIN (SCRIPT_FILE_BEGIN);
1332 }
1333
1334 void
1335 prep_lexer_for_function_file (void)
1336 {
1337 BEGIN (FUNCTION_FILE_BEGIN);
1338 }
1339
// Used to delete trailing white space from tokens.
//
// Return a copy of S cut off at its first space or tab character.  If
// S contains neither, the copy is returned unchanged.  Note that
// everything from the first blank onward is dropped, not only blanks
// at the very end.

static std::string
strip_trailing_whitespace (char *s)
{
  const std::string text (s);

  const std::string::size_type cut = text.find_first_of (" \t");

  return cut == std::string::npos ? text : text.substr (0, cut);
}
1354
1355 DEFUN (__display_tokens__, args, nargout,
1356 "-*- texinfo -*-\n\
1357 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\
1358 Query or set the internal variable that determines whether Octave's\n\
1359 lexer displays tokens as they are read.\n\
1360 @end deftypefn")
1361 {
1362 return SET_INTERNAL_VARIABLE (display_tokens);
1363 }
1364
1365 DEFUN (__token_count__, , ,
1366 "-*- texinfo -*-\n\
1367 @deftypefn {Built-in Function} {} __token_count__ ()\n\
1368 Number of language tokens processed since Octave startup.\n\
1369 @end deftypefn")
1370 {
1371 return octave_value (Vtoken_count);
1372 }
1373
1374 DEFUN (__lexer_debug_flag__, args, nargout,
1375 "-*- texinfo -*-\n\
1376 @deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val}))\n\
1377 Undocumented internal function.\n\
1378 @end deftypefn")
1379 {
1380 octave_value retval;
1381
1382 retval = set_internal_variable (lexer_debug_flag, args, nargout,
1383 "__lexer_debug_flag__");
1384
1385 return retval;
1386 }
1387
1388 class
1389 flex_stream_reader : public stream_reader
1390 {
1391 public:
1392 flex_stream_reader (lexical_feedback *l, char *buf_arg)
1393 : stream_reader (), lexer (l), buf (buf_arg)
1394 { }
1395
1396 int getc (void) { return lexer->text_yyinput (); }
1397 int ungetc (int c) { lexer->xunput (c, buf); return 0; }
1398
1399 private:
1400
1401 // No copying!
1402
1403 flex_stream_reader (const flex_stream_reader&);
1404
1405 flex_stream_reader& operator = (const flex_stream_reader&);
1406
1407 lexical_feedback *lexer;
1408
1409 char *buf;
1410 };
1411
1412 lexical_feedback::~lexical_feedback (void)
1413 {
1414 // Clear out the stack of token info used to track line and
1415 // column numbers.
1416
1417 while (! token_stack.empty ())
1418 {
1419 delete token_stack.top ();
1420 token_stack.pop ();
1421 }
1422 } 1422 }
1423 1423
1424 int 1424 int
1425 lexical_feedback::octave_read (char *buf, unsigned max_size) 1425 lexical_feedback::octave_read (char *buf, unsigned max_size)
1426 { 1426 {