Mercurial > hg > octave-lyh
comparison libinterp/parse-tree/lex.ll @ 16273:c5e5f6ccac5d
9/10 commits reworking the lexer
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 11 Mar 2013 14:32:53 -0400 |
parents | 87069bd38107 |
children | 3c265e4dba6d dd7de0345124 |
comparison legend (row types in the side-by-side table below):
- equal
- deleted
- inserted
- replaced
16271:87069bd38107 | 16273:c5e5f6ccac5d |
---|---|
48 | 48 |
49 %x INPUT_FILE_START | 49 %x INPUT_FILE_START |
50 | 50 |
51 %x BLOCK_COMMENT_START | 51 %x BLOCK_COMMENT_START |
52 %x LINE_COMMENT_START | 52 %x LINE_COMMENT_START |
53 | |
54 %x KLUGE | |
55 | 53 |
56 %{ | 54 %{ |
57 | 55 |
58 #include <cctype> | 56 #include <cctype> |
59 #include <cstring> | 57 #include <cstring> |
249 int tok = curr_lexer->previous_token_value (); | 247 int tok = curr_lexer->previous_token_value (); |
250 | 248 |
251 if (! (tok == ';' || tok == '[' || tok == '{')) | 249 if (! (tok == ';' || tok == '[' || tok == '{')) |
252 curr_lexer->xunput (';'); | 250 curr_lexer->xunput (';'); |
253 } | 251 } |
254 } | |
255 | |
256 <KLUGE>@ { | |
257 curr_lexer->lexer_debug ("<KLUGE>@"); | |
258 curr_lexer->pop_start_state (); | |
259 return curr_lexer->count_token (CHOOSE_ASSIGNMENT); | |
260 } | 252 } |
261 | 253 |
262 %{ | 254 %{ |
263 // For this and the next two rules, we're looking at ']', and we | 255 // For this and the next two rules, we're looking at ']', and we |
264 // need to know if the next token is '=' or '=='. | 256 // need to know if the next token is '=' or '=='. |
800 | 792 |
801 %{ | 793 %{ |
802 // Other operators. | 794 // Other operators. |
803 %} | 795 %} |
804 | 796 |
805 ":" { return curr_lexer->handle_op (":", ':'); } | 797 ":" { return curr_lexer->handle_op (":", ':'); } |
806 ".+" { return curr_lexer->handle_incompatible_op (".+", EPLUS); } | 798 ".+" { return curr_lexer->handle_incompatible_op (".+", EPLUS); } |
807 ".-" { return curr_lexer->handle_incompatible_op (".-", EMINUS); } | 799 ".-" { return curr_lexer->handle_incompatible_op (".-", EMINUS); } |
808 ".*" { return curr_lexer->handle_op (".*", EMUL); } | 800 ".*" { return curr_lexer->handle_op (".*", EMUL); } |
809 "./" { return curr_lexer->handle_op ("./", EDIV); } | 801 "./" { return curr_lexer->handle_op ("./", EDIV); } |
810 ".\\" { return curr_lexer->handle_op (".\\", ELEFTDIV); } | 802 ".\\" { return curr_lexer->handle_op (".\\", ELEFTDIV); } |
811 ".^" { return curr_lexer->handle_op (".^", EPOW); } | 803 ".^" { return curr_lexer->handle_op (".^", EPOW); } |
812 ".**" { return curr_lexer->handle_incompatible_op (".**", EPOW); } | 804 ".**" { return curr_lexer->handle_incompatible_op (".**", EPOW); } |
813 "<=" { return curr_lexer->handle_op ("<=", EXPR_LE); } | 805 "<=" { return curr_lexer->handle_op ("<=", EXPR_LE); } |
814 "==" { return curr_lexer->handle_op ("==", EXPR_EQ); } | 806 "==" { return curr_lexer->handle_op ("==", EXPR_EQ); } |
815 "~=" { return curr_lexer->handle_op ("~=", EXPR_NE); } | 807 "~=" { return curr_lexer->handle_op ("~=", EXPR_NE); } |
816 "!=" { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); } | 808 "!=" { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); } |
817 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } | 809 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } |
818 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } | 810 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } |
819 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } | 811 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } |
820 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } | 812 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } |
821 ">" { return curr_lexer->handle_op (">", EXPR_GT); } | 813 ">" { return curr_lexer->handle_op (">", EXPR_GT); } |
822 "*" { return curr_lexer->handle_op ("*", '*'); } | 814 "*" { return curr_lexer->handle_op ("*", '*'); } |
823 | 815 |
824 "/" { | 816 "/" { |
825 int prev_tok = curr_lexer->previous_token_value (); | 817 int prev_tok = curr_lexer->previous_token_value (); |
826 bool space_before = curr_lexer->space_follows_previous_token (); | 818 bool space_before = curr_lexer->space_follows_previous_token (); |
827 int c = curr_lexer->text_yyinput (); | 819 int c = curr_lexer->text_yyinput (); |
836 } | 828 } |
837 else | 829 else |
838 return curr_lexer->handle_op ("/", '/'); | 830 return curr_lexer->handle_op ("/", '/'); |
839 } | 831 } |
840 | 832 |
841 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } | 833 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } |
842 "^" { return curr_lexer->handle_op ("^", POW); } | 834 "^" { return curr_lexer->handle_op ("^", POW); } |
843 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } | 835 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } |
844 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } | 836 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } |
845 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } | 837 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } |
846 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } | 838 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } |
847 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } | 839 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } |
848 | 840 |
849 ";" { | 841 ";" { |
850 bool at_beginning_of_statement | 842 bool at_beginning_of_statement |
851 = (! (curr_lexer->whitespace_is_significant () | 843 = (! (curr_lexer->whitespace_is_significant () |
852 || curr_lexer->looking_at_object_index.front ())); | 844 || curr_lexer->looking_at_object_index.front ())); |
1028 | 1020 |
1029 %{ | 1021 %{ |
1030 // = and op= operators. | 1022 // = and op= operators. |
1031 %} | 1023 %} |
1032 | 1024 |
1033 "=" { | 1025 "=" { return curr_lexer->handle_op ("=", '='); } |
1034 int tok = curr_lexer->handle_assign_op ("=", '='); | 1026 "+=" { return curr_lexer->handle_incompatible_op ("+=", ADD_EQ); } |
1035 if (tok < 0) | 1027 "-=" { return curr_lexer->handle_incompatible_op ("-=", SUB_EQ); } |
1036 { | 1028 "*=" { return curr_lexer->handle_incompatible_op ("*=", MUL_EQ); } |
1037 yyless (0); | 1029 "/=" { return curr_lexer->handle_incompatible_op ("/=", DIV_EQ); } |
1038 curr_lexer->xunput ('@'); | 1030 "\\=" { return curr_lexer->handle_incompatible_op ("\\=", LEFTDIV_EQ); } |
1039 curr_lexer->push_start_state (KLUGE); | 1031 ".+=" { return curr_lexer->handle_incompatible_op (".+=", ADD_EQ); } |
1040 } | 1032 ".-=" { return curr_lexer->handle_incompatible_op (".-=", SUB_EQ); } |
1041 else | 1033 ".*=" { return curr_lexer->handle_incompatible_op (".*=", EMUL_EQ); } |
1042 return tok; | 1034 "./=" { return curr_lexer->handle_incompatible_op ("./=", EDIV_EQ); } |
1043 } | 1035 ".\\=" { return curr_lexer->handle_incompatible_op (".\\=", ELEFTDIV_EQ); } |
1044 | 1036 "^=" { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); } |
1045 "+=" { | 1037 "**=" { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); } |
1046 int tok = curr_lexer->handle_incompatible_assign_op ("+=", ADD_EQ); | 1038 ".^=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); } |
1047 if (tok < 0) | 1039 ".**=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); } |
1048 { | 1040 "&=" { return curr_lexer->handle_incompatible_op ("&=", AND_EQ); } |
1049 yyless (0); | 1041 "|=" { return curr_lexer->handle_incompatible_op ("|=", OR_EQ); } |
1050 curr_lexer->xunput ('@'); | 1042 "<<=" { return curr_lexer->handle_incompatible_op ("<<=", LSHIFT_EQ); } |
1051 curr_lexer->push_start_state (KLUGE); | 1043 ">>=" { return curr_lexer->handle_incompatible_op (">>=", RSHIFT_EQ); } |
1052 } | |
1053 else | |
1054 return tok; | |
1055 } | |
1056 | |
1057 "-=" { | |
1058 int tok = curr_lexer->handle_incompatible_assign_op ("-=", SUB_EQ); | |
1059 if (tok < 0) | |
1060 { | |
1061 yyless (0); | |
1062 curr_lexer->xunput ('@'); | |
1063 curr_lexer->push_start_state (KLUGE); | |
1064 } | |
1065 else | |
1066 return tok; | |
1067 } | |
1068 | |
1069 "*=" { | |
1070 int tok = curr_lexer->handle_incompatible_assign_op ("*=", MUL_EQ); | |
1071 if (tok < 0) | |
1072 { | |
1073 yyless (0); | |
1074 curr_lexer->xunput ('@'); | |
1075 curr_lexer->push_start_state (KLUGE); | |
1076 } | |
1077 else | |
1078 return tok; | |
1079 } | |
1080 | |
1081 "/=" { | |
1082 int tok = curr_lexer->handle_incompatible_assign_op ("/=", DIV_EQ); | |
1083 if (tok < 0) | |
1084 { | |
1085 yyless (0); | |
1086 curr_lexer->xunput ('@'); | |
1087 curr_lexer->push_start_state (KLUGE); | |
1088 } | |
1089 else | |
1090 return tok; | |
1091 } | |
1092 | |
1093 "\\=" { | |
1094 int tok = curr_lexer->handle_incompatible_assign_op ("\\=", LEFTDIV_EQ); | |
1095 if (tok < 0) | |
1096 { | |
1097 yyless (0); | |
1098 curr_lexer->xunput ('@'); | |
1099 curr_lexer->push_start_state (KLUGE); | |
1100 } | |
1101 else | |
1102 return tok; | |
1103 } | |
1104 | |
1105 ".+=" { | |
1106 int tok = curr_lexer->handle_incompatible_assign_op (".+=", ADD_EQ); | |
1107 if (tok < 0) | |
1108 { | |
1109 yyless (0); | |
1110 curr_lexer->xunput ('@'); | |
1111 curr_lexer->push_start_state (KLUGE); | |
1112 } | |
1113 else | |
1114 return tok; | |
1115 } | |
1116 | |
1117 ".-=" { | |
1118 int tok = curr_lexer->handle_incompatible_assign_op (".-=", SUB_EQ); | |
1119 if (tok < 0) | |
1120 { | |
1121 yyless (0); | |
1122 curr_lexer->xunput ('@'); | |
1123 curr_lexer->push_start_state (KLUGE); | |
1124 } | |
1125 else | |
1126 return tok; | |
1127 } | |
1128 | |
1129 ".*=" { | |
1130 int tok = curr_lexer->handle_incompatible_assign_op (".*=", EMUL_EQ); | |
1131 if (tok < 0) | |
1132 { | |
1133 yyless (0); | |
1134 curr_lexer->xunput ('@'); | |
1135 curr_lexer->push_start_state (KLUGE); | |
1136 } | |
1137 else | |
1138 return tok; | |
1139 } | |
1140 | |
1141 "./=" { | |
1142 int tok = curr_lexer->handle_incompatible_assign_op ("./=", EDIV_EQ); | |
1143 if (tok < 0) | |
1144 { | |
1145 yyless (0); | |
1146 curr_lexer->xunput ('@'); | |
1147 curr_lexer->push_start_state (KLUGE); | |
1148 } | |
1149 else | |
1150 return tok; | |
1151 } | |
1152 | |
1153 ".\\=" { | |
1154 int tok = curr_lexer->handle_incompatible_assign_op (".\\=", ELEFTDIV_EQ); | |
1155 if (tok < 0) | |
1156 { | |
1157 yyless (0); | |
1158 curr_lexer->xunput ('@'); | |
1159 curr_lexer->push_start_state (KLUGE); | |
1160 } | |
1161 else | |
1162 return tok; | |
1163 } | |
1164 | |
1165 "^=" { | |
1166 int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ); | |
1167 if (tok < 0) | |
1168 { | |
1169 yyless (0); | |
1170 curr_lexer->xunput ('@'); | |
1171 curr_lexer->push_start_state (KLUGE); | |
1172 } | |
1173 else | |
1174 return tok; | |
1175 } | |
1176 | |
1177 "**=" { | |
1178 int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ); | |
1179 if (tok < 0) | |
1180 { | |
1181 yyless (0); | |
1182 curr_lexer->xunput ('@'); | |
1183 curr_lexer->push_start_state (KLUGE); | |
1184 } | |
1185 else | |
1186 return tok; | |
1187 } | |
1188 | |
1189 ".^=" { | |
1190 int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ); | |
1191 if (tok < 0) | |
1192 { | |
1193 yyless (0); | |
1194 curr_lexer->xunput ('@'); | |
1195 curr_lexer->push_start_state (KLUGE); | |
1196 } | |
1197 else | |
1198 return tok; | |
1199 } | |
1200 | |
1201 ".**=" { | |
1202 int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ); | |
1203 if (tok < 0) | |
1204 { | |
1205 yyless (0); | |
1206 curr_lexer->xunput ('@'); | |
1207 curr_lexer->push_start_state (KLUGE); | |
1208 } | |
1209 else | |
1210 return tok; | |
1211 } | |
1212 | |
1213 "&=" { | |
1214 int tok = curr_lexer->handle_incompatible_assign_op ("&=", AND_EQ); | |
1215 if (tok < 0) | |
1216 { | |
1217 yyless (0); | |
1218 curr_lexer->xunput ('@'); | |
1219 curr_lexer->push_start_state (KLUGE); | |
1220 } | |
1221 else | |
1222 return tok; | |
1223 } | |
1224 | |
1225 "|=" { | |
1226 int tok = curr_lexer->handle_incompatible_assign_op ("|=", OR_EQ); | |
1227 if (tok < 0) | |
1228 { | |
1229 yyless (0); | |
1230 curr_lexer->xunput ('@'); | |
1231 curr_lexer->push_start_state (KLUGE); | |
1232 } | |
1233 else | |
1234 return tok; | |
1235 } | |
1236 | |
1237 "<<=" { | |
1238 int tok = curr_lexer->handle_incompatible_assign_op ("<<=", LSHIFT_EQ); | |
1239 if (tok < 0) | |
1240 { | |
1241 yyless (0); | |
1242 curr_lexer->xunput ('@'); | |
1243 curr_lexer->push_start_state (KLUGE); | |
1244 } | |
1245 else | |
1246 return tok; | |
1247 } | |
1248 | |
1249 ">>=" { | |
1250 int tok = curr_lexer->handle_incompatible_assign_op (">>=", RSHIFT_EQ); | |
1251 if (tok < 0) | |
1252 { | |
1253 yyless (0); | |
1254 curr_lexer->xunput ('@'); | |
1255 curr_lexer->push_start_state (KLUGE); | |
1256 } | |
1257 else | |
1258 return tok; | |
1259 } | |
1260 | 1044 |
1261 "{" { | 1045 "{" { |
1262 curr_lexer->lexer_debug ("{"); | 1046 curr_lexer->lexer_debug ("{"); |
1263 | 1047 |
1264 bool unput_comma = false; | 1048 bool unput_comma = false; |
3027 current_input_column += flex_yyleng (); | 2811 current_input_column += flex_yyleng (); |
3028 looking_for_object_index = false; | 2812 looking_for_object_index = false; |
3029 } | 2813 } |
3030 | 2814 |
3031 return kw_token; | 2815 return kw_token; |
3032 } | |
3033 | |
3034 // See if we have a plot keyword (title, using, with, or clear). | |
3035 | |
3036 int c1 = text_yyinput (); | |
3037 | |
3038 bool next_tok_is_eq = false; | |
3039 if (c1 == '=') | |
3040 { | |
3041 int c2 = text_yyinput (); | |
3042 xunput (c2); | |
3043 | |
3044 if (c2 != '=') | |
3045 next_tok_is_eq = true; | |
3046 } | |
3047 | |
3048 xunput (c1); | |
3049 | |
3050 // Kluge alert. | |
3051 // | |
3052 // If we are looking at a text style function, set up to gobble its | |
3053 // arguments. | |
3054 // | |
3055 // If the following token is '=', or if we are parsing a function | |
3056 // return list or function parameter list, or if we are looking at | |
3057 // something like [ab,cd] = foo (), force the symbol to be inserted | |
3058 // as a variable in the current symbol table. | |
3059 | |
3060 if (! is_variable (tok)) | |
3061 { | |
3062 if (next_tok_is_eq | |
3063 || looking_at_decl_list | |
3064 || looking_at_return_list | |
3065 || (looking_at_parameter_list | |
3066 && ! looking_at_initializer_expression)) | |
3067 { | |
3068 symbol_table::force_variable (tok); | |
3069 } | |
3070 else if (looking_at_matrix_or_assign_lhs) | |
3071 { | |
3072 pending_local_variables.insert (tok); | |
3073 } | |
3074 } | 2816 } |
3075 | 2817 |
3076 // Find the token in the symbol table. Beware the magic | 2818 // Find the token in the symbol table. Beware the magic |
3077 // transformation of the end keyword... | 2819 // transformation of the end keyword... |
3078 | 2820 |
3296 case PERSISTENT: std::cerr << "PERSISTENT\n"; break; | 3038 case PERSISTENT: std::cerr << "PERSISTENT\n"; break; |
3297 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; | 3039 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; |
3298 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; | 3040 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; |
3299 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; | 3041 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; |
3300 case FCN: std::cerr << "FCN\n"; break; | 3042 case FCN: std::cerr << "FCN\n"; break; |
3301 case CHOOSE_ASSIGNMENT: std::cerr << "CHOOSE_ASSIGNMENT\n"; break; | |
3302 case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break; | 3043 case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break; |
3303 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break; | 3044 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break; |
3304 case METAQUERY: std::cerr << "METAQUERY\n"; break; | 3045 case METAQUERY: std::cerr << "METAQUERY\n"; break; |
3305 case GET: std::cerr << "GET\n"; break; | 3046 case GET: std::cerr << "GET\n"; break; |
3306 case SET: std::cerr << "SET\n"; break; | 3047 case SET: std::cerr << "SET\n"; break; |
3404 | 3145 |
3405 case LINE_COMMENT_START: | 3146 case LINE_COMMENT_START: |
3406 std::cerr << "LINE_COMMENT_START" << std::endl; | 3147 std::cerr << "LINE_COMMENT_START" << std::endl; |
3407 break; | 3148 break; |
3408 | 3149 |
3409 case KLUGE: | |
3410 std::cerr << "KLUGE" << std::endl; | |
3411 break; | |
3412 | |
3413 default: | 3150 default: |
3414 std::cerr << "UNKNOWN START STATE!" << std::endl; | 3151 std::cerr << "UNKNOWN START STATE!" << std::endl; |
3415 break; | 3152 break; |
3416 } | 3153 } |
3417 } | 3154 } |
3476 return maybe_unput_comma_before_unary_op (tok) | 3213 return maybe_unput_comma_before_unary_op (tok) |
3477 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false); | 3214 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false); |
3478 } | 3215 } |
3479 | 3216 |
3480 int | 3217 int |
3481 octave_lexer::handle_assign_op (const char *pattern, int tok) | |
3482 { | |
3483 lexer_debug (pattern); | |
3484 | |
3485 return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs) | |
3486 ? -1 : handle_op_internal (pattern, tok, false, false, false, true); | |
3487 } | |
3488 | |
3489 int | |
3490 octave_lexer::handle_incompatible_assign_op (const char *pattern, int tok) | |
3491 { | |
3492 lexer_debug (pattern); | |
3493 | |
3494 return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs) | |
3495 ? -1 : handle_op_internal (pattern, tok, false, false, false, false); | |
3496 } | |
3497 | |
3498 int | |
3499 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, | 3218 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, |
3500 bool bos, bool qit, bool compat) | 3219 bool bos, bool qit, bool compat) |
3501 { | 3220 { |
3502 if (! compat) | 3221 if (! compat) |
3503 gripe_matlab_incompatible_operator (flex_yytext ()); | 3222 gripe_matlab_incompatible_operator (flex_yytext ()); |