comparison libinterp/parse-tree/lex.ll @ 16273:c5e5f6ccac5d

9/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:32:53 -0400
parents 87069bd38107
children 3c265e4dba6d dd7de0345124
comparison
equal deleted inserted replaced
16271:87069bd38107 16273:c5e5f6ccac5d
48 48
49 %x INPUT_FILE_START 49 %x INPUT_FILE_START
50 50
51 %x BLOCK_COMMENT_START 51 %x BLOCK_COMMENT_START
52 %x LINE_COMMENT_START 52 %x LINE_COMMENT_START
53
54 %x KLUGE
55 53
56 %{ 54 %{
57 55
58 #include <cctype> 56 #include <cctype>
59 #include <cstring> 57 #include <cstring>
249 int tok = curr_lexer->previous_token_value (); 247 int tok = curr_lexer->previous_token_value ();
250 248
251 if (! (tok == ';' || tok == '[' || tok == '{')) 249 if (! (tok == ';' || tok == '[' || tok == '{'))
252 curr_lexer->xunput (';'); 250 curr_lexer->xunput (';');
253 } 251 }
254 }
255
256 <KLUGE>@ {
257 curr_lexer->lexer_debug ("<KLUGE>@");
258 curr_lexer->pop_start_state ();
259 return curr_lexer->count_token (CHOOSE_ASSIGNMENT);
260 } 252 }
261 253
262 %{ 254 %{
263 // For this and the next two rules, we're looking at ']', and we 255 // For this and the next two rules, we're looking at ']', and we
264 // need to know if the next token is '=' or '=='. 256 // need to know if the next token is '=' or '=='.
800 792
801 %{ 793 %{
802 // Other operators. 794 // Other operators.
803 %} 795 %}
804 796
805 ":" { return curr_lexer->handle_op (":", ':'); } 797 ":" { return curr_lexer->handle_op (":", ':'); }
806 ".+" { return curr_lexer->handle_incompatible_op (".+", EPLUS); } 798 ".+" { return curr_lexer->handle_incompatible_op (".+", EPLUS); }
807 ".-" { return curr_lexer->handle_incompatible_op (".-", EMINUS); } 799 ".-" { return curr_lexer->handle_incompatible_op (".-", EMINUS); }
808 ".*" { return curr_lexer->handle_op (".*", EMUL); } 800 ".*" { return curr_lexer->handle_op (".*", EMUL); }
809 "./" { return curr_lexer->handle_op ("./", EDIV); } 801 "./" { return curr_lexer->handle_op ("./", EDIV); }
810 ".\\" { return curr_lexer->handle_op (".\\", ELEFTDIV); } 802 ".\\" { return curr_lexer->handle_op (".\\", ELEFTDIV); }
811 ".^" { return curr_lexer->handle_op (".^", EPOW); } 803 ".^" { return curr_lexer->handle_op (".^", EPOW); }
812 ".**" { return curr_lexer->handle_incompatible_op (".**", EPOW); } 804 ".**" { return curr_lexer->handle_incompatible_op (".**", EPOW); }
813 "<=" { return curr_lexer->handle_op ("<=", EXPR_LE); } 805 "<=" { return curr_lexer->handle_op ("<=", EXPR_LE); }
814 "==" { return curr_lexer->handle_op ("==", EXPR_EQ); } 806 "==" { return curr_lexer->handle_op ("==", EXPR_EQ); }
815 "~=" { return curr_lexer->handle_op ("~=", EXPR_NE); } 807 "~=" { return curr_lexer->handle_op ("~=", EXPR_NE); }
816 "!=" { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); } 808 "!=" { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); }
817 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } 809 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); }
818 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } 810 "&" { return curr_lexer->handle_op ("&", EXPR_AND); }
819 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } 811 "|" { return curr_lexer->handle_op ("|", EXPR_OR); }
820 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } 812 "<" { return curr_lexer->handle_op ("<", EXPR_LT); }
821 ">" { return curr_lexer->handle_op (">", EXPR_GT); } 813 ">" { return curr_lexer->handle_op (">", EXPR_GT); }
822 "*" { return curr_lexer->handle_op ("*", '*'); } 814 "*" { return curr_lexer->handle_op ("*", '*'); }
823 815
824 "/" { 816 "/" {
825 int prev_tok = curr_lexer->previous_token_value (); 817 int prev_tok = curr_lexer->previous_token_value ();
826 bool space_before = curr_lexer->space_follows_previous_token (); 818 bool space_before = curr_lexer->space_follows_previous_token ();
827 int c = curr_lexer->text_yyinput (); 819 int c = curr_lexer->text_yyinput ();
836 } 828 }
837 else 829 else
838 return curr_lexer->handle_op ("/", '/'); 830 return curr_lexer->handle_op ("/", '/');
839 } 831 }
840 832
841 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } 833 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); }
842 "^" { return curr_lexer->handle_op ("^", POW); } 834 "^" { return curr_lexer->handle_op ("^", POW); }
843 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } 835 "**" { return curr_lexer->handle_incompatible_op ("**", POW); }
844 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } 836 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
845 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } 837 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
846 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } 838 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
847 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } 839 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
848 840
849 ";" { 841 ";" {
850 bool at_beginning_of_statement 842 bool at_beginning_of_statement
851 = (! (curr_lexer->whitespace_is_significant () 843 = (! (curr_lexer->whitespace_is_significant ()
852 || curr_lexer->looking_at_object_index.front ())); 844 || curr_lexer->looking_at_object_index.front ()));
1028 1020
1029 %{ 1021 %{
1030 // = and op= operators. 1022 // = and op= operators.
1031 %} 1023 %}
1032 1024
1033 "=" { 1025 "=" { return curr_lexer->handle_op ("=", '='); }
1034 int tok = curr_lexer->handle_assign_op ("=", '='); 1026 "+=" { return curr_lexer->handle_incompatible_op ("+=", ADD_EQ); }
1035 if (tok < 0) 1027 "-=" { return curr_lexer->handle_incompatible_op ("-=", SUB_EQ); }
1036 { 1028 "*=" { return curr_lexer->handle_incompatible_op ("*=", MUL_EQ); }
1037 yyless (0); 1029 "/=" { return curr_lexer->handle_incompatible_op ("/=", DIV_EQ); }
1038 curr_lexer->xunput ('@'); 1030 "\\=" { return curr_lexer->handle_incompatible_op ("\\=", LEFTDIV_EQ); }
1039 curr_lexer->push_start_state (KLUGE); 1031 ".+=" { return curr_lexer->handle_incompatible_op (".+=", ADD_EQ); }
1040 } 1032 ".-=" { return curr_lexer->handle_incompatible_op (".-=", SUB_EQ); }
1041 else 1033 ".*=" { return curr_lexer->handle_incompatible_op (".*=", EMUL_EQ); }
1042 return tok; 1034 "./=" { return curr_lexer->handle_incompatible_op ("./=", EDIV_EQ); }
1043 } 1035 ".\\=" { return curr_lexer->handle_incompatible_op (".\\=", ELEFTDIV_EQ); }
1044 1036 "^=" { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); }
1045 "+=" { 1037 "**=" { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); }
1046 int tok = curr_lexer->handle_incompatible_assign_op ("+=", ADD_EQ); 1038 ".^=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); }
1047 if (tok < 0) 1039 ".**=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); }
1048 { 1040 "&=" { return curr_lexer->handle_incompatible_op ("&=", AND_EQ); }
1049 yyless (0); 1041 "|=" { return curr_lexer->handle_incompatible_op ("|=", OR_EQ); }
1050 curr_lexer->xunput ('@'); 1042 "<<=" { return curr_lexer->handle_incompatible_op ("<<=", LSHIFT_EQ); }
1051 curr_lexer->push_start_state (KLUGE); 1043 ">>=" { return curr_lexer->handle_incompatible_op (">>=", RSHIFT_EQ); }
1052 }
1053 else
1054 return tok;
1055 }
1056
1057 "-=" {
1058 int tok = curr_lexer->handle_incompatible_assign_op ("-=", SUB_EQ);
1059 if (tok < 0)
1060 {
1061 yyless (0);
1062 curr_lexer->xunput ('@');
1063 curr_lexer->push_start_state (KLUGE);
1064 }
1065 else
1066 return tok;
1067 }
1068
1069 "*=" {
1070 int tok = curr_lexer->handle_incompatible_assign_op ("*=", MUL_EQ);
1071 if (tok < 0)
1072 {
1073 yyless (0);
1074 curr_lexer->xunput ('@');
1075 curr_lexer->push_start_state (KLUGE);
1076 }
1077 else
1078 return tok;
1079 }
1080
1081 "/=" {
1082 int tok = curr_lexer->handle_incompatible_assign_op ("/=", DIV_EQ);
1083 if (tok < 0)
1084 {
1085 yyless (0);
1086 curr_lexer->xunput ('@');
1087 curr_lexer->push_start_state (KLUGE);
1088 }
1089 else
1090 return tok;
1091 }
1092
1093 "\\=" {
1094 int tok = curr_lexer->handle_incompatible_assign_op ("\\=", LEFTDIV_EQ);
1095 if (tok < 0)
1096 {
1097 yyless (0);
1098 curr_lexer->xunput ('@');
1099 curr_lexer->push_start_state (KLUGE);
1100 }
1101 else
1102 return tok;
1103 }
1104
1105 ".+=" {
1106 int tok = curr_lexer->handle_incompatible_assign_op (".+=", ADD_EQ);
1107 if (tok < 0)
1108 {
1109 yyless (0);
1110 curr_lexer->xunput ('@');
1111 curr_lexer->push_start_state (KLUGE);
1112 }
1113 else
1114 return tok;
1115 }
1116
1117 ".-=" {
1118 int tok = curr_lexer->handle_incompatible_assign_op (".-=", SUB_EQ);
1119 if (tok < 0)
1120 {
1121 yyless (0);
1122 curr_lexer->xunput ('@');
1123 curr_lexer->push_start_state (KLUGE);
1124 }
1125 else
1126 return tok;
1127 }
1128
1129 ".*=" {
1130 int tok = curr_lexer->handle_incompatible_assign_op (".*=", EMUL_EQ);
1131 if (tok < 0)
1132 {
1133 yyless (0);
1134 curr_lexer->xunput ('@');
1135 curr_lexer->push_start_state (KLUGE);
1136 }
1137 else
1138 return tok;
1139 }
1140
1141 "./=" {
1142 int tok = curr_lexer->handle_incompatible_assign_op ("./=", EDIV_EQ);
1143 if (tok < 0)
1144 {
1145 yyless (0);
1146 curr_lexer->xunput ('@');
1147 curr_lexer->push_start_state (KLUGE);
1148 }
1149 else
1150 return tok;
1151 }
1152
1153 ".\\=" {
1154 int tok = curr_lexer->handle_incompatible_assign_op (".\\=", ELEFTDIV_EQ);
1155 if (tok < 0)
1156 {
1157 yyless (0);
1158 curr_lexer->xunput ('@');
1159 curr_lexer->push_start_state (KLUGE);
1160 }
1161 else
1162 return tok;
1163 }
1164
1165 "^=" {
1166 int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ);
1167 if (tok < 0)
1168 {
1169 yyless (0);
1170 curr_lexer->xunput ('@');
1171 curr_lexer->push_start_state (KLUGE);
1172 }
1173 else
1174 return tok;
1175 }
1176
1177 "**=" {
1178 int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ);
1179 if (tok < 0)
1180 {
1181 yyless (0);
1182 curr_lexer->xunput ('@');
1183 curr_lexer->push_start_state (KLUGE);
1184 }
1185 else
1186 return tok;
1187 }
1188
1189 ".^=" {
1190 int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ);
1191 if (tok < 0)
1192 {
1193 yyless (0);
1194 curr_lexer->xunput ('@');
1195 curr_lexer->push_start_state (KLUGE);
1196 }
1197 else
1198 return tok;
1199 }
1200
1201 ".**=" {
1202 int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ);
1203 if (tok < 0)
1204 {
1205 yyless (0);
1206 curr_lexer->xunput ('@');
1207 curr_lexer->push_start_state (KLUGE);
1208 }
1209 else
1210 return tok;
1211 }
1212
1213 "&=" {
1214 int tok = curr_lexer->handle_incompatible_assign_op ("&=", AND_EQ);
1215 if (tok < 0)
1216 {
1217 yyless (0);
1218 curr_lexer->xunput ('@');
1219 curr_lexer->push_start_state (KLUGE);
1220 }
1221 else
1222 return tok;
1223 }
1224
1225 "|=" {
1226 int tok = curr_lexer->handle_incompatible_assign_op ("|=", OR_EQ);
1227 if (tok < 0)
1228 {
1229 yyless (0);
1230 curr_lexer->xunput ('@');
1231 curr_lexer->push_start_state (KLUGE);
1232 }
1233 else
1234 return tok;
1235 }
1236
1237 "<<=" {
1238 int tok = curr_lexer->handle_incompatible_assign_op ("<<=", LSHIFT_EQ);
1239 if (tok < 0)
1240 {
1241 yyless (0);
1242 curr_lexer->xunput ('@');
1243 curr_lexer->push_start_state (KLUGE);
1244 }
1245 else
1246 return tok;
1247 }
1248
1249 ">>=" {
1250 int tok = curr_lexer->handle_incompatible_assign_op (">>=", RSHIFT_EQ);
1251 if (tok < 0)
1252 {
1253 yyless (0);
1254 curr_lexer->xunput ('@');
1255 curr_lexer->push_start_state (KLUGE);
1256 }
1257 else
1258 return tok;
1259 }
1260 1044
1261 "{" { 1045 "{" {
1262 curr_lexer->lexer_debug ("{"); 1046 curr_lexer->lexer_debug ("{");
1263 1047
1264 bool unput_comma = false; 1048 bool unput_comma = false;
3027 current_input_column += flex_yyleng (); 2811 current_input_column += flex_yyleng ();
3028 looking_for_object_index = false; 2812 looking_for_object_index = false;
3029 } 2813 }
3030 2814
3031 return kw_token; 2815 return kw_token;
3032 }
3033
3034 // See if we have a plot keyword (title, using, with, or clear).
3035
3036 int c1 = text_yyinput ();
3037
3038 bool next_tok_is_eq = false;
3039 if (c1 == '=')
3040 {
3041 int c2 = text_yyinput ();
3042 xunput (c2);
3043
3044 if (c2 != '=')
3045 next_tok_is_eq = true;
3046 }
3047
3048 xunput (c1);
3049
3050 // Kluge alert.
3051 //
3052 // If we are looking at a text style function, set up to gobble its
3053 // arguments.
3054 //
3055 // If the following token is '=', or if we are parsing a function
3056 // return list or function parameter list, or if we are looking at
3057 // something like [ab,cd] = foo (), force the symbol to be inserted
3058 // as a variable in the current symbol table.
3059
3060 if (! is_variable (tok))
3061 {
3062 if (next_tok_is_eq
3063 || looking_at_decl_list
3064 || looking_at_return_list
3065 || (looking_at_parameter_list
3066 && ! looking_at_initializer_expression))
3067 {
3068 symbol_table::force_variable (tok);
3069 }
3070 else if (looking_at_matrix_or_assign_lhs)
3071 {
3072 pending_local_variables.insert (tok);
3073 }
3074 } 2816 }
3075 2817
3076 // Find the token in the symbol table. Beware the magic 2818 // Find the token in the symbol table. Beware the magic
3077 // transformation of the end keyword... 2819 // transformation of the end keyword...
3078 2820
3296 case PERSISTENT: std::cerr << "PERSISTENT\n"; break; 3038 case PERSISTENT: std::cerr << "PERSISTENT\n"; break;
3297 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; 3039 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
3298 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; 3040 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
3299 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; 3041 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
3300 case FCN: std::cerr << "FCN\n"; break; 3042 case FCN: std::cerr << "FCN\n"; break;
3301 case CHOOSE_ASSIGNMENT: std::cerr << "CHOOSE_ASSIGNMENT\n"; break;
3302 case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break; 3043 case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break;
3303 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break; 3044 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
3304 case METAQUERY: std::cerr << "METAQUERY\n"; break; 3045 case METAQUERY: std::cerr << "METAQUERY\n"; break;
3305 case GET: std::cerr << "GET\n"; break; 3046 case GET: std::cerr << "GET\n"; break;
3306 case SET: std::cerr << "SET\n"; break; 3047 case SET: std::cerr << "SET\n"; break;
3404 3145
3405 case LINE_COMMENT_START: 3146 case LINE_COMMENT_START:
3406 std::cerr << "LINE_COMMENT_START" << std::endl; 3147 std::cerr << "LINE_COMMENT_START" << std::endl;
3407 break; 3148 break;
3408 3149
3409 case KLUGE:
3410 std::cerr << "KLUGE" << std::endl;
3411 break;
3412
3413 default: 3150 default:
3414 std::cerr << "UNKNOWN START STATE!" << std::endl; 3151 std::cerr << "UNKNOWN START STATE!" << std::endl;
3415 break; 3152 break;
3416 } 3153 }
3417 } 3154 }
3476 return maybe_unput_comma_before_unary_op (tok) 3213 return maybe_unput_comma_before_unary_op (tok)
3477 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false); 3214 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false);
3478 } 3215 }
3479 3216
3480 int 3217 int
3481 octave_lexer::handle_assign_op (const char *pattern, int tok)
3482 {
3483 lexer_debug (pattern);
3484
3485 return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs)
3486 ? -1 : handle_op_internal (pattern, tok, false, false, false, true);
3487 }
3488
3489 int
3490 octave_lexer::handle_incompatible_assign_op (const char *pattern, int tok)
3491 {
3492 lexer_debug (pattern);
3493
3494 return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs)
3495 ? -1 : handle_op_internal (pattern, tok, false, false, false, false);
3496 }
3497
3498 int
3499 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, 3218 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert,
3500 bool bos, bool qit, bool compat) 3219 bool bos, bool qit, bool compat)
3501 { 3220 {
3502 if (! compat) 3221 if (! compat)
3503 gripe_matlab_incompatible_operator (flex_yytext ()); 3222 gripe_matlab_incompatible_operator (flex_yytext ());