Mercurial > hg > octave-nkf
diff libinterp/parse-tree/lex.ll @ 16263:9acb86e6ac90
4/10 commits reworking the lexer
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 11 Mar 2013 14:28:11 -0400 |
parents | b45a90cdb0ae |
children | 6077d13ddb3b 71ee3afedb69 |
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.ll +++ b/libinterp/parse-tree/lex.ll @@ -244,8 +244,8 @@ int tok = curr_lexer->previous_token_value (); - if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{')) - curr_lexer->xunput (','); + if (! (tok == ';' || tok == '[' || tok == '{')) + curr_lexer->xunput (';'); } <KLUGE>@ { @@ -301,27 +301,47 @@ \[ { curr_lexer->lexer_debug ("\\["); - curr_lexer->nesting_level.bracket (); - - curr_lexer->looking_at_object_index.push_front (false); - - curr_lexer->current_input_column += yyleng; - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = false; - - if (curr_lexer->defining_func - && ! curr_lexer->parsed_function_name.top ()) - curr_lexer->looking_at_return_list = true; + bool unput_comma = false; + + if (curr_lexer->whitespace_is_significant () + && curr_lexer->space_follows_previous_token ()) + { + int tok = curr_lexer->previous_token_value (); + + if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop ())) + unput_comma = true; + } + + if (unput_comma) + { + yyless (0); + curr_lexer->xunput (','); + } else - curr_lexer->looking_at_matrix_or_assign_lhs = true; - - curr_lexer->decrement_promptflag (); - - curr_lexer->bracketflag++; - - curr_lexer->push_start_state (MATRIX_START); - - return curr_lexer->count_token ('['); + { + curr_lexer->nesting_level.bracket (); + + curr_lexer->looking_at_object_index.push_front (false); + + curr_lexer->current_input_column += yyleng; + curr_lexer->looking_for_object_index = false; + curr_lexer->at_beginning_of_statement = false; + + if (curr_lexer->defining_func + && ! curr_lexer->parsed_function_name.top ()) + curr_lexer->looking_at_return_list = true; + else + curr_lexer->looking_at_matrix_or_assign_lhs = true; + + curr_lexer->decrement_promptflag (); + + curr_lexer->bracketflag++; + + curr_lexer->push_start_state (MATRIX_START); + + return curr_lexer->count_token ('['); + } } \] { @@ -485,9 +505,12 @@ {NUMBER}{Im} { curr_lexer->lexer_debug ("{NUMBER}{Im}"); + int tok = curr_lexer->previous_token_value (); + if (curr_lexer->whitespace_is_significant () && curr_lexer->space_follows_previous_token () - && ! curr_lexer->previous_token_is_binop ()) + && ! (tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop ())) { yyless (0); unput (','); @@ -508,9 +531,12 @@ {NUMBER} { curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); + int tok = curr_lexer->previous_token_value (); + if (curr_lexer->whitespace_is_significant () && curr_lexer->space_follows_previous_token () - && ! curr_lexer->previous_token_is_binop ()) + && ! (tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop ())) { yyless (0); unput (','); @@ -571,7 +597,8 @@ } else { - if (curr_lexer->previous_token_may_be_command ()) + if (! curr_lexer->looking_at_decl_list + && curr_lexer->previous_token_may_be_command ()) { yyless (0); curr_lexer->push_start_state (COMMAND_START); @@ -696,7 +723,7 @@ } else { - if (tok == ',' || tok == ';' + if (tok == ',' || tok == ';' || tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ()) { curr_lexer->current_input_column++; @@ -736,7 +763,7 @@ { if (curr_lexer->space_follows_previous_token ()) { - if (tok == '[' || tok == '{' + if (tok == ',' || tok == ';' || tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ()) { curr_lexer->current_input_column++; @@ -785,8 +812,6 @@ "|" { return curr_lexer->handle_op ("|", EXPR_OR); } "<" { return curr_lexer->handle_op ("<", EXPR_LT); } ">" { return curr_lexer->handle_op (">", EXPR_GT); } -"+" { return curr_lexer->handle_op ("+", '+'); } -"-" { return curr_lexer->handle_op ("-", '-'); } "*" { return curr_lexer->handle_op ("*", '*'); } "/" { return curr_lexer->handle_op ("/", '/'); } "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } @@ -796,10 +821,71 @@ "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } -"~" { return curr_lexer->handle_op ("~", EXPR_NOT); } -"!" { return curr_lexer->handle_incompatible_op ("!", EXPR_NOT); } ";" { return curr_lexer->handle_op (";", ';', true, true); } +"+" { + int tok = curr_lexer->handle_unary_op ("+", '+'); + + if (tok < 0) + { + yyless (0); + curr_lexer->xunput (','); + } + else + return tok; + } + +"-" { + int prev_tok = curr_lexer->previous_token_value (); + bool space_before = curr_lexer->space_follows_previous_token (); + int c = curr_lexer->text_yyinput (); + curr_lexer->xunput (c); + bool space_after = (c == ' ' || c == '\t'); + + if (space_before && ! space_after + && curr_lexer->previous_token_may_be_command ()) + { + yyless (0); + curr_lexer->push_start_state (COMMAND_START); + } + else + { + int tok = curr_lexer->handle_unary_op ("-", '-'); + + if (tok < 0) + { + yyless (0); + curr_lexer->xunput (','); + } + else + return tok; + } + } + +"~" { + int tok = curr_lexer->handle_unary_op ("~", EXPR_NOT); + + if (tok < 0) + { + yyless (0); + curr_lexer->xunput (','); + } + else + return tok; + } + +"!" { + int tok = curr_lexer->handle_incompatible_unary_op ("!", EXPR_NOT); + + if (tok < 0) + { + yyless (0); + curr_lexer->xunput (','); + } + else + return tok; + } + "," { return curr_lexer->handle_op (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); @@ -810,35 +896,70 @@ } "++" { - return curr_lexer->handle_incompatible_op - ("++", PLUS_PLUS, true, false, true); + int tok = curr_lexer->handle_incompatible_unary_op + ("++", PLUS_PLUS, true, false, true); + + if (tok < 0) + { + yyless (0); + curr_lexer->xunput (','); + } + else + return tok; } "--" { - ; - return curr_lexer->handle_incompatible_op - ("--", MINUS_MINUS, true, false, true); + int tok = curr_lexer->handle_incompatible_unary_op + ("--", MINUS_MINUS, true, false, true); + + if (tok < 0) + { + yyless (0); + curr_lexer->xunput (','); + } + else + return tok; } "(" { curr_lexer->lexer_debug ("("); - // If we are looking for an object index, then push TRUE for - // looking_at_object_index. Otherwise, just push whatever state - // is current (so that we can pop it off the stack when we find - // the matching close paren). - - curr_lexer->looking_at_object_index.push_front - (curr_lexer->looking_for_object_index); - - curr_lexer->looking_at_indirect_ref = false; - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->nesting_level.paren (); - curr_lexer->decrement_promptflag (); - - return curr_lexer->handle_token ('('); + bool unput_comma = false; + + if (curr_lexer->whitespace_is_significant () + && curr_lexer->space_follows_previous_token ()) + { + int tok = curr_lexer->previous_token_value (); + + if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop ())) + unput_comma = true; + } + + if (unput_comma) + { + yyless (0); + curr_lexer->xunput (','); + } + else + { + // If we are looking for an object index, then push TRUE for + // looking_at_object_index. Otherwise, just push whatever state + // is current (so that we can pop it off the stack when we find + // the matching close paren). + + curr_lexer->looking_at_object_index.push_front + (curr_lexer->looking_for_object_index); + + curr_lexer->looking_at_indirect_ref = false; + curr_lexer->looking_for_object_index = false; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->nesting_level.paren (); + curr_lexer->decrement_promptflag (); + + return curr_lexer->handle_token ('('); + } } ")" { @@ -1102,22 +1223,42 @@ "{" { curr_lexer->lexer_debug ("{"); - curr_lexer->nesting_level.brace (); - - curr_lexer->looking_at_object_index.push_front - (curr_lexer->looking_for_object_index); - - curr_lexer->current_input_column += yyleng; - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->decrement_promptflag (); - - curr_lexer->braceflag++; - - curr_lexer->push_start_state (MATRIX_START); - - return curr_lexer->count_token ('{'); + bool unput_comma = false; + + if (curr_lexer->whitespace_is_significant () + && curr_lexer->space_follows_previous_token ()) + { + int tok = curr_lexer->previous_token_value (); + + if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop ())) + unput_comma = true; + } + + if (unput_comma) + { + yyless (0); + curr_lexer->xunput (','); + } + else + { + curr_lexer->nesting_level.brace (); + + curr_lexer->looking_at_object_index.push_front + (curr_lexer->looking_for_object_index); + + curr_lexer->current_input_column += yyleng; + curr_lexer->looking_for_object_index = false; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->decrement_promptflag (); + + curr_lexer->braceflag++; + + curr_lexer->push_start_state (MATRIX_START); + + return curr_lexer->count_token ('{'); + } } "}" { @@ -1902,11 +2043,12 @@ // fall through ... case persistent_kw: + case global_kw: + looking_at_decl_list = true; break; case case_kw: case elseif_kw: - case global_kw: case until_kw: break; @@ -3259,6 +3401,8 @@ octave_lexer::handle_op (const char *pattern, int tok, bool convert, bool bos, bool qit) { + lexer_debug (pattern); + return handle_op_internal (pattern, tok, convert, bos, qit, true); } @@ -3266,9 +3410,55 @@ octave_lexer::handle_incompatible_op (const char *pattern, int tok, bool convert, bool bos, bool qit) { + lexer_debug (pattern); + return handle_op_internal (pattern, tok, convert, bos, qit, false); } +bool +octave_lexer::maybe_unput_comma_before_unary_op (int tok) +{ + int prev_tok = previous_token_value (); + + bool unput_comma = false; + + if (whitespace_is_significant () && space_follows_previous_token ()) + { + int c = text_yyinput (); + xunput (c); + + bool space_after = (c == ' ' || c == '\t'); + + if (! (prev_tok == ';' || prev_tok == ',' + || prev_tok == '[' || prev_tok == '{' + || previous_token_is_binop () + || ((tok == '+' || tok == '-') && space_after))) + unput_comma = true; + } + + return unput_comma; +} + +int +octave_lexer::handle_unary_op (const char *pattern, int tok, bool convert, + bool bos, bool qit) +{ + lexer_debug (pattern); + + return maybe_unput_comma_before_unary_op (tok) + ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, true); +} + +int +octave_lexer::handle_incompatible_unary_op (const char *pattern, int tok, + bool convert, bool bos, bool qit) +{ + lexer_debug (pattern); + + return maybe_unput_comma_before_unary_op (tok) + ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false); +} + int octave_lexer::handle_assign_op (const char *pattern, int tok) { @@ -3291,8 +3481,6 @@ octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, bool bos, bool qit, bool compat) { - lexer_debug (pattern); - if (! compat) gripe_matlab_incompatible_operator (flex_yytext ());