octave-nkf: libinterp/parse-tree/lex.ll comparison

comparison libinterp/parse-tree/lex.ll @ 16263:9acb86e6ac90

4/10 commits reworking the lexer

author	John W. Eaton <jwe@octave.org>
date	Mon, 11 Mar 2013 14:28:11 -0400
parents	b45a90cdb0ae
children	6077d13ddb3b 71ee3afedb69

comparison

equal deleted inserted replaced

-:b45a90cdb0ae
+:9acb86e6ac90
 <MATRIX_START>{NL} {
 curr_lexer->lexer_debug ("<MATRIX_START>{NL}");
 int tok = curr_lexer->previous_token_value ();
-if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{'))
+if (! (tok == ';' || tok == '[' || tok == '{'))
-curr_lexer->xunput (',');
+curr_lexer->xunput (';');
 }
 <KLUGE>@ {
 curr_lexer->lexer_debug ("<KLUGE>@");
 curr_lexer->pop_start_state ();
 }
 \[ {
 curr_lexer->lexer_debug ("\\[");
-curr_lexer->nesting_level.bracket ();
+bool unput_comma = false;
-curr_lexer->looking_at_object_index.push_front (false);
+if (curr_lexer->whitespace_is_significant ()
+&& curr_lexer->space_follows_previous_token ())
-curr_lexer->current_input_column += yyleng;
+{
-curr_lexer->looking_for_object_index = false;
+int tok = curr_lexer->previous_token_value ();
-curr_lexer->at_beginning_of_statement = false;
+if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
-if (curr_lexer->defining_func
+|| curr_lexer->previous_token_is_binop ()))
-&& ! curr_lexer->parsed_function_name.top ())
+unput_comma = true;
-curr_lexer->looking_at_return_list = true;
+}
+if (unput_comma)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
 else
-curr_lexer->looking_at_matrix_or_assign_lhs = true;
+{
+curr_lexer->nesting_level.bracket ();
-curr_lexer->decrement_promptflag ();
+curr_lexer->looking_at_object_index.push_front (false);
-curr_lexer->bracketflag++;
+curr_lexer->current_input_column += yyleng;
-curr_lexer->push_start_state (MATRIX_START);
+curr_lexer->looking_for_object_index = false;
+curr_lexer->at_beginning_of_statement = false;
-return curr_lexer->count_token ('[');
+if (curr_lexer->defining_func
+&& ! curr_lexer->parsed_function_name.top ())
+curr_lexer->looking_at_return_list = true;
+else
+curr_lexer->looking_at_matrix_or_assign_lhs = true;
+curr_lexer->decrement_promptflag ();
+curr_lexer->bracketflag++;
+curr_lexer->push_start_state (MATRIX_START);
+return curr_lexer->count_token ('[');
+}
 }
 \] {
 curr_lexer->lexer_debug ("\\]");
 %}
 {NUMBER}{Im} {
 curr_lexer->lexer_debug ("{NUMBER}{Im}");
+int tok = curr_lexer->previous_token_value ();
 if (curr_lexer->whitespace_is_significant ()
 && curr_lexer->space_follows_previous_token ()
-&& ! curr_lexer->previous_token_is_binop ())
+&& ! (tok == '[' || tok == '{'
+|| curr_lexer->previous_token_is_binop ()))
 {
 yyless (0);
 unput (',');
 }
 else
 {D}+/\.[\*/\\^\'] |
 {NUMBER} {
 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}");
+int tok = curr_lexer->previous_token_value ();
 if (curr_lexer->whitespace_is_significant ()
 && curr_lexer->space_follows_previous_token ()
-&& ! curr_lexer->previous_token_is_binop ())
+&& ! (tok == '[' || tok == '{'
+|| curr_lexer->previous_token_is_binop ()))
 {
 yyless (0);
 unput (',');
 }
 else
 yyless (0);
 unput (',');
 }
 else
 {
-if (curr_lexer->previous_token_may_be_command ())
+if (! curr_lexer->looking_at_decl_list
+&& curr_lexer->previous_token_may_be_command ())
 {
 yyless (0);
 curr_lexer->push_start_state (COMMAND_START);
 }
 else
 curr_lexer->xunput (',');
 }
 }
 else
 {
-if (tok == ',' || tok == ';'
+if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
 || curr_lexer->previous_token_is_binop ())
 {
 curr_lexer->current_input_column++;
 int retval = curr_lexer->handle_string ('\'');
 return curr_lexer->count_token_internal (retval);
 if (curr_lexer->whitespace_is_significant ())
 {
 if (curr_lexer->space_follows_previous_token ())
 {
-if (tok == '[' || tok == '{'
+if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
 || curr_lexer->previous_token_is_binop ())
 {
 curr_lexer->current_input_column++;
 int retval = curr_lexer->handle_string ('"');
 return curr_lexer->count_token_internal (retval);
 ">="    { return curr_lexer->handle_op (">=", EXPR_GE); }
 "&"     { return curr_lexer->handle_op ("&", EXPR_AND); }
 "|"     { return curr_lexer->handle_op ("|", EXPR_OR); }
 "<"     { return curr_lexer->handle_op ("<", EXPR_LT); }
 ">"     { return curr_lexer->handle_op (">", EXPR_GT); }
-"+"     { return curr_lexer->handle_op ("+", '+'); }
-"-"     { return curr_lexer->handle_op ("-", '-'); }
 "*"     { return curr_lexer->handle_op ("*", '*'); }
 "/"     { return curr_lexer->handle_op ("/", '/'); }
 "\\"    { return curr_lexer->handle_op ("\\", LEFTDIV); }
 "^"     { return curr_lexer->handle_op ("^", POW); }
 "**"    { return curr_lexer->handle_incompatible_op ("**", POW); }
 "&&"    { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
 "||"    { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
 "<<"    { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
 ">>"    { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
-"~"     { return curr_lexer->handle_op ("~", EXPR_NOT); }
-"!"     { return curr_lexer->handle_incompatible_op ("!", EXPR_NOT); }
 ";"     { return curr_lexer->handle_op (";", ';', true, true); }
+"+" {
+int tok = curr_lexer->handle_unary_op ("+", '+');
+if (tok < 0)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
+else
+return tok;
+}
+"-" {
+int prev_tok = curr_lexer->previous_token_value ();
+bool space_before = curr_lexer->space_follows_previous_token ();
+int c = curr_lexer->text_yyinput ();
+curr_lexer->xunput (c);
+bool space_after = (c == ' ' || c == '\t');
+if (space_before && ! space_after
+&& curr_lexer->previous_token_may_be_command ())
+{
+yyless (0);
+curr_lexer->push_start_state (COMMAND_START);
+}
+else
+{
+int tok = curr_lexer->handle_unary_op ("-", '-');
+if (tok < 0)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
+else
+return tok;
+}
+}
+"~" {
+int tok = curr_lexer->handle_unary_op ("~", EXPR_NOT);
+if (tok < 0)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
+else
+return tok;
+}
+"!" {
+int tok = curr_lexer->handle_incompatible_unary_op ("!", EXPR_NOT);
+if (tok < 0)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
+else
+return tok;
+}
 "," {
 return curr_lexer->handle_op
 (",", ',', true, ! curr_lexer->looking_at_object_index.front ());
 }
 ".'" {
 return curr_lexer->handle_op (".'", TRANSPOSE, true, false);
 }
 "++" {
-return curr_lexer->handle_incompatible_op
+int tok = curr_lexer->handle_incompatible_unary_op
 ("++", PLUS_PLUS, true, false, true);
+if (tok < 0)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
+else
+return tok;
 }
 "--" {
-;
+int tok = curr_lexer->handle_incompatible_unary_op
-return curr_lexer->handle_incompatible_op
+("--", MINUS_MINUS, true, false, true);
-("--", MINUS_MINUS, true, false, true);
+if (tok < 0)
+{
+yyless (0);
+curr_lexer->xunput (',');
+}
+else
+return tok;
 }
 "(" {
 curr_lexer->lexer_debug ("(");
-// If we are looking for an object index, then push TRUE for
+bool unput_comma = false;
-// looking_at_object_index.  Otherwise, just push whatever state
-// is current (so that we can pop it off the stack when we find
+if (curr_lexer->whitespace_is_significant ()
-// the matching close paren).
+&& curr_lexer->space_follows_previous_token ())
+{
-curr_lexer->looking_at_object_index.push_front
+int tok = curr_lexer->previous_token_value ();
-(curr_lexer->looking_for_object_index);
+if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
-curr_lexer->looking_at_indirect_ref = false;
+|| curr_lexer->previous_token_is_binop ()))
-curr_lexer->looking_for_object_index = false;
+unput_comma = true;
-curr_lexer->at_beginning_of_statement = false;
+}
-curr_lexer->nesting_level.paren ();
+if (unput_comma)
-curr_lexer->decrement_promptflag ();
+{
+yyless (0);
-return curr_lexer->handle_token ('(');
+curr_lexer->xunput (',');
+}
+else
+{
+// If we are looking for an object index, then push TRUE for
+// looking_at_object_index.  Otherwise, just push whatever state
+// is current (so that we can pop it off the stack when we find
+// the matching close paren).
+curr_lexer->looking_at_object_index.push_front
+(curr_lexer->looking_for_object_index);
+curr_lexer->looking_at_indirect_ref = false;
+curr_lexer->looking_for_object_index = false;
+curr_lexer->at_beginning_of_statement = false;
+curr_lexer->nesting_level.paren ();
+curr_lexer->decrement_promptflag ();
+return curr_lexer->handle_token ('(');
+}
 }
 ")" {
 curr_lexer->lexer_debug (")");
 }
 "{" {
 curr_lexer->lexer_debug ("{");
-curr_lexer->nesting_level.brace ();
+bool unput_comma = false;
-curr_lexer->looking_at_object_index.push_front
+if (curr_lexer->whitespace_is_significant ()
-(curr_lexer->looking_for_object_index);
+&& curr_lexer->space_follows_previous_token ())
+{
-curr_lexer->current_input_column += yyleng;
+int tok = curr_lexer->previous_token_value ();
-curr_lexer->looking_for_object_index = false;
-curr_lexer->at_beginning_of_statement = false;
+if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+|| curr_lexer->previous_token_is_binop ()))
-curr_lexer->decrement_promptflag ();
+unput_comma = true;
+}
-curr_lexer->braceflag++;
+if (unput_comma)
-curr_lexer->push_start_state (MATRIX_START);
+{
+yyless (0);
-return curr_lexer->count_token ('{');
+curr_lexer->xunput (',');
+}
+else
+{
+curr_lexer->nesting_level.brace ();
+curr_lexer->looking_at_object_index.push_front
+(curr_lexer->looking_for_object_index);
+curr_lexer->current_input_column += yyleng;
+curr_lexer->looking_for_object_index = false;
+curr_lexer->at_beginning_of_statement = false;
+curr_lexer->decrement_promptflag ();
+curr_lexer->braceflag++;
+curr_lexer->push_start_state (MATRIX_START);
+return curr_lexer->count_token ('{');
+}
 }
 "}" {
 curr_lexer->lexer_debug ("}");
 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d",
 input_line_number);
 // fall through ...
 case persistent_kw:
+case global_kw:
+looking_at_decl_list = true;
 break;
 case case_kw:
 case elseif_kw:
-case global_kw:
 case until_kw:
 break;
 case end_kw:
 if (inside_any_object_index ()
 int
 octave_lexer::handle_op (const char *pattern, int tok, bool convert,
 bool bos, bool qit)
 {
+lexer_debug (pattern);
 return handle_op_internal (pattern, tok, convert, bos, qit, true);
 }
 int
 octave_lexer::handle_incompatible_op (const char *pattern, int tok,
 bool convert, bool bos, bool qit)
 {
+lexer_debug (pattern);
 return handle_op_internal (pattern, tok, convert, bos, qit, false);
+}
+bool
+octave_lexer::maybe_unput_comma_before_unary_op (int tok)
+{
+int prev_tok = previous_token_value ();
+bool unput_comma = false;
+if (whitespace_is_significant () && space_follows_previous_token ())
+{
+int c = text_yyinput ();
+xunput (c);
+bool space_after = (c == ' ' || c == '\t');
+if (! (prev_tok == ';' || prev_tok == ','
+|| prev_tok == '[' || prev_tok == '{'
+|| previous_token_is_binop ()
+|| ((tok == '+' || tok == '-') && space_after)))
+unput_comma = true;
+}
+return unput_comma;
+}
+int
+octave_lexer::handle_unary_op (const char *pattern, int tok, bool convert,
+bool bos, bool qit)
+{
+lexer_debug (pattern);
+return maybe_unput_comma_before_unary_op (tok)
+? -1 : handle_op_internal (pattern, tok, convert, bos, qit, true);
+}
+int
+octave_lexer::handle_incompatible_unary_op (const char *pattern, int tok,
+bool convert, bool bos, bool qit)
+{
+lexer_debug (pattern);
+return maybe_unput_comma_before_unary_op (tok)
+? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false);
 }
 int
 octave_lexer::handle_assign_op (const char *pattern, int tok)
 {
 int
 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert,
 bool bos, bool qit, bool compat)
 {
-lexer_debug (pattern);
 if (! compat)
 gripe_matlab_incompatible_operator (flex_yytext ());
 push_token (new token (tok, input_line_number, current_input_column));

Mercurial > hg > octave-nkf

comparison libinterp/parse-tree/lex.ll @ 16263:9acb86e6ac90