changeset 11944:278afaecddd4 release-3-0-x

fix leaving stray '\r' in stream when reading from CRLF data file * * * fix CRLF issues with text-mode reading in windows when loading ascii data
author Benjamin Lindner <lindnerb@users.sourceforge.net>
date Wed, 18 Mar 2009 15:23:14 +0100
parents af4fa72ee250
children af62948f711b
files src/ChangeLog src/Makefile.in src/load-save.cc src/ls-ascii-helper.cc src/ls-ascii-helper.h src/ls-mat-ascii.cc src/ls-oct-ascii.cc src/ls-oct-ascii.h src/ov-fcn-handle.cc src/ov-fcn-inline.cc src/ov-range.cc src/ov-str-mat.cc
diffstat 12 files changed, 256 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,21 @@
+2009-03-18  Benjamin Lindner <lindnerb@users.sourceforge.net>
+
+	* ls-oct-ascii.cc (extract_keyword): fix leaving stray '\r' in stream
+	when reading from CRLF data file by replacing loop with call to
+	read_until_newline()
+
+2009-03-03  Benjamin Lindner  <lindnerb@users.sourceforge.net>
+
+	* ls-ascii-helper.h ls-ascii-helper.cc: New files, provide helper 
+	functions skip_until_newline(), skip_preceeding_newline() and
+	read_until_newline() that take care of CR/LF handling.
+	* Makefile.in: add new files
+	* load-save.cc: Open files always in binary mode in Fload
+	* ls-mat-ascii.cc (get_mat_data_input_line), ls-oct-ascii.cc 
+	(extract_keyword, read_ascii_data), ls-oct-ascii.h (extract_keyword), 
+	ov-fcn-handle.cc, ov-fcn-inline.cc, ov-range.cc, ov-str-mat.cc 
+	(load_ascii): Use helper functions 
+	
 2009-02-25  Marco Caliari <marco.caliari@univr.it>
 
 	* graphics.cc (base_properties::remove_child): Fix order of dims.
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -116,7 +116,7 @@
 	comment-list.h debug.h defun-dld.h defun-int.h defun.h \
 	dirfns.h dynamic-ld.h error.h file-io.h gripes.h help.h \
 	input.h lex.h load-path.h load-save.h ls-hdf5.h \
-	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h \
+	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h ls-ascii-helper.h \
 	ls-oct-binary.h ls-utils.h mex.h mexproto.h oct-errno.h \
 	oct-fstrm.h oct-hist.h oct-iostrm.h oct-map.h oct-obj.h \
 	oct-prcstrm.h oct-procbuf.h oct-stdstrm.h oct-stream.h \
@@ -186,7 +186,7 @@
 	cutils.c data.cc debug.cc defaults.cc defun.cc dirfns.cc \
 	dynamic-ld.cc error.cc file-io.cc graphics.cc gripes.cc \
 	help.cc input.cc lex.l load-path.cc load-save.cc ls-hdf5.cc \
-	ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \
+	ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc ls-ascii-helper.cc \
 	ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \
 	mex.cc oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \
 	oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stream.cc \
--- a/src/load-save.cc
+++ b/src/load-save.cc
@@ -906,15 +906,12 @@
 
 	  std::ios::openmode mode = std::ios::in;
 
-	  if (format == LS_BINARY
-#ifdef HAVE_HDF5
-	      || format == LS_HDF5
-#endif
-	      || format == LS_MAT_BINARY
-	      || format == LS_MAT5_BINARY
-	      || format == LS_MAT7_BINARY)
-	    mode |= std::ios::binary;
-
+	  // Open in binary mode in any case, to fix annoying bug that
+	  // text-mode opened streams cannot be seekg'ed/tellg'ed with
+	  // mingw32 (See http://oldwiki.mingw.org/index.php/Known%20Problems )
+	  // The CR/LF issues are handled in ls-ascii-helper.cc
+	  mode |= std::ios::binary;
+	  
 #ifdef HAVE_ZLIB
 	  if (use_zlib)
 	    {
new file mode 100644
--- /dev/null
+++ b/src/ls-ascii-helper.cc
@@ -0,0 +1,160 @@
+/*
+
+Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "ls-ascii-helper.h"
+
+#include <iostream>
+#include <sstream>
+
+// Helper functions for reading from ascii files.
+// These functions take care of CR/LF line endings, which arrive unconverted when files are opened in binary mode
+
+// Skip characters from stream IS up to the next newline (CR, LF or CRLF).
+// If KEEP_NEWLINE is false the newline is eaten as well, otherwise it is
+// left unread.  Does nothing if IS is already in a failed state.
+
+void
+skip_until_newline( std::istream& is, bool keep_newline )
+{
+  if (!is)
+    return;
+  
+  char c,d;
+  
+  while (is)
+  {
+      c = is.peek();
+      if (c == '\n' || c == '\r')
+      {
+	  // reached newline
+	  if (keep_newline == false)
+	  {
+	      // eat the CR or LF character
+	      is.get(d);
+	      
+	      // make sure that for binary-mode opened ascii files containing CRLF line endings
+	      // we skip the LF after CR...
+	      if (c == '\r' && is.peek()=='\n')
+	      {
+		  // yes, LF following CR, eat it...
+		  is.get(d);
+	      }
+	  }
+	  
+	  // Newline was found, and eaten from stream if keep_newline==false, so exit loop
+	  break;
+      }
+      else
+	  // no newline character peeked, so read (and discard) it and proceed to next character
+	  is.get(d);
+  }
+  
+  return;
+}
+
+
+// If the next character in stream IS is a newline (CR or LF, e.g. a leftover
+// from a previous read), eat it and all newline characters directly after it
+
+void
+skip_preceeding_newline( std::istream& is )
+{
+  if (!is)
+    return;
+  
+  char c,d;
+  
+  // Check if the next character in IS is a newline character
+  c = is.peek();
+  if (c == '\n' || c == '\r')
+  {
+      // Yes, at newline
+      do {
+	  // eat the CR or LF character
+	  is.get(d);
+	  
+	  // make sure that for binary-mode opened ascii files containing CRLF line endings
+	  // we skip the LF after CR...
+	  if (c == '\r' && is.peek() == '\n')
+	  {
+	      // yes, LF following CR, eat it...
+	      is.get(d);
+	  }
+	  
+	  // Peek at the next character (at end of input peek() yields EOF, which ends the loop)
+	  c = is.peek();
+      // Loop while still a newline ahead
+      } while( c == '\n' || c == '\r' );
+  }
+  
+  return;
+}
+
+
+// Read characters from stream IS until a newline (CR, LF or CRLF) or the
+// end of input is reached and return them as a std::string; the newline
+// itself is never part of the result.  If KEEP_NEWLINE is false the
+// newline is eaten from the stream, otherwise it is left unread.
+
+std::string
+read_until_newline( std::istream& is, bool keep_newline )
+{
+  if (!is)
+    return std::string();
+  
+  char c,d;
+  std::ostringstream buf;
+  
+  while (is)
+  {
+      c = is.peek();
+      if (c == '\n' || c == '\r')
+      {
+	  // reached newline
+	  if (keep_newline == false)
+	  {
+	      // eat the CR or LF character
+	      is.get(d);
+	      
+	      // make sure that for binary-mode opened ascii files containing
+	      // CRLF line endings we skip the LF after CR...
+	      if (c == '\r' && is.peek() == '\n')
+	      {
+		  // yes, LF following CR, eat it...
+		  is.get(d);
+	      }
+	  }
+	  
+	  // Newline was found, and eaten if keep_newline==false, so exit loop
+	  break;
+      }
+      else if (is.get(d))
+      {
+	  // No newline character peeked and get() succeeded, so store it.  At end
+	  // of input get() fails without assigning D, which must not be stored.
+	  buf << d;
+      }
+  }
+  
+  return buf.str();
+}
new file mode 100644
--- /dev/null
+++ b/src/ls-ascii-helper.h
@@ -0,0 +1,40 @@
+/*
+
+Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+#if !defined (octave_ls_ascii_helper_h)
+#define octave_ls_ascii_helper_h 1
+
+#include <iosfwd>
+#include <string>
+
+#include "oct-dlldefs.h"
+
+extern OCTINTERP_API void
+skip_until_newline( std::istream& is, bool keep_newline = false );
+
+extern OCTINTERP_API void
+skip_preceeding_newline( std::istream& is );
+
+extern OCTINTERP_API std::string
+read_until_newline( std::istream& is, bool keep_newline = false );
+
+#endif  // !defined (octave_ls_ascii_helper_h)
--- a/src/ls-mat-ascii.cc
+++ b/src/ls-mat-ascii.cc
@@ -65,6 +65,7 @@
 #include "dMatrix.h"
 
 #include "ls-mat-ascii.h"
+#include "ls-ascii-helper.h"
 
 static std::string
 get_mat_data_input_line (std::istream& is)
@@ -81,14 +82,16 @@
       while (is.get (c))
 	{
 	  if (c == '\n' || c == '\r')
-	    break;
+	    {
+	      if (c == '\r' && is.peek () == '\n')
+		is.ignore ();  // C is consumed already; only eat the LF of a CR/LF pair
+	      break;
+	    }
 
 	  if (c == '%' || c == '#')
 	    {
 	      // skip to end of line
-	      while (is.get (c))
-		if (c == '\n' || c == '\r')
-		  break;
+	      skip_until_newline (is, false);
 
 	      break;
 	    }
--- a/src/ls-oct-ascii.cc
+++ b/src/ls-oct-ascii.cc
@@ -108,14 +108,8 @@
 	      while (is.get (c) && (c == ' ' || c == '\t' || c == ':'))
 		; // Skip whitespace and the colon.
 
-	      if (c != '\n' && c != '\r')
-		{
-		  value << c;
-		  while (is.get (c) && c != '\n' && c != '\r')
-		    value << c;
-		}
-
-	      retval = value.str ();
+	      is.putback(c);
+	      retval = read_until_newline (is, false);
 	      break;
 	    }
 	  else if (next_only)
--- a/src/ls-oct-ascii.h
+++ b/src/ls-oct-ascii.h
@@ -29,6 +29,7 @@
 #include <string>
 
 #include "str-vec.h"
+#include "ls-ascii-helper.h"
 
 // Flag for cell elements
 #define CELL_ELT_TAG "<cell-element>"
@@ -103,8 +104,8 @@
 		is >> value;
 	      if (is)
 		status = true;
-	      while (is.get (c) && c != '\n' && c != '\r')
-		; // Skip to beginning of next line;
+	      // Skip to beginning of next line;
+	      skip_until_newline (is, false);
 	      break;
 	    }
 	  else if (next_only)
@@ -165,8 +166,8 @@
 		    is >> value;
 		  if (is)
 		    status = true;
-		  while (is.get (c) && c != '\n' && c != '\r')
-		    ; // Skip to beginning of next line;
+		  // Skip to beginning of next line;
+		  skip_until_newline (is, false);
 		  return status;
 		}
 	    }
--- a/src/ov-fcn-handle.cc
+++ b/src/ov-fcn-handle.cc
@@ -56,6 +56,7 @@
 #include "ls-oct-binary.h"
 #include "ls-hdf5.h"
 #include "ls-utils.h"
+#include "ls-ascii-helper.h"
 
 DEFINE_OCTAVE_ALLOCATOR (octave_fcn_handle);
 
@@ -330,26 +331,18 @@
     {
       octave_idx_type len = 0;
       char c;
-      std::ostringstream buf;
+      std::string buf;
 
       // Skip preceeding newline(s).
-      while (is.get (c) && c == '\n')
-	/* do nothing */;
+      skip_preceeding_newline (is);
 
       if (is)
 	{
-	  buf << c;
 
 	  // Get a line of text whitespace characters included, leaving
 	  // newline in the stream.
+	  buf = read_until_newline (is, true);
 
-	  while (is.peek () != '\n')
-	    {
-	      is.get (c);
-	      if (! is)
-		break;
-	      buf << c;
-	    }
 	}
 
       pos = is.tellg ();
@@ -408,7 +401,7 @@
 
 	  int parse_status;
 	  octave_value anon_fcn_handle = 
-	    eval_string (buf.str (), true, parse_status);
+	    eval_string (buf, true, parse_status);
 
 	  if (parse_status == 0)
 	    {
--- a/src/ov-fcn-inline.cc
+++ b/src/ov-fcn-inline.cc
@@ -47,6 +47,7 @@
 #include "ls-oct-ascii.h"
 #include "ls-hdf5.h"
 #include "ls-utils.h"
+#include "ls-ascii-helper.h"
 
 DEFINE_OCTAVE_ALLOCATOR (octave_fcn_inline);
 
@@ -139,27 +140,20 @@
 	nm = "";
 
       char c;
-      std::ostringstream buf;
+      std::string buf;
 
       // Skip preceeding newline(s)
-      while (is.get (c) && c == '\n');
+      skip_preceeding_newline (is);
 
       if (is)
 	{
-	  buf << c;
 
 	  // Get a line of text whitespace characters included, leaving
 	  // newline in the stream
-	  while (is.peek () != '\n')
-	    {
-	      is.get (c);
-	      if (! is)
-		break;
-	      buf << c;
-	    }
+	  buf = read_until_newline (is, true);
 	}
 
-      iftext = buf.str ();
+      iftext = buf;
 
       octave_fcn_inline tmp (iftext, ifargs, nm);
       fcn = tmp.fcn;
--- a/src/ov-range.cc
+++ b/src/ov-range.cc
@@ -41,6 +41,7 @@
 #include "byte-swap.h"
 #include "ls-hdf5.h"
 #include "ls-utils.h"
+#include "ls-ascii-helper.h"
 
 DEFINE_OCTAVE_ALLOCATOR (octave_range);
 
@@ -274,14 +275,9 @@
 	break;
     }
 
-  for (;;)
-    {
-      if (is && (c == '%' || c == '#'))
-	while (is.get (c) && c != '\n')
-	  ; // Skip to beginning of next line, ignoring everything.
-      else
-	break;
-    }
+  // Skip to beginning of next line, ignoring everything.
+  skip_until_newline (is, false);
+  
 }
 
 bool 
--- a/src/ov-str-mat.cc
+++ b/src/ov-str-mat.cc
@@ -48,6 +48,7 @@
 #include "pr-output.h"
 #include "pt-mat.h"
 #include "utils.h"
+#include "ls-ascii-helper.h"
 
 DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_str);
 DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_sq_str);
@@ -344,8 +345,7 @@
 		      char *ftmp = tmp.fortran_vec ();
 
 		      // Skip the return line
-		      if (! is.read (ftmp, 1))
-			return false;
+		      skip_preceeding_newline (is);
 
 		      if (! is.read (ftmp, dv.numel ()) || !is)
 			{