Mercurial > hg > octave-nkf

--- a/scripts/io/strread.m
+++ b/scripts/io/strread.m
@@ -26,10 +26,9 @@
 ##
 ## The string @var{str} is split into words that are repeatedly matched to the
 ## specifiers in @var{format}.  The first word is matched to the first
-## specifier,
-## the second to the second specifier and so forth.  If there are more words
-## than
-## specifiers, the process is repeated until all words have been processed.
+## specifier, the second to the second specifier and so forth.  If there are
+## more words than specifiers, the process is repeated until all words have
+## been processed.
 ##
 ## The string @var{format} describes how the words in @var{str} should be
 ## parsed.
@@ -224,9 +223,12 @@
       case "returnonerror"
         err_action = varargin{n+1};
       case "treatasempty"
-        empty_str = varargin{n+1};
-        if (ischar (empty_str))
-          empty_str = {empty_str};
+        if (iscellstr (varargin{n+1}))
+          empty_str = varargin{n+1};
+        elseif (ischar (varargin{n+1}))
+          empty_str = varargin(n+1);
+        else
+          error ('strread: "treatasempty" value must be string or cellstr');
         endif
       otherwise
         warning ('strread: unknown property "%s"', varargin{n});
@@ -234,11 +236,7 @@
   endfor

   ## Parse format string to compare nr. of conversion fields and nargout
-  idx = strfind (format, "%")';
-  specif = format([idx, idx+1]);
-  nspecif = length (idx);
-  idx_star = strfind (format, "%*");
-  nfields = length (idx) - length (idx_star);
+  nfields = length (strfind (format, "%")) - length (strfind (format, "%*"));
   ## If str only has numeric fields, a (default) format ("%f") will do.
   ## Otherwise:
   if ((max (nargout, 1) != nfields) && ! strcmp (format, "%f"))
@@ -300,6 +298,7 @@

   if (! isempty (white_spaces))
     ## Check for overlapping whitespaces and delimiters & trim whitespace
+    ## FIXME: Can this section be replaced by call to setdiff() ?
     if (! isempty (delimiter_str))
       [ovlp, iw] = intersect (white_spaces, delimiter_str);
       if (! isempty (ovlp))
@@ -362,6 +361,7 @@
   ## Replace TreatAsEmpty char sequences by empty strings
   if (! isempty (empty_str))
     ## FIXME: There should be a simpler way to do this with cellfun
+    ##        or possibly with regexprep
     for ii = 1:numel (empty_str)
       idz = strmatch (empty_str{ii}, words, "exact");
       words(idz) = {""};
@@ -393,7 +393,7 @@
       ## 1. Assess "period" in the split-up words array ( < num_words_per_line).
       ## Could be done using EndOfLine but that prohibits EndOfLine = "" option.
       fmt_in_word = cell (num_words_per_line, 1);
-      words_period = 1;
+      words_period = litptr = 1;
       ## For each literal in turn
       for ii = 1:numel (idy)
         fmt_in_word(idy(ii)) = num_words;
--- a/scripts/io/textread.m
+++ b/scripts/io/textread.m
@@ -19,7 +19,9 @@
 ## -*- texinfo -*-
 ## @deftypefn  {Function File} {[@var{a}, @dots{}] =} textread (@var{filename})
 ## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{n})
 ## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{prop1}, @var{value1}, @dots{})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{n}, @var{prop1}, @var{value1}, @dots{})
 ## Read data from a text file.
 ##
 ## The file @var{filename} is read and parsed according to @var{format}.  The
@@ -39,6 +41,9 @@
 ## delimiters.
 ## @end itemize
 ##
+## The optional input @var{n} specifies the number of times to use
+## @var{format} when parsing, i.e., the format repeat count.
+##
 ## @seealso{strread, load, dlmread, fscanf, textscan}
 ## @end deftypefn

@@ -49,7 +54,7 @@
     print_usage ();
   endif

-  if (!ischar (filename) || !ischar (format))
+  if (! ischar (filename) || ! ischar (format))
     error ("textread: FILENAME and FORMAT arguments must be strings");
   endif

@@ -67,47 +72,60 @@
     varargin(headerlines:headerlines+1) = [];
   endif

-  str = fread (fid, "char=>char").';
+  if (nargin > 2 && isnumeric (varargin{1}))
+    nlines = varargin{1};
+  else
+    nlines = Inf;
+  endif
+
+  if (isfinite (nlines))
+    str = tmp_str = "";
+    n = 0;
+    ## FIXME: Can this be done without slow loop?
+    while (ischar (tmp_str) && n++ <= nlines)
+      str = strcat (str, tmp_str);
+      tmp_str = fgets (fid);
+    endwhile
+  else
+    str = fread (fid, "char=>char").';
+  endif
   fclose (fid);

   if (isempty (str))
     warning ("textread: empty file");
+    return;
+  endif
+
+  endofline = find (strcmpi (varargin, "endofline"), 1);
+  if (! isempty (endofline))
+    ## 'endofline' option set by user.
+    if (! ischar (varargin{endofline + 1}));
+      error ("textread: character value required for EndOfLine");
+    endif
   else
-    endofline = find (strcmpi (varargin, "endofline"), 1);
-    if (! isempty (endofline))
-      ## 'endofline' option set by user.
-      endofline = find (strcmpi (varargin, "endofline"), 1);
-      if (! ischar (varargin{endofline + 1}));
-        error ("textscan: character value required for EndOfLine");
-      endif
+    ## Determine EOL from file.  Search for EOL candidates in first 3000 chars
+    eol_srch_len = min (length (str), 3000);
+    ## First try DOS (CRLF)
+    if (! isempty (findstr ("\r\n", str(1 : eol_srch_len))))
+      eol_char = "\r\n";
+    ## Perhaps old Macintosh? (CR)
+    elseif (! isempty (findstr ("\r", str(1 : eol_srch_len))))
+      eol_char = "\r";
+    ## Otherwise, use plain UNIX (LF)
     else
-      ## Determine EOL from file.  Search for EOL candidates in first 3000 chars
-      eol_srch_len = min (length (str), 3000);
-      ## First try DOS (CRLF)
-      if (! isempty (findstr ("\r\n", str(1 : eol_srch_len))))
-        eol_char = "\r\n";
-      ## Perhaps old Macintosh? (CR)
-      elseif (! isempty (findstr ("\r", str(1 : eol_srch_len))))
-        eol_char = "\r";
-      ## Otherwise, use plain UNIX (LF)
-      else
-        eol_char = "\n";
-      endif
-      ## Set up default endofline param value
-      nargs = numel (varargin);
-      varargin(nargs+1:nargs+2) = {'endofline', eol_char};
+      eol_char = "\n";
     endif
+    ## Set up default endofline param value
+    varargin(end+1:end+2) = {'endofline', eol_char};
+  endif

-    ## Set up default whitespace param value if needed
-    if (isempty (find (strcmpi ('whitespace', varargin))))
-      nargs = numel (varargin);
-      varargin(nargs+1:nargs+2) = {'whitespace', " \b\t"};
-    endif
+  ## Set up default whitespace param value if needed
+  if (isempty (find (strcmpi ('whitespace', varargin))))
+    varargin(end+1:end+2) = {'whitespace', " \b\t"};
+  endif

-    ## Call strread to make it do the real work
-    [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});
-
-  endif
+  ## Call strread to make it do the real work
+  [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});

 endfunction

@@ -125,5 +143,6 @@
 %% Test input validation
 %!error textread ()
 %!error textread (1)
-%!error textread ("fname", 1)
+%!error <arguments must be strings> textread (1, '%f')
+%!error <arguments must be strings> textread ("fname", 1)
--- a/scripts/io/textscan.m
+++ b/scripts/io/textscan.m
@@ -22,8 +22,8 @@
 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{param}, @var{value}, @dots{})
 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n}, @var{param}, @var{value}, @dots{})
 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{str}, @dots{})
-## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@dots{})
-## Read data from a text file.
+## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@var{fid}, @dots{})
+## Read data from a text file or string.
 ##
 ## The file associated with @var{fid} is read and parsed according to
 ## @var{format}.  The function behaves like @code{strread} except it works by
@@ -35,7 +35,7 @@
 ##
 ## @itemize
 ## @item "headerlines":
-## The first @var{value} number of lines of @var{str} are skipped.
+## The first @var{value} number of lines of @var{fid} are skipped.
 ##
 ## @item "endofline":
 ## Specify a single character or "\r\n".  If no value is given, it will be
@@ -47,10 +47,10 @@
 ## have been encountered.  If set to 0 or false, return an error and no data.
 ## @end itemize
 ##
-## The optional input, @var{n}, specifes the number of lines to be read from
-## the file, associated with @var{fid}.
+## The optional input @var{n} specifies the number of times to use
+## @var{format} when parsing, i.e., the format repeat count.
 ##
-## The output, @var{C}, is a cell array whose length is given by the number
+## The output @var{C} is a cell array whose length is given by the number
 ## of format specifiers.
 ##
 ## The second output, @var{position}, provides the position, in characters,
@@ -75,21 +75,19 @@
   endif

   if (! ischar (format))
-    error ("textscan: FORMAT must be a valid specification");
+    error ("textscan: FORMAT must be a string");
   endif

-  if (nargin > 2 && isnumeric (varargin{1}))
-    nlines = varargin{1};
-    args = varargin(2:end);
+  args = varargin;
+  if (nargin > 2 && isnumeric (args{1}))
+    nlines = args{1};
   else
     nlines = Inf;
-    args = varargin;
   endif

   if (! any (strcmpi (args, "emptyvalue")))
     ## Matlab returns NaNs for missing values
-    args{end+1} = "emptyvalue";
-    args{end+1} = NaN;
+    args(end+1:end+2) = {'emptyvalue', NaN};
   endif

   ## Check default parameter values that differ for strread & textread
@@ -97,37 +95,34 @@
   ipos = find (strcmpi (args, "whitespace"));
   if (isempty (ipos))
     ## Matlab default whitespace = " \b\t"
-    args{end+1} = "whitespace";
-    args{end+1} = " \b\t";
+    args(end+1:end+2) = {'whitespace', " \b\t"};
     whitespace = " \b\t";
   else
     ## Check if there's at least one string format specifier
     fmt = strrep (format, "%", " %");
-    [~, ~, ~, fmt] = regexp (fmt, '[^ ]+');
+    fmt = regexp (fmt, '[^ ]+', 'match');
     fmt = strtrim (fmt(strmatch ("%", fmt)))
     has_str_fmt = all (cellfun ("isempty", strfind (strtrim (fmt(strmatch ("%", fmt))), 's')));
     ## If there is a format, AND whitespace value = empty,
     ## don't add a space (char(32)) to whitespace
     if (! (isempty (args{ipos+1}) &&  has_str_fmt))
-      args {ipos+1} = unique ([" " whitespace]);
+      args{ipos+1} = unique ([" ", whitespace]);
     endif
   endif

   if (! any (strcmpi (args, "delimiter")))
     ## Matlab says default delimiter = whitespace.
     ## strread() will pick this up further
-    args{end+1} = "delimiter";
-    args{end+1} = "";
+    args(end+1:end+2) = {'delimiter', ""};
   endif

   if (any (strcmpi (args, "returnonerror")))
     ## Because of the way strread() reads data (columnwise) this parameter
     ## can't be neatly implemented.  strread() will pick it up anyway
-    warning ('ReturnOnError is not fully implemented');
+    warning ('textscan: ReturnOnError is not fully implemented');
   else
     ## Set default value (=true)
-    args{end+1} = "returnonerror";
-    args{end+1} = 1;
+    args(end+1:end+2) = {"returnonerror", 1};
   endif

   if (ischar (fid))
@@ -145,11 +140,13 @@
       args(headerlines:headerlines+1) = [];
     endif
     if (isfinite (nlines))
-      str = "";
-      ## FIXME: Can this be done without slow for loop?
-      for n = 1:nlines
-        str = strcat (str, fgets (fid));
-      endfor
+      str = tmp_str = "";
+      n = 0;
+      ## FIXME: Can this be done without slow loop?
+      while (ischar (tmp_str) && n++ <= nlines)
+        str = strcat (str, tmp_str);
+        tmp_str = fgets (fid);
+      endwhile
     else
       str = fread (fid, "char=>char").';
     endif
@@ -159,53 +156,48 @@
   if (isempty (str))
     warning ("textscan: no data read");
     C = [];
-  else
-    ## Check value of 'endofline'.  String or file doesn't seem to matter
-    endofline = find (strcmpi (args, "endofline"), 1);
-    if (! isempty (endofline))
-      if (! ischar (args{endofline + 1}))
-        error ("textscan: character value required for EndOfLine");
-      endif
+    return;
+  endif
+
+  ## Check value of 'endofline'.  String or file doesn't seem to matter
+  endofline = find (strcmpi (args, "endofline"), 1);
+  if (! isempty (endofline))
+    if (ischar (args{endofline + 1}))
+      eol_char = args{endofline + 1};
     else
-      ## Determine EOL from file.  Search for EOL candidates in first 3000 chars
-      BUFLEN = 3000;
-      ## First try DOS (CRLF)
-      eol_srch_len = min (length (str), 3000);
-      if (! isempty (findstr ("\r\n", str(1 : eol_srch_len))))
-        eol_char = "\r\n";
-      ## Perhaps old Macintosh? (CR)
-      elseif (! isempty (findstr ("\r", str(1 : eol_srch_len))))
-        eol_char = "\r";
-      ## Otherwise, use plain UNIX (LF)
-      else
-        eol_char = "\n";
-      endif
-      ## Set up the default endofline param value
-      args{end+1} = "endofline";
-      args{end+1} = eol_char;
+      error ("textscan: character value required for EndOfLine");
     endif
+  else
+    ## Determine EOL from file.  Search for EOL candidates in first 3000 chars
+    eol_srch_len = min (length (str), 3000);
+    ## First try DOS (CRLF)
+    if (! isempty (findstr ("\r\n", str(1 : eol_srch_len))))
+      eol_char = "\r\n";
+    ## Perhaps old Macintosh? (CR)
+    elseif (! isempty (findstr ("\r", str(1 : eol_srch_len))))
+      eol_char = "\r";
+    ## Otherwise, use plain UNIX (LF)
+    else
+      eol_char = "\n";
+    endif
+    ## Set up the default endofline param value
+    args(end+1:end+2) = {'endofline', eol_char};
+  endif

-    ## Determine the number of data fields
-    num_fields = numel (strfind (format, "%")) - ...
-                 numel (idx_star = strfind (format, "%*"));
-
-    ## Strip trailing EOL to avoid returning stray missing values (f. strread)
-    if (strcmp (str(end-length (eol_char) + 1 : end), eol_char));
-      str = str(1 : end-length (eol_char));
-    endif
+  ## Determine the number of data fields
+  num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*"));

-    ## Call strread to make it do the real work
-    C = cell (1, num_fields);
-    [C{:}] = strread (str, format, args{:});
+  ## Strip trailing EOL to avoid returning stray missing values (f. strread)
+  if (strcmp (str(end-length (eol_char) + 1 : end), eol_char));
+    str(end-length (eol_char) + 1 : end) = "";
+  endif

-    if (ischar (fid) && isfinite (nlines))
-      C = cellfun (@(x) x(1:nlines), C, "uniformoutput", false);
-    endif
+  ## Call strread to make it do the real work
+  C = cell (1, num_fields);
+  [C{:}] = strread (str, format, args{:});

-    if (nargout == 2)
-      position = ftell (fid);
-    endif
-
+  if (nargout == 2)
+    position = ftell (fid);
   endif

 endfunction
@@ -249,3 +241,10 @@
 %! assert (a{2}', {'B' 'J' 'R' 'Z'});
 %! assert (a{3}', [16 241 3 NaN], 1e-5);

+%% Test input validation
+%!error textscan ()
+%!error textscan (single (4))
+%!error textscan ({4})
+%!error <must be a string> textscan ("Hello World", 2)
+%!error <cannot provide position information> [C, pos] = textscan ("Hello World")
+%!error <character value required> textscan ("Hello World", '%s', 'EndOfLine', 3)