# HG changeset patch # User Philip Nienhuis # Date 1340384970 -7200 # Node ID 21197c43f984f8a43b7a1bb749009cdf26234589 # Parent fce841c941a5d3facaf27c99258f19751b57e009 Better Matlab compatibility of textscan, bugs #36356 and #36392 textscan.m: ensure trailing newline in data string before calling strread in case of CollectOutput parameter test added for uneven data column lengths + CollectOutput return cell array of proper dimension when no data were read rewrote parts of texinfo help text * strread.m: add explanation of role of trailing newline to texinfo help text diff --git a/scripts/io/strread.m b/scripts/io/strread.m --- a/scripts/io/strread.m +++ b/scripts/io/strread.m @@ -154,7 +154,21 @@ ## ## @end table ## -## @seealso{textscan, textread, load, dlmread, fscanf} +## When the number of words in @var{str} doesn't match an exact multiple +## of the number of format conversion specifiers, strread's behavior +## depends on the last character of @var{str}: +## +## @table @asis +## @item last character = "\n" +## Data columns are padded with empty fields or Nan so that all columns +## have equal length +## +## @item last character is not "\n" +## Data columns are not padded; strread returns columns of unequal length +## +## @end table +## +# @seealso{textscan, textread, load, dlmread, fscanf} ## @end deftypefn function varargout = strread (str, format = "%f", varargin) diff --git a/scripts/io/textscan.m b/scripts/io/textscan.m --- a/scripts/io/textscan.m +++ b/scripts/io/textscan.m @@ -25,13 +25,13 @@ ## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@var{fid}, @dots{}) ## Read data from a text file or string. ## -## The file associated with @var{fid} is read and parsed according to -## @var{format}. The function behaves like @code{strread} except it works by -## parsing a file instead of a string. See the documentation of -## @code{strread} for details. +## The string @var{str} or file associated with @var{fid} is read from and +## parsed according to @var{format}. The function behaves like @code{strread} +## except it can also read from file instead of a string. See the documentation +## of @code{strread} for details. ## -## In addition to the options supported by -## @code{strread}, this function supports a few more: +## In addition to the options supported by @code{strread}, this function +## supports a few more: ## ## @itemize ## @item "collectoutput": @@ -50,16 +50,19 @@ ## @item "returnonerror": ## If set to numerical 1 or true (default), return normally when read errors ## have been encountered. If set to 0 or false, return an error and no data. +## As the string or file is read by columns rather than by rows, and because +## textscan is fairly forgiving as regards read errors, setting this option +## may have little or no actual effect. ## @end itemize ## ## When reading from a character string, optional input argument @var{n} ## specifies the number of times @var{format} should be used (i.e., to limit ## the amount of data read). -## When reading fro file, @var{n} specifies the number of data lines to read; +## When reading from file, @var{n} specifies the number of data lines to read; ## in this sense it differs slightly from the format repeat count in strread. ## -## The output @var{C} is a cell array whose length is given by the number -## of format specifiers. +## The output @var{C} is a cell array whose second dimension is determined +## by the number of format specifiers. ## ## The second output, @var{position}, provides the position, in characters, ## from the beginning of the file. @@ -80,14 +83,18 @@ format = "%f"; endif + if (! ischar (format)) + error ("textscan: FORMAT must be a string"); + endif + + ## Determine the number of data fields & initialize output array + num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*")); + C = cell (1, num_fields); + if (! (isa (fid, "double") && fid > 0) && ! ischar (fid)) error ("textscan: first argument must be a file id or character string"); endif - if (! ischar (format)) - error ("textscan: FORMAT must be a string"); - endif - args = varargin; if (nargin > 2 && isnumeric (args{1})) nlines = args{1}; @@ -96,7 +103,6 @@ endif if (nlines < 1) printf ("textscan: N = 0, no data read\n"); - C = []; return endif @@ -174,7 +180,6 @@ ## Check for empty result if (isempty (str)) warning ("textscan: no data read"); - C = []; return; endif @@ -249,9 +254,15 @@ ## Determine the number of data fields num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*")); - ## Strip trailing EOL to avoid returning stray missing values (f. strread) - if (strcmp (str(end-length (eol_char) + 1 : end), eol_char)); - str(end-length (eol_char) + 1 : end) = ""; + ## Strip trailing EOL to avoid returning stray missing values (f. strread). + ## However, in case of CollectOutput request, presence of EOL is required + eol_at_end = strcmp (str(end-length (eol_char) + 1 : end), eol_char); + if (collop) + if (! eol_at_end) + str(end+1 : end+length (eol_char)) = eol_char; + endif + elseif (eol_at_end) + str(end-length (eol_char) + 1 : end) = ""; endif ## Call strread to make it do the real work @@ -316,14 +327,14 @@ %! assert (b(1,:)', c{1}, 1e-5); %! assert (b(2,:)', c{2}, 1e-5); -#%!test -#%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6"; -#%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//"); -#%! assert (a{1}, int32([13; 36])); -#%! assert (a{2}, [72; NaN]); -#%! assert (a{3}, [NaN; 5]); -#%! assert (a{4}, {"str1"; "str3"}); -#%! assert (a{5}, [25; 6]); +%!test +%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6"; +%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//"); +%! assert (a{1}, int32([13; 36])); +%! assert (a{2}, [72; NaN]); +%! assert (a{3}, [NaN; 5]); +%! assert (a{4}, {"str1"; "str3"}); +%! assert (a{5}, [25; 6]); %!test %! str = "Km:10 = hhhBjjj miles16hour\r\n"; @@ -362,6 +373,21 @@ %! assert (size(c{3}), [10, 2]); %! assert (size(c{2}), [10, 2]); +%!test +%% CollectOutput test with uneven column length files +%! b = [10:10:100]; +%! b = [b; 8*b/5; 8*b*1000/5]; +%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b); +%! str = [str "110 miles/hr"]; +%! fmt = "%f miles%s %s %f (%f) kilometers %*s"; +%! c = textscan (str, fmt, "collectoutput", 1); +%! assert (size(c{1}), [11, 1]); +%! assert (size(c{3}), [11, 2]); +%! assert (size(c{2}), [11, 2]); +%! assert (c{3}(end), NaN); +%! assert (c{2}{11, 1}, "/hr"); +%! assert (isempty (c{2}{11, 2}), true); + %% Test input validation %!error textscan () %!error textscan (single (4))