Mercurial > hg > octave-nkf
diff scripts/io/textscan.m @ 16357:0cbe330f39a2
textscan.m, textread.m: allow reading multi-column data files with empty format + tests (bug #38317)
author | Philip Nienhuis <prnienhuis@users.sf.net> |
---|---|
date | Fri, 22 Mar 2013 17:46:04 +0100 |
parents | 9c4ac8f25a8c |
children | e2de3c8882be |
line wrap: on
line diff
--- a/scripts/io/textscan.m +++ b/scripts/io/textscan.m @@ -67,19 +67,26 @@ ## The second output, @var{position}, provides the position, in characters, ## from the beginning of the file. ## +## If the format string is empty (not: omitted) and the file contains only +## numeric data (excluding headerlines), textscan will return data in a number +## of columns matching the number of numeric fields on the first data line of +## the file. +## ## @seealso{dlmread, fscanf, load, strread, textread} ## @end deftypefn function [C, position] = textscan (fid, format = "%f", varargin) BUFLENGTH = 4096; ## Read buffer - + emptfmt = 0; ## Signals deliberately empty format string + ## Check input if (nargin < 1) print_usage (); endif if (isempty (format)) + emptfmt = 1; format = "%f"; endif @@ -132,6 +139,9 @@ ## Matlab says default delimiter = whitespace. ## strread() will pick this up further args(end+1:end+2) = {'delimiter', ""}; + delimiter = ""; + else + delimiter = args{find (strcmpi (args, "delimiter")) + 1}; endif collop = false; @@ -157,6 +167,15 @@ args(end+1:end+2) = {"returnonerror", 1}; endif + ## Check if a headerlines argument is specified + headerlines = find (strcmpi (args, "headerlines"), 1); + if (! isempty (headerlines)) + ## Yep. But it is stray when reading from strings... + if (ischar (fid)) + warning ("textscan: 'headerlines' ignored when reading from strings"); + endif + endif + if (ischar (fid)) ## Read from a text string if (nargout == 2) @@ -166,7 +185,6 @@ else st_pos = ftell (fid); ## Skip header lines if requested - headerlines = find (strcmpi (args, "headerlines"), 1); if (! isempty (headerlines)) ## Beware of missing or wrong headerline value if (headerlines == numel (args) @@ -268,9 +286,10 @@ endif ## Strip trailing EOL to avoid returning stray missing values (f. strread). - ## However, in case of CollectOutput request, presence of EOL is required + ## However, in case of CollectOutput request, presence of EOL is required; + ## also in case of deliberately entered empty format string eol_at_end = strcmp (str(end-length (eol_char) + 1 : end), eol_char); - if (collop) + if (collop || emptfmt) if (! eol_at_end) str(end+1 : end+length (eol_char)) = eol_char; endif @@ -284,6 +303,36 @@ C = cell (1, num_fields); [C{:}] = strread (str, format, args{:}); + ## I.c.o. empty format, match nr. of cols to nr. of fields on first read line + if (emptfmt) + ## Find end of first line + eoi = index (str, eol_char); + if (eoi) + ## str contains an EOL, proceed with assessing nr. of columns + ncols = countcols (C, str(1 : eoi-1), delimiter, whitespace); + ## See if lowermost data row must be completed + pad = mod (numel (C{1}), ncols); + if (pad) + ## Textscan returns NaNs for empty fields + C(1) = [C{1}; NaN(ncols - pad, 1)]; + endif + ## Replace NaNs with EmptyValue, if any + ipos = find (strcmpi (args, "emptyvalue")); + if (ipos) + C{1}(find (isnan (C{1}))) = args{ipos+1}; + endif + ## Compute nr. of rows + nrows = floor (numel (C{1}) / ncols); + ## Reshape C; watch out, transpose needed + C(1) = reshape (C{1}, ncols, numel (C{1}) / ncols)'; + ## Distribute columns over C and wipe cols 2:end of C{1} + for ii=2:ncols + C(ii) = C{1}(:, ii); + endfor + C{1} = C{1}(:, 1); + endif + endif + ## If requested, collect output columns of same class if (collop) C = colloutp (C); @@ -297,6 +346,21 @@ endfunction +## Assess nr of data fields on first line of data +function ncols = countcols (C, str, dlm, wsp) + + if (isempty (dlm)) + ## Field separator = whitespace. Fold multiple whitespace into one + str = regexprep (str, sprintf ("[%s]", wsp), " "); + str = strtrim (str); + ncols = numel (strfind (str, " ")) + 1; + else + ncols = numel (regexp (str, sprintf ("[%s]", dlm))) + 1; + endif + +endfunction + + ## Collect consecutive columns of same class into one cell column function C = colloutp (C) @@ -520,6 +584,77 @@ %! assert (A{1}, [d(1); d(3)], 1e-6); %! assert (A{2}, [d(2); d(4)], 1e-6); -%!error <missing or illegal value for> textread (file_in_loadpath ("textscan.m"), "", "headerlines") -%!error <missing or illegal value for> textread (file_in_loadpath ("textscan.m"), "", "headerlines", 'hh') -%!error <character value required for> textread (file_in_loadpath ("textscan.m"), "", "endofline", true) +%% Tests reading with empty format, should return proper nr of columns +%!test +%! f = tmpnam (); +%! fid = fopen (f, "w+"); +%! fprintf (fid, " 1 2 3 4\n5 6 7 8"); +%! fseek (fid, 0, "bof"); +%! A = textscan (fid, ""); +%! fclose (fid); +%! unlink (f); +%! assert (A{1}, [1 ; 5], 1e-6); +%! assert (A{2}, [2 ; 6], 1e-6); +%! assert (A{3}, [3 ; 7], 1e-6); +%! assert (A{4}, [4 ; 8], 1e-6); + +%% Tests reading with empty format; empty fields & incomplete lower row +%!test +%! f = tmpnam (); +%! fid = fopen (f, "w+"); +%! fprintf (fid, " ,2,,4\n5,6"); +%! fseek (fid, 0, "bof"); +%! A = textscan (fid, "", "delimiter", ",", "EmptyValue", 999, "CollectOutput" , 1); +%! fclose (fid); +%! unlink (f); +%! assert (A{1}, [999, 2, 999, 4; 5, 6, 999, 999], 1e-6); + +%% Error message tests + +%!test +%! f = tmpnam (); +%! fid = fopen (f, "w+"); +%! msg1 = "Missing or illegal value for 'headerlines'"; +%! try +%! A = textscan (fid, "", "headerlines"); +%! end_try_catch; +%! fclose (fid); +%! unlink (f); +%! assert (msg1, lasterr); + +%!test +%! f = tmpnam (); +%! fid = fopen (f, "w+"); +%! msg1 = "Missing or illegal value for 'headerlines'"; +%! try +%! A = textscan (fid, "", "headerlines", "hh"); +%! end_try_catch; +%! fclose (fid); +%! unlink (f); +%! assert (msg1, lasterr); + +%!test +%! f = tmpnam (); +%! fid = fopen (f, "w+"); +%! fprintf (fid,"some_string"); +%! fseek (fid, 0, "bof"); +%! msg1 = "textscan: illegal EndOfLine character value specified"; +%! try +%! A = textscan (fid, "%f", "EndOfLine", "\n\r"); +%! end_try_catch; +%! fclose (fid); +%! unlink (f); +%! assert (msg1, lasterr); + +%!test +%! f = tmpnam (); +%! fid = fopen (f, "w+"); +%! fprintf (fid,"some_string"); +%! fseek (fid, 0, "bof"); +%! msg1 = "textscan: character value required for EndOfLine"; +%! try +%! A = textscan (fid, "%f", "EndOfLine", 33); +%! end_try_catch; +%! fclose (fid); +%! unlink (f); +%! assert (msg1, lasterr);