Mercurial > hg > octave-lyh
view scripts/io/textscan.m @ 14363:f3d52523cde1
Use Octave coding conventions in all m-file %!test blocks
* wavread.m, acosd.m, acot.m, acotd.m, acoth.m, acsc.m, acscd.m, acsch.m,
asec.m, asecd.m, asech.m, asind.m, atand.m, cosd.m, cot.m, cotd.m, coth.m,
csc.m, cscd.m, csch.m, sec.m, secd.m, sech.m, sind.m, tand.m, accumarray.m,
accumdim.m, bitcmp.m, bitget.m, bitset.m, blkdiag.m, cart2pol.m, cart2sph.m,
celldisp.m, chop.m, circshift.m, colon.m, common_size.m, cplxpair.m,
cumtrapz.m, curl.m, dblquad.m, deal.m, divergence.m, flipdim.m, fliplr.m,
flipud.m, genvarname.m, gradient.m, idivide.m, int2str.m, interp1.m,
interp1q.m, interp2.m, interp3.m, interpft.m, interpn.m, isa.m, isdir.m,
isequal.m, isequalwithequalnans.m, issquare.m, logspace.m, nargchk.m,
narginchk.m, nargoutchk.m, nextpow2.m, nthargout.m, num2str.m, pol2cart.m,
polyarea.m, postpad.m, prepad.m, profile.m, profshow.m, quadgk.m, quadv.m,
randi.m, rat.m, repmat.m, rot90.m, rotdim.m, shift.m, shiftdim.m, sph2cart.m,
structfun.m, trapz.m, triplequad.m, convhull.m, dsearch.m, dsearchn.m,
griddata3.m, griddatan.m, rectint.m, tsearchn.m, __makeinfo__.m, doc.m,
get_first_help_sentence.m, help.m, type.m, unimplemented.m, which.m, imread.m,
imwrite.m, dlmwrite.m, fileread.m, is_valid_file_id.m, strread.m, textread.m,
textscan.m, commutation_matrix.m, cond.m, condest.m, cross.m,
duplication_matrix.m, expm.m, housh.m, isdefinite.m, ishermitian.m,
issymmetric.m, logm.m, normest.m, null.m, onenormest.m, orth.m, planerot.m,
qzhess.m, rank.m, rref.m, trace.m, vech.m, ans.m, bincoeff.m, bug_report.m,
bzip2.m, comma.m, compare_versions.m, computer.m, edit.m, fileparts.m,
fullfile.m, getfield.m, gzip.m, info.m, inputname.m, isappdata.m, isdeployed.m,
ismac.m, ispc.m, isunix.m, list_primes.m, ls.m, mexext.m, namelengthmax.m,
news.m, orderfields.m, paren.m, recycle.m, rmappdata.m, semicolon.m,
setappdata.m, setfield.m, substruct.m, symvar.m, ver.m, version.m,
warning_ids.m, xor.m, fminbnd.m, fsolve.m, fzero.m, lsqnonneg.m, optimset.m,
pqpnonneg.m, sqp.m, matlabroot.m, __gnuplot_drawnow__.m,
__plt_get_axis_arg__.m, ancestor.m, cla.m, clf.m, close.m, colorbar.m,
colstyle.m, comet3.m, contourc.m, figure.m, gca.m, gcbf.m, gcbo.m, gcf.m,
ginput.m, graphics_toolkit.m, gtext.m, hggroup.m, hist.m, hold.m, isfigure.m,
ishghandle.m, ishold.m, isocolors.m, isonormals.m, isosurface.m, isprop.m,
legend.m, line.m, loglog.m, loglogerr.m, meshgrid.m, ndgrid.m, newplot.m,
orient.m, patch.m, plot3.m, plotyy.m, __print_parse_opts__.m, quiver3.m,
refreshdata.m, ribbon.m, semilogx.m, semilogxerr.m, semilogy.m, stem.m,
stem3.m, subplot.m, title.m, uigetfile.m, view.m, whitebg.m, compan.m, conv.m,
deconv.m, mkpp.m, mpoles.m, pchip.m, poly.m, polyaffine.m, polyder.m,
polyfit.m, polygcd.m, polyint.m, polyout.m, polyval.m, polyvalm.m, ppder.m,
ppint.m, ppjumps.m, ppval.m, residue.m, roots.m, spline.m, intersect.m,
ismember.m, powerset.m, setdiff.m, setxor.m, union.m, unique.m,
autoreg_matrix.m, bartlett.m, blackman.m, detrend.m, fftconv.m, fftfilt.m,
fftshift.m, freqz.m, hamming.m, hanning.m, ifftshift.m, sinc.m, sinetone.m,
sinewave.m, unwrap.m, bicg.m, bicgstab.m, gmres.m, gplot.m, nonzeros.m, pcg.m,
pcr.m, spaugment.m, spconvert.m, spdiags.m, speye.m, spfun.m, spones.m,
sprand.m, sprandsym.m, spstats.m, spy.m, svds.m, treelayout.m, bessel.m,
beta.m, betaln.m, factor.m, factorial.m, isprime.m, lcm.m, legendre.m,
nchoosek.m, nthroot.m, perms.m, pow2.m, primes.m, reallog.m, realpow.m,
realsqrt.m, hadamard.m, hankel.m, hilb.m, invhilb.m, magic.m, rosser.m,
vander.m, __finish__.m, center.m, cloglog.m, corr.m, cov.m, gls.m, histc.m,
iqr.m, kendall.m, kurtosis.m, logit.m, mahalanobis.m, mean.m, meansq.m,
median.m, mode.m, moment.m, ols.m, ppplot.m, prctile.m, probit.m, quantile.m,
range.m, ranks.m, run_count.m, runlength.m, skewness.m, spearman.m,
statistics.m, std.m, table.m, var.m, zscore.m, betacdf.m, betainv.m, betapdf.m,
betarnd.m, binocdf.m, binoinv.m, binopdf.m, binornd.m, cauchy_cdf.m,
cauchy_inv.m, cauchy_pdf.m, cauchy_rnd.m, chi2cdf.m, chi2inv.m, chi2pdf.m,
chi2rnd.m, discrete_cdf.m, discrete_inv.m, discrete_pdf.m, discrete_rnd.m,
empirical_cdf.m, empirical_inv.m, empirical_pdf.m, empirical_rnd.m, expcdf.m,
expinv.m, exppdf.m, exprnd.m, fcdf.m, finv.m, fpdf.m, frnd.m, gamcdf.m,
gaminv.m, gampdf.m, gamrnd.m, geocdf.m, geoinv.m, geopdf.m, geornd.m,
hygecdf.m, hygeinv.m, hygepdf.m, hygernd.m, kolmogorov_smirnov_cdf.m,
laplace_cdf.m, laplace_inv.m, laplace_pdf.m, laplace_rnd.m, logistic_cdf.m,
logistic_inv.m, logistic_pdf.m, logistic_rnd.m, logncdf.m, logninv.m,
lognpdf.m, lognrnd.m, nbincdf.m, nbininv.m, nbinpdf.m, nbinrnd.m, normcdf.m,
norminv.m, normpdf.m, normrnd.m, poisscdf.m, poissinv.m, poisspdf.m,
poissrnd.m, stdnormal_cdf.m, stdnormal_inv.m, stdnormal_pdf.m, stdnormal_rnd.m,
tcdf.m, tinv.m, tpdf.m, trnd.m, unidcdf.m, unidinv.m, unidpdf.m, unidrnd.m,
unifcdf.m, unifinv.m, unifpdf.m, unifrnd.m, wblcdf.m, wblinv.m, wblpdf.m,
wblrnd.m, kolmogorov_smirnov_test.m, kruskal_wallis_test.m, base2dec.m,
bin2dec.m, blanks.m, cstrcat.m, deblank.m, dec2base.m, dec2bin.m, dec2hex.m,
findstr.m, hex2dec.m, index.m, isletter.m, mat2str.m, rindex.m, str2num.m,
strcat.m, strjust.m, strmatch.m, strsplit.m, strtok.m, strtrim.m, strtrunc.m,
substr.m, validatestring.m, demo.m, example.m, fail.m, speed.m, addtodate.m,
asctime.m, clock.m, ctime.m, date.m, datenum.m, datetick.m, datevec.m,
eomday.m, etime.m, is_leap_year.m, now.m:
Use Octave coding conventions in all m-file %!test blocks
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Mon, 13 Feb 2012 07:29:44 -0800 |
parents | 2fe0f5fa8cc3 |
children | 86854d032a37 |
line wrap: on
line source
## Copyright (C) 2010-2012 Ben Abbott <bpabbott@mac.com>
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {@var{C} =} textscan (@var{fid}, @var{format})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{param}, @var{value}, @dots{})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n}, @var{param}, @var{value}, @dots{})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{str}, @dots{})
## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@var{fid}, @dots{})
## Read data from a text file or string.
##
## The file associated with @var{fid} is read and parsed according to
## @var{format}.  The function behaves like @code{strread} except it works by
## parsing a file instead of a string.  See the documentation of
## @code{strread} for details.
##
## In addition to the options supported by
## @code{strread}, this function supports a few more:
##
## @itemize
## @item "collectoutput":
## A value of 1 or true instructs textscan to concatenate consecutive columns
## of the same class in the output cell array.  A value of 0 or false (default)
## leaves output in distinct columns.
##
## @item "endofline":
## Specify "\r", "\n" or "\r\n" (for CR, LF, or CRLF).  If no value is given,
## it will be inferred from the file.  If set to "" (empty string) EOLs are
## ignored as delimiters and added to whitespace.
##
## @item "headerlines":
## The first @var{value} number of lines of @var{fid} are skipped.
##
## @item "returnonerror":
## If set to numerical 1 or true (default), return normally when read errors
## have been encountered.  If set to 0 or false, return an error and no data.
## @end itemize
##
## The optional input @var{n} specifies the number of times to use
## @var{format} when parsing, i.e., the format repeat count.
##
## The output @var{C} is a cell array whose length is given by the number
## of format specifiers.
##
## The second output, @var{position}, provides the position, in characters,
## from the beginning of the file.
##
## @seealso{dlmread, fscanf, load, strread, textread}
## @end deftypefn

function [C, position] = textscan (fid, format = "%f", varargin)

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif

  if (isempty (format))
    format = "%f";
  endif

  if (! (isa (fid, "double") && fid > 0) && ! ischar (fid))
    error ("textscan: first argument must be a file id or character string");
  endif

  if (! ischar (format))
    error ("textscan: FORMAT must be a string");
  endif

  ## Work on a copy of the option list; entries are appended to and removed
  ## from ARGS below, so positions found in ARGS are NOT valid in VARARGIN.
  args = varargin;
  if (nargin > 2 && isnumeric (args{1}))
    ## Leading numeric option is the format repeat count
    nlines = args{1};
  else
    nlines = Inf;
  endif

  if (! any (strcmpi (args, "emptyvalue")))
    ## Matlab returns NaNs for missing values
    args(end+1:end+2) = {'emptyvalue', NaN};
  endif

  ## Check default parameter values that differ for strread & textread

  ipos = find (strcmpi (args, "whitespace"));
  if (isempty (ipos))
    ## Matlab default whitespace = " \b\t"
    args(end+1:end+2) = {'whitespace', " \b\t"};
  else
    ## Check if there's at least one string format specifier
    has_str_fmt = regexp (format, '%[*]?\d*s', "once");
    ## If there is a string format AND whitespace value = empty,
    ## don't add a space (char(32)) to whitespace
    if (! (isempty (args{ipos+1}) && has_str_fmt))
      args{ipos+1} = unique ([" ", args{ipos+1}]);
    endif
  endif

  if (! any (strcmpi (args, "delimiter")))
    ## Matlab says default delimiter = whitespace.
    ## strread() will pick this up further
    args(end+1:end+2) = {'delimiter', ""};
  endif

  collop = false;
  ipos = find (strcmpi (args, "collectoutput"));
  if (! isempty (ipos))
    ## Search & concatenate consecutive columns of same class requested
    if (isscalar (args{ipos+1})
        && (islogical (args{ipos+1}) || isnumeric (args{ipos+1})))
      collop = args{ipos+1};
    else
      warning ("textscan: illegal value for CollectOutput parameter - ignored");
    endif
    ## Remove argument before call to strread() below
    args(ipos:ipos+1) = [];
  endif

  if (any (strcmpi (args, "returnonerror")))
    ## Because of the way strread() reads data (columnwise) this parameter
    ## can't be neatly implemented.  strread() will pick it up anyway
    warning ('textscan: ReturnOnError is not fully implemented');
  else
    ## Set default value (=true)
    args(end+1:end+2) = {"returnonerror", 1};
  endif

  if (ischar (fid))
    ## Read from a text string
    if (nargout == 2)
      error ("textscan: cannot provide position information for character input");
    endif
    str = fid;
  else
    ## Skip header lines if requested
    headerlines = find (strcmpi (args, "headerlines"), 1);
    ## Beware of zero valued headerline, fskipl would skip to EOF
    if (! isempty (headerlines) && (args{headerlines + 1} > 0))
      ## The index was located in ARGS, so the value must be read from ARGS
      ## too; VARARGIN positions differ once options have been removed above.
      fskipl (fid, args{headerlines + 1});
      args(headerlines:headerlines+1) = [];
    endif
    if (isfinite (nlines) && (nlines >= 0))
      str = tmp_str = "";
      n = 0;
      ## FIXME: Can this be done without slow loop?
      while (ischar (tmp_str) && n++ < nlines)
        tmp_str = fgets (fid);
        if (ischar (tmp_str))
          str = strcat (str, tmp_str);
        endif
      endwhile
    else
      str = fread (fid, "char=>char").';
    endif
  endif

  ## Check for empty result
  if (isempty (str))
    warning ("textscan: no data read");
    C = [];
    if (nargout == 2)
      ## Only reachable for file input (string input with two outputs has
      ## already raised an error), so FID is a valid file id here.
      position = ftell (fid);
    endif
    return;
  endif

  ## Check value of 'endofline'.  String or file doesn't seem to matter
  endofline = find (strcmpi (args, "endofline"), 1);
  if (! isempty (endofline))
    if (ischar (args{endofline + 1}))
      eol_char = args{endofline + 1};
      if (! any (strcmp (eol_char, {"", "\n", "\r", "\r\n"})))
        error ("textscan: illegal EndOfLine character value specified");
      endif
    else
      error ("textscan: character value required for EndOfLine");
    endif
  else
    ## Determine EOL from file.  Search for EOL candidates in first 3000 chars
    eol_srch_len = min (length (str), 3000);
    eol_sample = str(1 : eol_srch_len);
    ## strfind (STR, PATTERN): the text to search comes first
    ## First try DOS (CRLF)
    if (! isempty (strfind (eol_sample, "\r\n")))
      eol_char = "\r\n";
    ## Perhaps old Macintosh? (CR)
    elseif (! isempty (strfind (eol_sample, "\r")))
      eol_char = "\r";
    ## Otherwise, use plain UNIX (LF)
    else
      eol_char = "\n";
    endif
    ## Set up the default endofline param value
    args(end+1:end+2) = {'endofline', eol_char};
  endif

  ## Determine the number of data fields
  num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*"));

  ## Strip trailing EOL to avoid returning stray missing values (f. strread).
  ## Guard against data shorter than the EOL sequence to avoid an index error.
  eol_len = length (eol_char);
  if (length (str) >= eol_len
      && strcmp (str(end-eol_len + 1 : end), eol_char))
    str(end-eol_len + 1 : end) = "";
  endif

  ## Call strread to make it do the real work
  C = cell (1, num_fields);
  [C{:}] = strread (str, format, args{:});

  ## If requested, collect output columns of same class
  if (collop)
    C = colloutp (C);
  endif

  if (nargout == 2)
    position = ftell (fid);
  endif

endfunction


## Collect consecutive columns of same class into one cell column.
## C is a cell row as produced by strread; runs of adjacent cells whose
## contents share the same class are horizontally concatenated into one cell.
function C = colloutp (C)

  ## Start at rightmost column and work backwards to avoid ptr mixup
  ii = numel (C);
  while (ii > 1)
    clss1 = class (C{ii});
    jj = ii;
    while (jj > 1 && strcmp (clss1, class (C{jj - 1})))
      ## Column to the left is still same class; check next column to the left
      --jj;
    endwhile
    if (jj < ii)
      ## Concatenate columns into current column
      C{jj} = [C{jj : ii}];
      ## Wipe concatenated columns to the right, resume search to the left
      C(jj+1 : ii) = [];
      ii = jj - 1;
    else
      ## No similar class in column to the left, search from there
      --ii;
    endif
  endwhile

endfunction


%!test
%! str = "1, 2, 3, 4\n 5, , , 8\n 9, 10, 11, 12";
%! fmtstr = "%f %d %f %s";
%! c = textscan (str, fmtstr, 2, "delimiter", ",", "emptyvalue", -Inf);
%! assert (isequal (c{1}, [1;5]));
%! assert (length (c{1}), 2);
%! assert (iscellstr (c{4}));
%! assert (isequal (c{3}, [3; -Inf]));

%!test
%! b = [10:10:100];
%! b = [b; 8*b/5];
%! str = sprintf ("%g miles/hr = %g kilometers/hr\n", b);
%! fmt = "%f miles/hr = %f kilometers/hr";
%! c = textscan (str, fmt);
%! assert (b(1,:)', c{1}, 1e-5);
%! assert (b(2,:)', c{2}, 1e-5);

#%!test
#%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6";
#%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//");
#%! assert (a{1}, int32([13; 36]));
#%! assert (a{2}, [72; NaN]);
#%! assert (a{3}, [NaN; 5]);
#%! assert (a{4}, {"str1"; "str3"});
#%! assert (a{5}, [25; 6]);

%!test
%! str = "Km:10 = hhhBjjj miles16hour\r\n";
%! str = [str "Km:15 = hhhJjjj miles241hour\r\n"];
%! str = [str "Km:2 = hhhRjjj miles3hour\r\n"];
%! str = [str "Km:25 = hhhZ\r\n"];
%! fmt = "Km:%d = hhh%1sjjj miles%dhour";
%! a = textscan (str, fmt, "delimiter", " ");
%! assert (a{1}', int32([10 15 2 25]));
%! assert (a{2}', {'B' 'J' 'R' 'Z'});
%! assert (a{3}', int32([16 241 3 0]));

%% Test with default endofline parameter
%!test
%! c = textscan ("L1\nL2", "%s");
%! assert (c{:}, {"L1"; "L2"});

%% Test with endofline parameter set to "" (empty) - newline should be in word
%!test
%! c = textscan ("L1\nL2", "%s", "endofline", "");
%! assert (int8(c{:}{:}), int8([ 76, 49, 10, 76, 50 ]));

%!test
%! # No delimiters at all besides EOL.  Skip fields, even empty fields
%! str = "Text1Text2Text\nTextText4Text\nText57Text";
%! c = textscan (str, "Text%*dText%dText");
%! assert (c{1}, int32 ([2; 4; 0]));

%% CollectOutput test
%!test
%! b = [10:10:100];
%! b = [b; 8*b/5; 8*b*1000/5];
%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
%! c = textscan (str, fmt, "collectoutput", 1);
%! assert (size(c{3}), [10, 2]);
%! assert (size(c{2}), [10, 2]);

%% Test input validation
%!error textscan ()
%!error textscan (single (4))
%!error textscan ({4})
%!error <must be a string> textscan ("Hello World", 2)
%!error <cannot provide position information> [C, pos] = textscan ("Hello World")
%!error <character value required> textscan ("Hello World", '%s', 'EndOfLine', 3)