Mercurial > hg > octave-lyh
annotate scripts/io/textscan.m @ 12679:f38cf6224452 stable
Fix textscan/textread headerlines arg always skipping 2 lines (bug #33028)
* textread.m, textscan.m: Fix wrong headerline argument being fed
to fskipl. Check & avoid processing zero headerline values.
author | Philip Nienhuis <prnienhuis@users.sf.net> |
---|---|
date | Sun, 15 May 2011 19:15:50 -0700 |
parents | d0b799dafede |
children | fe6e2afcd9ee |
rev | line source |
---|---|
11523 | 1 ## Copyright (C) 2010-2011 Ben Abbott <bpabbott@mac.com> |
11141 | 2 ## |
3 ## This file is part of Octave. | |
4 ## | |
5 ## Octave is free software; you can redistribute it and/or modify it | |
6 ## under the terms of the GNU General Public License as published by | |
7 ## the Free Software Foundation; either version 3 of the License, or (at | |
8 ## your option) any later version. | |
9 ## | |
10 ## Octave is distributed in the hope that it will be useful, but | |
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 ## General Public License for more details. | |
14 ## | |
15 ## You should have received a copy of the GNU General Public License | |
16 ## along with Octave; see the file COPYING. If not, see | |
17 ## <http://www.gnu.org/licenses/>. | |
18 | |
19 ## -*- texinfo -*- | |
11471
994e2a93a8e2
Use uppercase 'A' to refer to matrix inputs in m-files.
Rik <octave@nomad.inbox5.com>
parents:
11469
diff
changeset
|
20 ## @deftypefn {Function File} {@var{C} =} textscan (@var{fid}, @var{format}) |
12575
d0b799dafede
Grammarcheck files for 3.4.1 release.
Rik <octave@nomad.inbox5.com>
parents:
11589
diff
changeset
|
21 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n}) |
d0b799dafede
Grammarcheck files for 3.4.1 release.
Rik <octave@nomad.inbox5.com>
parents:
11589
diff
changeset
|
22 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{param}, @var{value}, @dots{}) |
d0b799dafede
Grammarcheck files for 3.4.1 release.
Rik <octave@nomad.inbox5.com>
parents:
11589
diff
changeset
|
23 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n}, @var{param}, @var{value}, @dots{}) |
11471
994e2a93a8e2
Use uppercase 'A' to refer to matrix inputs in m-files.
Rik <octave@nomad.inbox5.com>
parents:
11469
diff
changeset
|
24 ## @deftypefnx {Function File} {@var{C} =} textscan (@var{str}, @dots{}) |
994e2a93a8e2
Use uppercase 'A' to refer to matrix inputs in m-files.
Rik <octave@nomad.inbox5.com>
parents:
11469
diff
changeset
|
25 ## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@dots{}) |
11141 | 26 ## Read data from a text file. |
27 ## | |
11563
3c6e8aaa9555
Grammarcheck m-files before 3.4 release.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
28 ## The file associated with @var{fid} is read and parsed according to |
3c6e8aaa9555
Grammarcheck m-files before 3.4 release.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
29 ## @var{format}. The function behaves like @code{strread} except it works by |
3c6e8aaa9555
Grammarcheck m-files before 3.4 release.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
30 ## parsing a file instead of a string. See the documentation of |
3c6e8aaa9555
Grammarcheck m-files before 3.4 release.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
31 ## @code{strread} for details. In addition to the options supported by |
3c6e8aaa9555
Grammarcheck m-files before 3.4 release.
Rik <octave@nomad.inbox5.com>
parents:
11523
diff
changeset
|
32 ## @code{strread}, this function supports one more: |
11141 | 33 ## @itemize |
34 ## @item "headerlines": | |
35 ## @end itemize | |
36 ## The first @var{value} number of lines of the file associated with @var{fid} are skipped. | |
37 ## | |
38 ## The optional input, @var{n}, specifies the number of lines to be read from | |
39 ## the file associated with @var{fid}. | |
40 ## | |
11471
994e2a93a8e2
Use uppercase 'A' to refer to matrix inputs in m-files.
Rik <octave@nomad.inbox5.com>
parents:
11469
diff
changeset
|
41 ## The output, @var{C}, is a cell array whose length is given by the number |
11141 | 42 ## of format specifiers. |
43 ## | |
44 ## The second output, @var{position}, provides the position, in characters, | |
45 ## from the beginning of the file. | |
46 ## | |
47 ## @seealso{dlmread, fscanf, load, strread, textread} | |
48 ## @end deftypefn | |
49 | |
11471
994e2a93a8e2
Use uppercase 'A' to refer to matrix inputs in m-files.
Rik <octave@nomad.inbox5.com>
parents:
11469
diff
changeset
|
function [C, p] = textscan (fid, format, varargin)

  ## Read formatted data from an open file (numeric FID) or from a character
  ## string, delegating the actual parsing to strread.
  ##
  ## Inputs:
  ##   fid      - positive file id, or a character string holding the data.
  ##   format   - strread-style format string; defaults to "%f" when omitted
  ##              or empty.
  ##   varargin - optional leading numeric N (number of lines to read),
  ##              followed by property/value pairs that are forwarded to
  ##              strread.  "headerlines" is handled here for file input.
  ## Outputs:
  ##   C - 1-by-num_fields cell array, one cell per non-skipped format field.
  ##   p - file position as reported by ftell (file input only).

  ## Check input
  if (nargin < 1)
    print_usage ();
  elseif (nargin == 1 || isempty (format))
    format = "%f";
  endif

  ## An optional numeric third argument is the number of lines to read;
  ## everything after it is a list of property/value pairs.
  if (nargin > 2 && isnumeric (varargin{1}))
    nlines = varargin{1};
    args = varargin(2:end);
  else
    nlines = Inf;
    args = varargin;
  endif

  if (! any (strcmpi (args, "emptyvalue")))
    ## Matlab returns NaNs for missing values
    args{end+1} = "emptyvalue";
    args{end+1} = NaN;
  endif

  if (! (isa (fid, "double") && fid > 0 || ischar (fid)))
    error ("textscan: first argument must be a file id or character string");
  endif

  if (! ischar (format))
    error ("textscan: FORMAT must be a valid specification");
  endif

  if (ischar (fid))
    if (nargout == 2)
      error ("textscan: cannot provide position information for character input");
    endif
    str = fid;
  else
    ## Maybe skip header lines.  The index is looked up in ARGS, so the value
    ## must be read from ARGS as well: VARARGIN is shifted by one element
    ## whenever the optional line count N was given.
    hdr_idx = find (strcmpi (args, "headerlines"), 1);
    if (! isempty (hdr_idx))
      if (hdr_idx == numel (args) || ! isnumeric (args{hdr_idx + 1}))
        error ("textscan: HEADERLINES must be followed by a numeric value");
      endif
      hdr_lines = floor (args{hdr_idx + 1});
      ## Beware of zero valued headerline, fskipl will count lines to EOF
      if (hdr_lines > 0)
        fskipl (fid, hdr_lines);
      endif
      ## The header has been consumed here; do not hand the option on to
      ## strread as well.
      args(hdr_idx:hdr_idx+1) = [];
    endif

    if (isfinite (nlines))
      ## Collect the lines first and join them once, stopping at EOF
      ## (fgets returns a non-character value when nothing is left).
      buf = cell (1, 0);
      for n = 1:nlines
        tline = fgets (fid);
        if (! ischar (tline))
          break;
        endif
        buf{end+1} = tline;
      endfor
      str = ["", buf{:}];
    else
      str = fread (fid, "char=>char").';
    endif
  endif

  ## Determine the number of data fields: every "%" conversion except the
  ## skipped ("%*") ones yields one output.
  num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*"));

  ## Call strread to make it do the real work
  C = cell (1, num_fields);
  [C{:}] = strread (str, format, args{:});

  if (ischar (fid) && isfinite (nlines))
    ## For string input all of STR was parsed; keep at most NLINES rows and
    ## tolerate inputs that hold fewer rows than requested.
    C = cellfun (@(x) x(1:min (nlines, numel (x))), C, "uniformoutput", false);
  endif

  if (nargout == 2)
    p = ftell (fid);
  endif

endfunction

%!test
%! str = "1,  2,  3,  4\n 5,  ,  ,  8\n 9, 10, 11, 12";
%! fmtstr = "%f %d %f %s";
%! c = textscan (str, fmtstr, 2, "delimiter", ",", "emptyvalue", -Inf);
%! assert (isequal (c{1}, [1;5]))
%! assert (length (c{1}), 2);
%! assert (iscellstr (c{4}))
%! assert (isequal (c{3}, [3; -Inf]))

%!test
%! b = [10:10:100];
%! b = [b; 8*b/5];
%! str = sprintf ("%g miles/hr = %g kilometers/hr\n", b);
%! fmt = "%f miles/hr = %f kilometers/hr";
%! c = textscan (str, fmt);
%! assert (b(1,:)', c{1})
%! assert (b(2,:)', c{2})

%!test
%! ## "headerlines" combined with an explicit line count N
%! fname = tempname ();
%! fid = fopen (fname, "w");
%! fputs (fid, "header 1\nheader 2\n1 2\n3 4\n5 6\n");
%! fclose (fid);
%! fid = fopen (fname, "r");
%! c = textscan (fid, "%f %f", 2, "headerlines", 2);
%! fclose (fid);
%! unlink (fname);
%! assert (c{1}, [1; 3]);
%! assert (c{2}, [2; 4]);

%!test
%! ## Requesting more lines than a string input contains must not fail
%! c = textscan ("1 2\n3 4", "%f %f", 5);
%! assert (c{1}, [1; 3]);
%! assert (c{2}, [2; 4]);
142 | |
143 |