Mercurial > hg > octave-nkf
diff scripts/io/textscan.m @ 14565:98aaebc56d7c
2012-03-25 Philip Nienhuis <prnienhuis@users.sf.net>
* textscan.m, textread.m
Updated texinfo header (@var{n} format repeat count section)
Replaced slow fgets / str concat section by block reading
Supplied varargout in some cases to avoid unneeded errors
Improvements to coding style
* textscan.m
Moved some code upward to avoid having multiple fclose statements
author | Philip Nienhuis <prnienhuis@users.sf.net> |
---|---|
date | Fri, 30 Mar 2012 18:44:01 +0200 |
parents | 86854d032a37 |
children | e97ec01d4157 |
line wrap: on
line diff
--- a/scripts/io/textscan.m +++ b/scripts/io/textscan.m @@ -52,8 +52,11 @@ ## have been encountered. If set to 0 or false, return an error and no data. ## @end itemize ## -## The optional input @var{n} specifes the number of times to use -## @var{format} when parsing, i.e., the format repeat count. +## When reading from a character string, optional input argument @var{n} +## specifes the number of times @var{format} should be used (i.e., to limit +## the amount of data read). +## When reading fro file, @var{n} specifes the number of data lines to read; +## in this sense it differs slightly from the format repeat count in strread. ## ## The output @var{C} is a cell array whose length is given by the number ## of format specifiers. @@ -66,6 +69,8 @@ function [C, position] = textscan (fid, format = "%f", varargin) + BUFLENGTH = 4096; ## Read buffer + ## Check input if (nargin < 1) print_usage (); @@ -89,6 +94,11 @@ else nlines = Inf; endif + if (nlines < 1) + printf ("textscan: N = 0, no data read\n"); + C = []; + return + endif if (! any (strcmpi (args, "emptyvalue"))) ## Matlab returns NaNs for missing values @@ -148,26 +158,17 @@ endif str = fid; else + st_pos = ftell (fid); ## Skip header lines if requested headerlines = find (strcmpi (args, "headerlines"), 1); ## Beware of zero valued headerline, fskipl would skip to EOF if (! isempty (headerlines) && (args{headerlines + 1} > 0)) fskipl (fid, varargin{headerlines + 1}); args(headerlines:headerlines+1) = []; + st_pos = ftell (fid); endif - if (isfinite (nlines) && (nlines >= 0)) - str = tmp_str = ""; - n = 0; - ## FIXME: Can this be done without slow loop? - while (ischar (tmp_str) && n++ < nlines) - tmp_str = fgets (fid); - if (ischar (tmp_str)) - str = strcat (str, tmp_str); - endif - endwhile - else - str = fread (fid, "char=>char").'; - endif + ## Read a first file chunk. 
Rest follows after endofline processing + [str, count] = fscanf (fid, "%c", BUFLENGTH); endif ## Check for empty result @@ -189,8 +190,8 @@ error ("textscan: character value required for EndOfLine"); endif else - ## Determine EOL from file. Search for EOL candidates in first 3000 chars - eol_srch_len = min (length (str), 3000); + ## Determine EOL from file. Search for EOL candidates in first BUFLENGTH chars + eol_srch_len = min (length (str), BUFLENGTH); ## First try DOS (CRLF) if (! isempty (strfind ("\r\n", str(1 : eol_srch_len)))) eol_char = "\r\n"; @@ -202,7 +203,47 @@ eol_char = "\n"; endif ## Set up the default endofline param value - args(end+1:end+2) = {'endofline', eol_char}; + args(end+1:end+2) = {"endofline", eol_char}; + endif + + if (!ischar (fid)) + ## Now that we know what EOL looks like, we can process format_repeat_count. + ## FIXME The below isn't ML-compatible: counts lines, not format string uses + if (isfinite (nlines) && (nlines >= 0)) + l_eol_char = length (eol_char); + eoi = findstr (str, eol_char); + n_eoi = length (eoi); + nblks = 0; + ## Avoid slow repeated str concatenation, first seek requested end of data + while (n_eoi < nlines && count == BUFLENGTH) + [nstr, count] = fscanf (fid, "%c", BUFLENGTH); + if (count > 0) + ## Watch out for multichar EOL being missed across buffer boundaries + if (l_eol_char > 1) + str = [str(end - length (eol_char) + 2 : end) nstr]; + else + str = nstr; + endif + eoi = findstr (str, eol_char); + n_eoi += numel (eoi); + ++nblks; + endif + endwhile + ## OK, found EOL delimiting last requested line. Compute ptr (incl. 
EOL) + if (isempty (eoi)) + printf ("textscan: format repeat count specified but no endofline found\n"); + data_size = nblks * BUFLENGTH + count; + else + ## Compute data size to read incl complete EOL + data_size = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi) \ + + l_eol_char - 1; + endif + fseek (fid, st_pos, "bof"); + str = fscanf (fid, "%c", data_size); + else + fseek (fid, st_pos, "bof"); + str = fread (fid, "char=>char").'; + endif endif ## Determine the number of data fields @@ -223,6 +264,7 @@ endif if (nargout == 2) + ## Remember file position (persistent var) position = ftell (fid); endif