# HG changeset patch # User Ben Abbott # Date 1287675495 -28800 # Node ID 64728cd28d7ac9d43af4d4c4922f292fdc8014d3 # Parent 304b0ed4ca5674137f1e2e964b96eacaebad50fc strread.m: Improve compatibility with Matlab. diff --git a/scripts/ChangeLog b/scripts/ChangeLog --- a/scripts/ChangeLog +++ b/scripts/ChangeLog @@ -1,3 +1,7 @@ +2010-10-21 Ben Abbott + + * io/strread.m: Improve compatibility with Matlab. + 2010-10-21 John W. Eaton * general/private/__isequal__.m: Style fixes. diff --git a/scripts/io/strread.m b/scripts/io/strread.m --- a/scripts/io/strread.m +++ b/scripts/io/strread.m @@ -103,8 +103,8 @@ if (nargin < 1) print_usage (); endif - - if (!ischar (str) || !ischar (str)) + + if (!ischar (str) || !ischar (formatstr)) error ("strread: first and second input arguments must be strings"); endif @@ -112,6 +112,7 @@ comment_flag = false; numeric_fill_value = 0; white_spaces = " \n\r\t\b"; + delimiter_str = ""; for n = 1:2:length (varargin) switch (lower (varargin {n})) case "commentstyle" @@ -134,15 +135,18 @@ numeric_fill_value = varargin {n+1}; case "bufsize" ## XXX: We could synthesize this, but that just seems weird... - warning ("strread: property \"bufsize\" is not implemented"); + warning ("strread: property \"bufsize\" is not implemented"); case "whitespace" white_spaces = varargin {n+1}; case "expchars" - warning ("strread: property \"expchars\" is not implemented"); + warning ("strread: property \"expchars\" is not implemented"); otherwise warning ("strread: unknown property \"%s\"", varargin {n}); endswitch endfor + if (isempty (delimiter_str)) + delimiter_str = white_spaces; + endif ## Parse format string idx = strfind (formatstr, "%")'; @@ -180,36 +184,64 @@ str = cellslices (str, [1, cstop + c2len], [cstart - 1, len]); str = [str{:}]; endif - + + ## Determine the number of words per line + [~, ~, ~, fmt_words] = regexp (formatstr, "[^\\s]+"); + + num_words_per_line = numel (fmt_words); + for m = 1:numel(fmt_words) + ## Convert formats such as "%Ns" to "%s" (see the FIXME below) + if (length (fmt_words{m}) > 2) + if (strcmp (fmt_words{m}(1:2), "%*")) + fmt_words{m} = "%*"; + elseif (fmt_words{m}(1) == "%") + fmt_words{m} = fmt_words{m}([1, end]); + endif + endif + endfor + ## Split 'str' into words - words = split_by (str, white_spaces); + words = split_by (str, delimiter_str); num_words = numel (words); - num_lines = ceil (num_words / nspecif); - + num_lines = ceil (num_words / num_words_per_line); + ## For each specifier k = 1; - for m = 1:nspecif - data = words (m:nspecif:end); - + for m = 1:num_words_per_line + data = words (m:num_words_per_line:end); ## Map to format - switch (specif(m,:)) + ## FIXME - add support for formats like "%4s" or "<%s>", "%[a-zA-Z]" + ## Someone with regexp experience is needed. + switch fmt_words{m} case "%s" data (end+1:num_lines) = {""}; varargout {k} = data'; k++; case {"%d", "%f"} + n = cellfun (@isempty, data); data = str2double (data); + data(n) = numeric_fill_value; data (end+1:num_lines) = numeric_fill_value; varargout {k} = data.'; k++; - case "%*" - ## do nothing + case {"%*", "%*s"} + ## skip the word + otherwise + ## Ensure descriptive content is consistent + if (numel (unique (data)) > 1 + || ! strcmpi (unique (data), fmt_words{m})) + error ("strread: format does not match data") + endif endswitch endfor endfunction function out = split_by (text, sep) - out = strtrim (strsplit (text, sep, true)); + sep = union (sep, "\n"); + pat = sprintf ("[^%s]+", sep); + [~, ~, ~, out] = regexp (text, pat); + out(cellfun (@isempty, out)) = {""}; + out = strtrim (out); endfunction %!test @@ -244,3 +276,15 @@ %! a = strread (str, '%f', 'commentstyle', 'c'); %! assert (a, [1; 2; 3]); +%!test +%! str = sprintf ("Tom 100 miles/hr\nDick 90 miles/hr\nHarry 80 miles/hr"); +%! fmt = "%s %f miles/hr"; +%! c = cell (1, 2); +%! [c{:}] = strread (str, fmt); +%! assert (c{1}, {"Tom"; "Dick"; "Harry"}) +%! assert (c{2}, [100; 90; 80]) + +%!test +%! a = strread ("a b c, d e, , f", "%s", "delimiter", ","); +%! assert (a, {"a b c"; "d e"; ""; "f"}); +