Mercurial > hg > octave-lyh
view scripts/io/importdata.m @ 15547:9a455cf96dbe
Incorporate importdata.m into Octave sources
* importdata.m (importdata): Various style fixes to fix Octave house
style. Use ## for comments, "" for strings, parens around if
conditionals, ! instead of "not" and "~", spaces before "(" in
function calls, under_scores instead of camelCap, as in Appendix D.5
of the Octave manual.
* module.mk: Add importdata.m
* io.txi: Add to docs
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Fri, 19 Oct 2012 15:09:45 -0400 |
parents | 3bf1e0da76b0 |
children | 48b1312bb0fe |
line wrap: on
line source
## Copyright (C) 2012 Erik Kjellson <erikiiofph7@users.sourceforge.net> ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} {@var{A} =} importdata (@var{fname}) ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}) ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}, @var{header_rows}) ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...) ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...) ## Importing data from file. ## ## Importing the contents of file @var{fname} into workspace. ## ## Input parameters: ## @itemize ## @item @var{fname} ## The file name for the file to import. ## ## @item @var{delimiter} ## The character separating columns of data. Use @code{\t} for tab. ## (Only valid for ascii files) ## ## @item @var{header_rows} ## Number of header rows before the data begins. (Only valid for ascii files) ## @end itemize ## ## Different file types are supported: ## @itemize ## @item Ascii table ## ## Importing ascii table using the specified number of header rows and ## the specified delimiter. ## ## @item Image file ## ## @item @sc{Matlab} file ## ## @item Wav file ## ## @end itemize ## ## @seealso{textscan, dlmread, csvread, load} ## @end deftypefn ## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net> ## 2012-10-16 First version function [output, delimiter, header_rows] = importdata (varargin) ## Default values fname = ""; delimiter = ""; header_rows = -1; ########## ## Check input arguments if (nargin < 1) print_usage (); endif fname = varargin{1}; ## Check that the file name really is a string if (! ischar (fname)) error ("importdata: file name needs to be a string"); endif if ( strcmpi (fname, "-pastespecial")) error ("importdata: option -pastespecial not implemented"); endif if (nargin > 1) delimiter = varargin{2}; ## Check that the delimiter really is a string if (!ischar (delimiter)) error("importdata: delimiter needs to be a character"); endif if (length (delimiter) > 1 && !strcmpi (delimiter, "\t")) error("importdata: delimiter cannot be longer than 1 character"); endif if (strcmpi (delimiter, "\\")) delimiter = "\\\\"; endif endif if (nargin > 2) header_rows = varargin{3}; if (!isnumeric (header_rows) || header_rows < 0) error ("importdata: number of header rows needs to be an integer number >= 0"); endif endif if (nargin > 3) error ("importdata: too many input arguments"); endif ########## ## Check file format ## Get the extension from the file name. [d n fileExt v] = fileparts (fname); ## Make sure file extension is in lower case. fileExt = lower (fileExt); switch fileExt case {".au", ".snd"} error (sprintf ("importdata: not implemented for file format %s", fileExt)); case ".avi" error (sprintf ("importdata: not implemented for file format %s", fileExt)); case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", \ ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", \ ".tif", ".tiff", ".xwd"} delimiter = NaN; header_rows = 0; [output.cdata, output.colormap, output.alpha] = imread (fname); case ".mat" delimiter = NaN; header_rows = 0; output = load (fname); case ".wk1" error (sprintf ("importdata: not implemented for file format %s", fileExt)); case {".xls", ".xlsx"} ## FIXME: implement Excel import. error (sprintf ("importdata: not implemented for file format %s", fileExt)); case {".wav", ".wave"} delimiter = NaN; header_rows = 0; [output.data, output.fs] = wavread (fname); otherwise ## Assume the file is in ascii format. [output, delimiter, header_rows] = \ importdata_ascii (fname, delimiter, header_rows); endswitch ## If there are any empty fields in the output structure, then remove them if (isstruct (output) && length (output) == 1) fields = fieldnames (output); for i=1:length (fields) if (isempty (getfield (output, fields{i}))) output = rmfield (output, fields{i}); endif endfor ## If only one field is left, replace the structure with the field, ## i.e. output = output.onlyFieldLeft ## Update the list of fields fields = fieldnames (output); if (length (fields) == 1) output = getfield (output, fields{1}); endif endif endfunction ######################################## function [output, delimiter, header_rows] = \ importdata_ascii (fname, delimiter, header_rows) ## Define the fields in the output structure so that the order will be ## correct. output.data = []; output.textdata = []; output.rowheaders = []; output.colheaders = []; ## Read file into string and count the number of header rows file_content = fileread (fname); ## The characters need to be in a row vector instead of a column ## vector to be recognized as a proper string. file_content = file_content(:)'; ## Split the file into rows (using \r\n or \n as delimiters between rows). file_content_rows = regexp (file_content, "\r?\n", "split"); ## FIXME: guess delimiter, if it isn't defined if (isempty (delimiter)) error ("importdata: Guessing delimiter is not implemented yet, you have to specify it."); endif ## FIXME: A more intelligent way to count number of header rows. This ## is needed e.g. when delimiter=' ' and the header contains spaces... ## If number of header rows is undefined, then count the number of ## header rows by step through row by row and look for the delimiter. ## Assume that the header can't contain any delimiter. if (header_rows < 0) header_rows = 0; for i=1:length (file_content_rows) if (isempty (regexp(file_content_rows{i}, delimiter, "once"))) header_rows++; else ## Data part has begun and therefore no more header rows can be ## found break; endif endfor endif ## Put the header rows in output.textdata. if (header_rows > 0) output.textdata = file_content_rows (1:header_rows)'; endif ## If space is the delimiter, then remove spaces in the beginning of ## each data row. if (strcmpi (delimiter, " ")) for i=(header_rows+1):length (file_content_rows) ## strtrim does not only remove the leading spaces but also the ## tailing spaces, but that doesn't really matter. file_content_rows{i} = strtrim (file_content_rows{i}); endfor endif ## Remove empty data rows. Go through them backwards so that you wont ## get out of bounds. for i=length (file_content_rows):-1:(header_rows + 1) if (length (file_content_rows{i}) < 1) file_content_rows = [file_content_rows(1:i-1), \ file_content_rows(i+1:length(file_content_rows))]; endif endfor ## Count the number of data columns. If there are different number of ## columns, use the greatest value. data_columns = 0; delimiter_pattern = delimiter; ## If space is the delimiter, then multiple spaces should count as ONE ## delimiter. Also ignore leading spaces. if (strcmpi (delimiter, " ")) delimiter_pattern = ' +'; endif for i=(header_rows+1):length(file_content_rows) data_columns = max(data_columns, length (regexp (file_content_rows{i}, delimiter_pattern, "split"))); endfor ## Go through the data and put it in either output.data or ## output.textdata depending on if it is numeric or not. output.data = NaN (length (file_content_rows) - header_rows, data_columns); for i=(header_rows+1):length(file_content_rows) ## Only use the row if it contains anything other than white-space ## characters. if (length (regexp (file_content_rows{i}, "\S","match")) > 0) row_data = regexp (file_content_rows{i}, delimiter_pattern, "split"); for j=1:length(row_data) ## Try to convert the column to a number, if it works put it in ## output.data, otherwise in output.textdata if (length(str2num(row_data{j})) > 0) output.data ((i-header_rows), j) = str2num (row_data{j}); else output.textdata{i,j} = row_data{j}; endif endfor endif endfor ## Check wether rowheaders or colheaders should be used if ((header_rows == data_columns) && (size (output.textdata, 2) == 1)) output.rowheaders = output.textdata; elseif (size (output.textdata, 2) == data_columns) output.colheaders = output.textdata(end,:); endif ## When delimiter = "\\t" convert it to a tab, as is done in the Matlab ## version if (strcmpi (delimiter, "\\t")) delimiter = "\t"; endif endfunction