Mercurial > hg > octave-lyh
view scripts/io/importdata.m @ 15546:3bf1e0da76b0
importdata: new function
author | Erik Kjellson <erikiiofph7@users.sourceforge.net> |
---|---|
date | Fri, 19 Oct 2012 13:56:22 -0400 |
parents | |
children | 9a455cf96dbe |
line wrap: on
line source
## Copyright (C) 2012 Erik Kjellson <erikiiofph7@users.sourceforge.net>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {@var{A} =} importdata (@var{fileName})
## @deftypefnx {Function File} {@var{A} =} importdata (@var{fileName}, @var{delimiter})
## @deftypefnx {Function File} {@var{A} =} importdata (@var{fileName}, @var{delimiter}, @var{headerRows})
## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...)
## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{headerRows}] =} importdata (...)
## Importing data from file.
##
## Importing the contents of file @var{fileName} into workspace.
##
## Input parameters:
## @table @asis
## @item @var{fileName}
## The file name for the file to import.
##
## @item @var{delimiter}
## The character separating columns of data.  Use @code{\t} for tab.
## The delimiter is matched literally, so characters such as @code{|} or
## @code{.} can be used.  (Only valid for ascii files)
##
## @item @var{headerRows}
## Number of header rows before the data begins.  Must be a non-negative
## integer.  (Only valid for ascii files)
## @end table
##
## Different file types are supported:
## @itemize
## @item Ascii table
##
## Importing ascii table using the specified number of header rows and
## the specified delimiter.
##
## @item Image file
##
## @item Matlab file
##
## @item Wav file
##
## @end itemize
##
## @seealso{textscan, dlmread, csvread, load}
## @end deftypefn

## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net>
## 2012-10-16 First version

function [output, delimiter, headerRows] = importdata (varargin)

  # Default values.  A negative headerRows means "detect automatically".
  fileName   = '';
  delimiter  = '';
  headerRows = -1;

  ##########
  # Check input arguments

  if (nargin < 1)
    print_usage ();
  endif

  fileName = varargin{1};
  # Check that the file name really is a string
  if (not (ischar (fileName)))
    error ('importdata: File name needs to be a string.');
  endif
  if (strcmpi (fileName, '-pastespecial'))
    error ('importdata: Option ''-pastespecial'' not implemented.');
  endif

  if (nargin > 1)
    delimiter = varargin{2};
    # Check that the delimiter really is a string
    if (not (ischar (delimiter)))
      error ('importdata: Delimiter needs to be a character.');
    endif
    # The two-character escape '\t' is the only multi-character delimiter.
    if ((length (delimiter) > 1) && not (strcmpi (delimiter, '\t')))
      error ('importdata: Delimiter cannot be longer than 1 character.');
    endif
    # NOTE: the delimiter is deliberately left untouched here.  Any escaping
    # needed for pattern matching is done inside importdata_ascii so that the
    # value returned to the caller is the delimiter itself.
  endif

  if (nargin > 2)
    headerRows = varargin{3};
    # Require a real, non-negative integer scalar; the scalar test comes
    # first so that the comparisons below never see a non-scalar value.
    if (not (isnumeric (headerRows)) || not (isscalar (headerRows))
        || not (isreal (headerRows)) || (headerRows < 0)
        || (headerRows != fix (headerRows)))
      error ('importdata: Number of header rows needs to be an integer number >= 0.');
    endif
  endif

  if (nargin > 3)
    error ('importdata: Too many input arguments.');
  endif

  ##########
  # Check file format

  # Get the extension from the file name (directory and base name unused).
  [~, ~, fileExt] = fileparts (fileName);
  # Make sure file extension is in lower case.
  fileExt = lower (fileExt);

  switch (fileExt)
    case {'.au', '.snd', '.avi', '.wk1', '.xls', '.xlsx'}
      #FIXME: implement Excel import.
      error (['importdata: Not implemented for file format ''' fileExt '''.']);
    case {'.bmp', '.cur', '.gif', '.hdf', '.ico', '.jpe', '.jpeg', '.jpg', ...
          '.pbm', '.pcx', '.pgm', '.png', '.pnm', '.ppm', '.ras', '.tif', ...
          '.tiff', '.xwd'}
      delimiter  = NaN;
      headerRows = 0;
      [output.cdata, output.colormap, output.alpha] = imread (fileName);
    case '.mat'
      delimiter  = NaN;
      headerRows = 0;
      output = load (fileName);
    case {'.wav', '.wave'}
      delimiter  = NaN;
      headerRows = 0;
      [output.data, output.fs] = wavread (fileName);
    otherwise
      # Assume the file is in ascii format.
      [output, delimiter, headerRows] = ...
        importdata_ascii (fileName, delimiter, headerRows);
  endswitch

  # If there are any empty fields in the output structure, then remove them
  if (isstruct (output) && (numel (output) == 1))
    fields = fieldnames (output);
    for i = 1:numel (fields)
      if (isempty (output.(fields{i})))
        output = rmfield (output, fields{i});
      endif
    endfor

    # If only one field is left, replace the structure with the field,
    # i.e. output = output.onlyFieldLeft
    fields = fieldnames (output);
    if (numel (fields) == 1)
      output = output.(fields{1});
    endif
  endif

endfunction


########################################

# Read a delimited ascii table.
#
# Inputs:
#   fileName   - name of the file to read.
#   delimiter  - single column-separator character, or the two-character
#                string '\t' meaning tab.  Must not be empty.
#   headerRows - number of leading header rows; a negative value requests
#                automatic detection (rows up to the first one containing
#                the delimiter are counted as header).
#
# Outputs:
#   output     - struct with fields data, textdata, rowheaders, colheaders.
#   delimiter  - the delimiter used ('\t' is returned as a real tab).
#   headerRows - the number of header rows used.
function [output, delimiter, headerRows] = importdata_ascii (fileName, delimiter, headerRows)

  # Define the fields in the output structure so that the order will be
  # correct.
  output.data       = [];
  output.textdata   = [];
  output.rowheaders = [];
  output.colheaders = [];

  # Read file into string.
  fileContent = fileread (fileName);
  # The characters need to be in a row vector instead of a column vector to
  # be recognized as a proper string.
  if (size (fileContent, 2) == 1)
    fileContent = fileContent';
  endif

  # Split the file into rows (using \r\n or \n as delimiters between rows).
  fileContentRows = regexp (fileContent, '\r?\n', 'split');

  #FIXME: guess delimiter, if it isn't defined
  if (isempty (delimiter))
    error ('importdata: Guessing delimiter is not implemented yet, you have to specify it.');
  endif

  # Build the regular expression used to find the delimiter.
  if (strcmpi (delimiter, '\t'))
    # The two-character escape is already a valid pattern for a tab.
    delimiterPattern = '\t';
  elseif (strcmp (delimiter, ' '))
    # Multiple spaces should count as ONE delimiter.
    delimiterPattern = ' +';
  else
    # Match the delimiter literally, even if it is a regexp metacharacter
    # such as '|', '.', '*' or '\'.
    delimiterPattern = regexptranslate ('escape', delimiter);
  endif

  #FIXME: A more intelligent way to count number of header rows.  This is
  #       needed e.g. when delimiter=' ' and the header contains spaces...

  # If number of header rows is undefined, then count the number of header
  # rows by stepping through row by row and looking for the delimiter.
  # Assume that the header can't contain any delimiter.
  if (headerRows < 0)
    headerRows = 0;
    for i = 1:numel (fileContentRows)
      if (isempty (regexp (fileContentRows{i}, delimiterPattern, 'once')))
        headerRows++;
      else
        # Data part has begun and therefore no more header rows can be found
        break;
      endif
    endfor
  endif

  if (headerRows > numel (fileContentRows))
    error ('importdata: Number of header rows exceeds the number of rows in the file.');
  endif

  # Put the header rows in output.textdata.
  if (headerRows > 0)
    output.textdata = fileContentRows(1:headerRows)';
  endif

  # If space is the delimiter, then remove spaces in the beginning of each
  # data row.  strtrim also removes trailing spaces, which is harmless here.
  if (strcmp (delimiter, ' '))
    for i = (headerRows+1):numel (fileContentRows)
      fileContentRows{i} = strtrim (fileContentRows{i});
    endfor
  endif

  # Remove empty data rows (header rows are always kept).
  isEmptyRow = cellfun (@isempty, fileContentRows);
  isEmptyRow(1:headerRows) = false;
  fileContentRows(isEmptyRow) = [];

  # Count the number of data columns.  If there are different number of
  # columns, use the greatest value.
  dataColumns = 0;
  for i = (headerRows+1):numel (fileContentRows)
    dataColumns = max (dataColumns, ...
                       numel (regexp (fileContentRows{i}, delimiterPattern, 'split')));
  endfor

  # Go through the data and put it in either output.data or output.textdata
  # depending on if it is numeric or not.
  output.data = NaN (numel (fileContentRows) - headerRows, dataColumns);
  for i = (headerRows+1):numel (fileContentRows)
    # Only use the row if it contains anything other than white-space
    # characters.
    if (not (isempty (regexp (fileContentRows{i}, '\S', 'once'))))
      rowData = regexp (fileContentRows{i}, delimiterPattern, 'split');
      for j = 1:numel (rowData)
        field = rowData{j};
        # str2double never evaluates the text (unlike str2num, which uses
        # eval) and returns NaN for anything that is not a number.
        value = str2double (field);
        # A NaN result only means "not numeric" if the text itself was not
        # a literal NaN/NA.
        isLiteralNaN = not (isempty (regexpi (strtrim (field), ...
                                              '^[+-]?(nan|na)$', 'once')));
        if (not (isnan (value)) || isLiteralNaN)
          output.data(i-headerRows, j) = value;
        else
          # Text is stored at the row index of the file (header rows
          # included) so that it lines up with the header rows above.
          output.textdata{i,j} = field;
        endif
      endfor
    endif
  endfor

  # Check whether rowheaders or colheaders should be used
  if ((headerRows == dataColumns) && (size (output.textdata, 2) == 1))
    output.rowheaders = output.textdata;
  elseif (size (output.textdata, 2) == dataColumns)
    output.colheaders = output.textdata(end,:);
  endif

  # When delimiter = '\t' convert it to a tab, as is done in the Matlab
  # version
  if (strcmpi (delimiter, '\t'))
    delimiter = sprintf ('\t');
  endif

endfunction