# HG changeset patch # User Jordi GutiƩrrez Hermoso # Date 1350673785 14400 # Node ID 9a455cf96dbe096b1103d662114cde9acd3abe42 # Parent 3bf1e0da76b01561eab11a98a9da1b10e87b3237 Incorporate importdata.m into Octave sources * importdata.m (importdata): Various style fixes to fix Octave house style. Use ## for comments, "" for strings, parens around if conditionals, ! instead of "not" and "~", spaces before "(" in function calls, under_scores instead of camelCap, as in Appendix D.5 of the Octave manual. * module.mk: Add importdata.m * io.txi: Add to docs diff --git a/doc/interpreter/io.txi b/doc/interpreter/io.txi --- a/doc/interpreter/io.txi +++ b/doc/interpreter/io.txi @@ -230,6 +230,11 @@ @DOCSTRING(textscan) +The @code{importdata} function has the ability to work with a wide +variety of data. + +@DOCSTRING(importdata) + @menu * Saving Data on Unexpected Exits:: @end menu diff --git a/scripts/io/importdata.m b/scripts/io/importdata.m --- a/scripts/io/importdata.m +++ b/scripts/io/importdata.m @@ -14,40 +14,42 @@ ## along with this program; if not, see . ## -*- texinfo -*- -## @deftypefn {Function File} {@var{A} =} importdata (@var{fileName}) -## @deftypefnx {Function File} {@var{A} =} importdata (@var{fileName}, @var{delimiter}) -## @deftypefnx {Function File} {@var{A} =} importdata (@var{fileName}, @var{delimiter}, @var{headerRows}) +## @deftypefn {Function File} {@var{A} =} importdata (@var{fname}) +## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}) +## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}, @var{header_rows}) ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...) -## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{headerRows}] =} importdata (...) +## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...) ## Importing data from file. ## -## Importing the contents of file @var{fileName} into workspace. +## Importing the contents of file @var{fname} into workspace. ## ## Input parameters: -## @table @input -## @item @var{fileName} +## @itemize +## @item @var{fname} ## The file name for the file to import. ## ## @item @var{delimiter} -## The character separating columns of data. Use @code{\t} for tab. (Only valid for ascii files) +## The character separating columns of data. Use @code{\t} for tab. +## (Only valid for ascii files) ## -## @item @var{headerRows} +## @item @var{header_rows} ## Number of header rows before the data begins. (Only valid for ascii files) -## @end table +## @end itemize ## ## Different file types are supported: ## @itemize ## @item Ascii table ## -## Importing ascii table using the specified number of header rows and the specified delimiter. +## Importing ascii table using the specified number of header rows and +## the specified delimiter. ## ## @item Image file ## -## @item Matlab file +## @item @sc{Matlab} file ## ## @item Wav file ## -## @end table +## @end itemize ## ## @seealso{textscan, dlmread, csvread, load} ## @end deftypefn @@ -56,104 +58,113 @@ ## 2012-10-16 First version -function [output, delimiter, headerRows] = importdata(varargin) +function [output, delimiter, header_rows] = importdata (varargin) - # Default values - fileName = ''; - delimiter = ''; - headerRows = -1; + ## Default values + fname = ""; + delimiter = ""; + header_rows = -1; ########## - # Check input arguments + ## Check input arguments if (nargin < 1) print_usage (); endif - fileName = varargin{1}; - # Check that the file name really is a string - if not(ischar(fileName)) - error('importdata: File name needs to be a string.') + fname = varargin{1}; + ## Check that the file name really is a string + if (! ischar (fname)) + error ("importdata: file name needs to be a string"); endif - if strcmpi(fileName, '-pastespecial') - error('importdata: Option ''-pastespecial'' not implemented.') + if ( strcmpi (fname, "-pastespecial")) + error ("importdata: option -pastespecial not implemented"); endif if (nargin > 1) delimiter = varargin{2}; - # Check that the delimiter really is a string - if not(ischar(delimiter)) - error('importdata: Delimiter needs to be a character.') + ## Check that the delimiter really is a string + if (!ischar (delimiter)) + error("importdata: delimiter needs to be a character"); endif - if ((length(delimiter) > 1) && not(strcmpi(delimiter, '\t'))) - error('importdata: Delimiter cannot be longer than 1 character.') + if (length (delimiter) > 1 && !strcmpi (delimiter, "\t")) + error("importdata: delimiter cannot be longer than 1 character"); endif - if strcmpi(delimiter, '\') - delimiter = '\\'; + if (strcmpi (delimiter, "\\")) + delimiter = "\\\\"; endif endif if (nargin > 2) - headerRows = varargin{3}; - if ((~isnumeric(headerRows)) || (headerRows < 0)) - error('importdata: Number of header rows needs to be an integer number >= 0.') + header_rows = varargin{3}; + if (!isnumeric (header_rows) || header_rows < 0) + error ("importdata: number of header rows needs to be an integer number >= 0"); endif endif if (nargin > 3) - error('importdata: Too many input arguments.') + error ("importdata: too many input arguments"); endif ########## - # Check file format - # Get the extension from the file name. - [d n fileExt v] = fileparts(fileName); - # Make sure file extension is in lower case. - fileExt = lower(fileExt); + ## Check file format + ## Get the extension from the file name. + [d n fileExt v] = fileparts (fname); + ## Make sure file extension is in lower case. + fileExt = lower (fileExt); switch fileExt - case {'.au','.snd'} - error(['importdata: Not implemented for file format ''' fileExt '''.']) - case '.avi' - error(['importdata: Not implemented for file format ''' fileExt '''.']) - case {'.bmp', '.cur', '.gif', '.hdf', '.ico', '.jpe', '.jpeg', '.jpg', '.pbm', '.pcx', '.pgm', '.png', '.pnm', '.ppm', '.ras', '.tif', '.tiff', '.xwd'} + case {".au", ".snd"} + error (sprintf ("importdata: not implemented for file format %s", + fileExt)); + case ".avi" + error (sprintf ("importdata: not implemented for file format %s", + fileExt)); + case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", \ + ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", \ + ".tif", ".tiff", ".xwd"} delimiter = NaN; - headerRows = 0; - [output.cdata, output.colormap, output.alpha] = imread(fileName); - case '.mat' + header_rows = 0; + [output.cdata, output.colormap, output.alpha] = imread (fname); + case ".mat" delimiter = NaN; - headerRows = 0; - output = load(fileName); - case '.wk1' - error(['importdata: Not implemented for file format ''' fileExt '''.']) - case {'.xls', '.xlsx'} - #FIXME: implement Excel import. - error(['importdata: Not implemented for file format ''' fileExt '''.']) - case {'.wav', '.wave'} + header_rows = 0; + output = load (fname); + case ".wk1" + error (sprintf ("importdata: not implemented for file format %s", + fileExt)); + case {".xls", ".xlsx"} + ## FIXME: implement Excel import. + error (sprintf ("importdata: not implemented for file format %s", + fileExt)); + case {".wav", ".wave"} delimiter = NaN; - headerRows = 0; - [output.data, output.fs] = wavread(fileName); + header_rows = 0; + [output.data, output.fs] = wavread (fname); otherwise - # Assume the file is in ascii format. - [output, delimiter, headerRows] = importdata_ascii(fileName, delimiter, headerRows); + ## Assume the file is in ascii format. + [output, delimiter, header_rows] = \ + importdata_ascii (fname, delimiter, header_rows); endswitch - # If there are any empty fields in the output structure, then remove them - if (isstruct(output) && (length(output) == 1)) - fields = fieldnames(output); - for i=1:length(fields) - if isempty(getfield(output, fields{i})) - output = rmfield(output, fields{i}); + ## If there are any empty fields in the output structure, then remove them + if (isstruct (output) && length (output) == 1) + fields = fieldnames (output); + for i=1:length (fields) + if (isempty (getfield (output, fields{i}))) + output = rmfield (output, fields{i}); endif endfor - # If only one field is left, replace the structure with the field, i.e. output = output.onlyFieldLeft - # Update the list of fields - fields = fieldnames(output); - if (length(fields) == 1) - output = getfield(output, fields{1}); + ## If only one field is left, replace the structure with the field, + ## i.e. output = output.onlyFieldLeft + + ## Update the list of fields + fields = fieldnames (output); + if (length (fields) == 1) + output = getfield (output, fields{1}); endif endif endfunction @@ -161,105 +172,123 @@ ######################################## -function [output, delimiter, headerRows] = importdata_ascii(fileName, delimiter, headerRows) +function [output, delimiter, header_rows] = \ + importdata_ascii (fname, delimiter, header_rows) - # Define the fields in the output structure so that the order will be correct. + ## Define the fields in the output structure so that the order will be + ## correct. + output.data = []; output.textdata = []; output.rowheaders = []; output.colheaders = []; - # Read file into string and count the number of header rows + ## Read file into string and count the number of header rows + file_content = fileread (fname); + + ## The characters need to be in a row vector instead of a column + ## vector to be recognized as a proper string. + file_content = file_content(:)'; - fileContent = fileread(fileName); - # The characters need to be in a row vector instead of a column vector to be recognized as a proper string. - if (size(fileContent,2) == 1) - fileContent = fileContent'; + ## Split the file into rows (using \r\n or \n as delimiters between rows). + file_content_rows = regexp (file_content, "\r?\n", "split"); + + ## FIXME: guess delimiter, if it isn't defined + if (isempty (delimiter)) + error ("importdata: Guessing delimiter is not implemented yet, you have to specify it."); endif - # Split the file into rows (using \r\n or \n as delimiters between rows). - fileContentRows = regexp(fileContent, '\r?\n', 'split'); - - #FIXME: guess delimiter, if it isn't defined - if (isempty(delimiter)) - error('importdata: Guessing delimiter is not implemented yet, you have to specify it.') - endif - - #FIXME: A more intelligent way to count number of header rows. This is needed e.g. when delimiter=' ' and the header contains spaces... + ## FIXME: A more intelligent way to count number of header rows. This + ## is needed e.g. when delimiter=' ' and the header contains spaces... - # If number of header rows is undefined, then count the number of header rows by step through row by row and look for the delimiter. - # Assume that the header can't contain any delimiter. - if (headerRows < 0) - headerRows = 0; - for i=1:length(fileContentRows) - if (isempty(regexp(fileContentRows{i}, delimiter, 'once'))) - headerRows++; + ## If number of header rows is undefined, then count the number of + ## header rows by step through row by row and look for the delimiter. + ## Assume that the header can't contain any delimiter. + if (header_rows < 0) + header_rows = 0; + for i=1:length (file_content_rows) + if (isempty (regexp(file_content_rows{i}, delimiter, "once"))) + header_rows++; else - # Data part has begun and therefore no more header rows can be found + ## Data part has begun and therefore no more header rows can be + ## found break; endif endfor endif - # Put the header rows in output.textdata. - if (headerRows > 0) - output.textdata = fileContentRows(1:headerRows)'; + ## Put the header rows in output.textdata. + if (header_rows > 0) + output.textdata = file_content_rows (1:header_rows)'; endif - # If space is the delimiter, then remove spaces in the beginning of each data row. - if strcmpi(delimiter, ' ') - for i=(headerRows+1):length(fileContentRows) - # strtrim does not only remove the leading spaces but also the tailing spaces, but that doesn't really matter. - fileContentRows{i} = strtrim(fileContentRows{i}); + ## If space is the delimiter, then remove spaces in the beginning of + ## each data row. + if (strcmpi (delimiter, " ")) + for i=(header_rows+1):length (file_content_rows) + ## strtrim does not only remove the leading spaces but also the + ## tailing spaces, but that doesn't really matter. + file_content_rows{i} = strtrim (file_content_rows{i}); endfor endif - # Remove empty data rows. Go through them backwards so that you wont get out of bounds. - for i=length(fileContentRows):-1:(headerRows+1) - if (length(fileContentRows{i}) < 1) - fileContentRows = [fileContentRows(1:i-1), fileContentRows(i+1:length(fileContentRows))]; + ## Remove empty data rows. Go through them backwards so that you wont + ## get out of bounds. + for i=length (file_content_rows):-1:(header_rows + 1) + if (length (file_content_rows{i}) < 1) + file_content_rows = [file_content_rows(1:i-1), \ + file_content_rows(i+1:length(file_content_rows))]; endif endfor - # Count the number of data columns. - # If there are different number of columns, use the greatest value. - dataColumns = 0; - delimiterPattern = delimiter; - # If space is the delimiter, then multiple spaces should count as ONE delimiter. Also ignore leading spaces. - if (strcmpi(delimiter, ' ')) - delimiterPattern = ' +'; + ## Count the number of data columns. If there are different number of + ## columns, use the greatest value. + data_columns = 0; + delimiter_pattern = delimiter; + ## If space is the delimiter, then multiple spaces should count as ONE + ## delimiter. Also ignore leading spaces. + if (strcmpi (delimiter, " ")) + delimiter_pattern = ' +'; endif - for i=(headerRows+1):length(fileContentRows) - dataColumns = max(dataColumns, length(regexp(fileContentRows{i}, delimiterPattern, 'split'))); + for i=(header_rows+1):length(file_content_rows) + data_columns = max(data_columns, + length (regexp (file_content_rows{i}, + delimiter_pattern, "split"))); endfor - # Go through the data and put it in either output.data or output.textdata depending on if it is numeric or not. - output.data = NaN(length(fileContentRows)-headerRows, dataColumns); - for i=(headerRows+1):length(fileContentRows) - # Only use the row if it contains anything other than white-space characters. - if (length(regexp(fileContentRows{i}, '\S','match')) > 0) - rowData = regexp(fileContentRows{i}, delimiterPattern, 'split'); - for j=1:length(rowData) - # Try to convert the column to a number, if it works put it in output.data, otherwise in output.textdata - if (length(str2num(rowData{j})) > 0) - output.data((i-headerRows),j) = str2num(rowData{j}); + ## Go through the data and put it in either output.data or + ## output.textdata depending on if it is numeric or not. + output.data = NaN (length (file_content_rows) - header_rows, data_columns); + for i=(header_rows+1):length(file_content_rows) + ## Only use the row if it contains anything other than white-space + ## characters. + if (length (regexp (file_content_rows{i}, "\S","match")) > 0) + row_data = regexp (file_content_rows{i}, delimiter_pattern, "split"); + + for j=1:length(row_data) + ## Try to convert the column to a number, if it works put it in + ## output.data, otherwise in output.textdata + if (length(str2num(row_data{j})) > 0) + output.data ((i-header_rows), j) = str2num (row_data{j}); else - output.textdata{i,j} = rowData{j}; + output.textdata{i,j} = row_data{j}; endif endfor + endif endfor - # Check wether rowheaders or colheaders should be used - if ((headerRows == dataColumns) && (size(output.textdata,2) == 1)) + ## Check wether rowheaders or colheaders should be used + if ((header_rows == data_columns) && (size (output.textdata, 2) == 1)) output.rowheaders = output.textdata; - elseif (size(output.textdata,2) == dataColumns) + elseif (size (output.textdata, 2) == data_columns) output.colheaders = output.textdata(end,:); endif - # When delimiter = '\t' convert it to a tab, as is done in the Matlab version - if (strcmpi(delimiter, '\t')) - delimiter = sprintf('\t'); + ## When delimiter = "\\t" convert it to a tab, as is done in the Matlab + ## version + if (strcmpi (delimiter, "\\t")) + delimiter = "\t"; endif endfunction diff --git a/scripts/io/module.mk b/scripts/io/module.mk --- a/scripts/io/module.mk +++ b/scripts/io/module.mk @@ -6,6 +6,7 @@ io/csvwrite.m \ io/dlmwrite.m \ io/fileread.m \ + io/importdata.m \ io/is_valid_file_id.m \ io/strread.m \ io/textscan.m \