changeset 15548:9a455cf96dbe

Incorporate importdata.m into Octave sources * importdata.m (importdata): Various style fixes to fix Octave house style. Use ## for comments, "" for strings, parens around if conditionals, ! instead of "not" and "~", spaces before "(" in function calls, under_scores instead of camelCap, as in Appendix D.5 of the Octave manual. * module.mk: Add importdata.m * io.txi: Add to docs
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Fri, 19 Oct 2012 15:09:45 -0400
parents 3bf1e0da76b0
children 874091868fba
files doc/interpreter/io.txi scripts/io/importdata.m scripts/io/module.mk
diffstat 3 files changed, 169 insertions(+), 134 deletions(-) [+]
line wrap: on
line diff
--- a/doc/interpreter/io.txi
+++ b/doc/interpreter/io.txi
@@ -230,6 +230,11 @@
 
 @DOCSTRING(textscan)
 
+The @code{importdata} function has the ability to work with a wide
+variety of data.
+
+@DOCSTRING(importdata)
+
 @menu
 * Saving Data on Unexpected Exits::
 @end menu
--- a/scripts/io/importdata.m
+++ b/scripts/io/importdata.m
@@ -14,40 +14,42 @@
 ## along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 ## -*- texinfo -*-
-## @deftypefn  {Function File} {@var{A} =} importdata (@var{fileName})
-## @deftypefnx {Function File} {@var{A} =} importdata (@var{fileName}, @var{delimiter})
-## @deftypefnx {Function File} {@var{A} =} importdata (@var{fileName}, @var{delimiter},  @var{headerRows})
+## @deftypefn  {Function File} {@var{A} =} importdata (@var{fname})
+## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter})
+## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter},  @var{header_rows})
 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...)
-## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{headerRows}] =} importdata (...)
+## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...)
 ## Importing data from file.
 ##
-## Importing the contents of file @var{fileName} into workspace.
+## Importing the contents of file @var{fname} into workspace.
 ##
 ## Input parameters:
-## @table @input
-## @item @var{fileName}
+## @itemize
+## @item @var{fname}
 ## The file name for the file to import.
 ## 
 ## @item @var{delimiter}
-## The character separating columns of data. Use @code{\t} for tab. (Only valid for ascii files)
+## The character separating columns of data. Use @code{\t} for tab.
+## (Only valid for ascii files)
 ##
-## @item @var{headerRows}
+## @item @var{header_rows}
 ## Number of header rows before the data begins. (Only valid for ascii files)
-## @end table
+## @end itemize
 ##
 ## Different file types are supported:
 ## @itemize
 ## @item Ascii table
 ##
-## Importing ascii table using the specified number of header rows and the specified delimiter.
+## Importing ascii table using the specified number of header rows and
+## the specified delimiter.
 ##
 ## @item Image file
 ##
-## @item Matlab file
+## @item @sc{Matlab} file
 ##
 ## @item Wav file
 ##
-## @end table
+## @end itemize
 ##
 ## @seealso{textscan, dlmread, csvread, load}
 ## @end deftypefn
@@ -56,104 +58,113 @@
 ## 2012-10-16 First version
 
 
-function [output, delimiter, headerRows] = importdata(varargin)
+function [output, delimiter, header_rows] = importdata (varargin)
 
-  # Default values
-  fileName   = '';
-  delimiter  = '';
-  headerRows = -1;
+  ## Default values
+  fname   = "";
+  delimiter  = "";
+  header_rows = -1;
 
   ##########
 
-  # Check input arguments
+  ## Check input arguments
 
   if (nargin < 1)
     print_usage ();
   endif
 
-  fileName = varargin{1};
-  # Check that the file name really is a string
-  if not(ischar(fileName))
-    error('importdata: File name needs to be a string.')
+  fname = varargin{1};
+  ## Check that the file name really is a string
+  if (! ischar (fname))
+    error ("importdata: file name needs to be a string");
   endif
-  if strcmpi(fileName, '-pastespecial')
-    error('importdata: Option ''-pastespecial'' not implemented.')
+  if ( strcmpi (fname, "-pastespecial"))
+    error ("importdata: option -pastespecial not implemented");
   endif
 
   if (nargin > 1)
     delimiter = varargin{2};
-    # Check that the delimiter really is a string
-    if not(ischar(delimiter))
-      error('importdata: Delimiter needs to be a character.')
+    ## Check that the delimiter really is a string
+    if (!ischar (delimiter))
+      error("importdata: delimiter needs to be a character");
     endif
-    if ((length(delimiter) > 1) && not(strcmpi(delimiter, '\t')))
-      error('importdata: Delimiter cannot be longer than 1 character.')
+    if (length (delimiter) > 1 && !strcmpi (delimiter, "\t"))
+      error("importdata: delimiter cannot be longer than 1 character");
     endif
-    if strcmpi(delimiter, '\')
-      delimiter = '\\';
+    if (strcmpi (delimiter, "\\"))
+      delimiter = "\\\\";
     endif
   endif
 
   if (nargin > 2)
-    headerRows = varargin{3};
-    if ((~isnumeric(headerRows)) || (headerRows < 0))
-      error('importdata: Number of header rows needs to be an integer number >= 0.')
+    header_rows = varargin{3};
+    if (!isnumeric (header_rows) || header_rows < 0)
+      error ("importdata: number of header rows needs to be an integer number >= 0");
     endif
   endif
 
   if (nargin > 3)
-    error('importdata: Too many input arguments.')
+    error ("importdata: too many input arguments");
   endif
 
   ##########
 
-  # Check file format
-  # Get the extension from the file name.
-  [d n fileExt v] = fileparts(fileName);
-  # Make sure file extension is in lower case.
-  fileExt = lower(fileExt);
+  ## Check file format
+  ## Get the extension from the file name.
+  [d n fileExt v] = fileparts (fname);
+  ## Make sure file extension is in lower case.
+  fileExt = lower (fileExt);
 
   switch fileExt
-    case {'.au','.snd'}
-      error(['importdata: Not implemented for file format ''' fileExt '''.'])
-    case '.avi'
-      error(['importdata: Not implemented for file format ''' fileExt '''.'])
-    case {'.bmp', '.cur', '.gif', '.hdf', '.ico', '.jpe', '.jpeg', '.jpg', '.pbm', '.pcx', '.pgm', '.png', '.pnm', '.ppm', '.ras', '.tif', '.tiff', '.xwd'}
+    case {".au", ".snd"}
+      error (sprintf ("importdata: not implemented for file format %s", 
+                      fileExt));
+    case ".avi"
+      error (sprintf ("importdata: not implemented for file format %s", 
+                      fileExt));
+    case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", \
+          ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", \
+          ".tif", ".tiff", ".xwd"}
       delimiter  = NaN;
-      headerRows = 0;
-      [output.cdata, output.colormap, output.alpha] = imread(fileName);
-    case '.mat'
+      header_rows = 0;
+      [output.cdata, output.colormap, output.alpha] = imread (fname);
+    case ".mat"
       delimiter  = NaN;
-      headerRows = 0;
-      output = load(fileName);
-    case '.wk1'
-      error(['importdata: Not implemented for file format ''' fileExt '''.'])
-    case {'.xls', '.xlsx'}
-  #FIXME: implement Excel import.
-      error(['importdata: Not implemented for file format ''' fileExt '''.'])
-    case {'.wav', '.wave'}
+      header_rows = 0;
+      output = load (fname);
+    case ".wk1"
+      error (sprintf ("importdata: not implemented for file format %s", 
+                      fileExt));
+    case {".xls", ".xlsx"}
+      ## FIXME: implement Excel import.
+      error (sprintf ("importdata: not implemented for file format %s", 
+                      fileExt));
+    case {".wav", ".wave"}
       delimiter  = NaN;
-      headerRows = 0;
-      [output.data, output.fs] = wavread(fileName);
+      header_rows = 0;
+      [output.data, output.fs] = wavread (fname);
     otherwise
-      # Assume the file is in ascii format.
-      [output, delimiter, headerRows] = importdata_ascii(fileName, delimiter, headerRows);
+      ## Assume the file is in ascii format.
+      [output, delimiter, header_rows]  = \
+          importdata_ascii (fname, delimiter, header_rows);
   endswitch
 
-  # If there are any empty fields in the output structure, then remove them
-  if (isstruct(output) && (length(output) == 1))
-    fields = fieldnames(output);
-    for i=1:length(fields)
-      if isempty(getfield(output, fields{i}))
-        output = rmfield(output, fields{i});
+  ## If there are any empty fields in the output structure, then remove them
+  if (isstruct (output) && length (output) == 1)
+    fields = fieldnames (output);
+    for i=1:length (fields)
+      if (isempty (getfield (output, fields{i})))
+        output = rmfield (output, fields{i});
       endif
     endfor
 
-    # If only one field is left, replace the structure with the field, i.e. output = output.onlyFieldLeft
-    # Update the list of fields
-    fields = fieldnames(output);
-    if (length(fields) == 1)
-      output = getfield(output, fields{1});
+    ## If only one field is left, replace the structure with the field,
+    ## i.e. output = output.onlyFieldLeft
+
+    ## Update the list of fields
+    fields = fieldnames (output);
+    if (length (fields) == 1)
+      output = getfield (output, fields{1});
     endif
   endif
 endfunction
@@ -161,105 +172,123 @@
 
 ########################################
 
-function [output, delimiter, headerRows] = importdata_ascii(fileName, delimiter, headerRows)
+function [output, delimiter, header_rows] = \
+      importdata_ascii (fname, delimiter, header_rows)
 
-  # Define the fields in the output structure so that the order will be correct.
+  ## Define the fields in the output structure so that the order will be
+  ## correct.
+
   output.data       = [];
   output.textdata   = [];
   output.rowheaders = [];
   output.colheaders = [];
 
-  # Read file into string and count the number of header rows
+  ## Read file into string and count the number of header rows
+  file_content = fileread (fname);
+
+  ## The characters need to be in a row vector instead of a column
+  ## vector to be recognized as a proper string.
+  file_content = file_content(:)';
 
-  fileContent = fileread(fileName);
-  # The characters need to be in a row vector instead of a column vector to be recognized as a proper string.
-  if (size(fileContent,2) == 1)
-    fileContent = fileContent';
+  ## Split the file into rows (using \r\n or \n as delimiters between rows).
+  file_content_rows = regexp (file_content, "\r?\n", "split");
+
+  ## FIXME: guess delimiter, if it isn't defined
+  if (isempty (delimiter))
+    error ("importdata: Guessing delimiter is not implemented yet, you have to specify it.");
   endif
 
-  # Split the file into rows (using \r\n or \n as delimiters between rows).
-  fileContentRows = regexp(fileContent, '\r?\n', 'split');
-
-  #FIXME: guess delimiter, if it isn't defined
-  if (isempty(delimiter))
-    error('importdata: Guessing delimiter is not implemented yet, you have to specify it.')
-  endif
-
-  #FIXME: A more intelligent way to count number of header rows. This is needed e.g. when delimiter=' ' and the header contains spaces...
+  ## FIXME: A more intelligent way to count number of header rows. This
+  ## is needed e.g. when delimiter=' ' and the header contains spaces...
 
-  # If number of header rows is undefined, then count the number of header rows by step through row by row and look for the delimiter.
-  # Assume that the header can't contain any delimiter.
-  if (headerRows < 0)
-    headerRows = 0;
-    for i=1:length(fileContentRows)
-      if (isempty(regexp(fileContentRows{i}, delimiter, 'once')))
-        headerRows++;
+  ## If number of header rows is undefined, then count the number of
+  ## header rows by stepping through row by row and looking for the delimiter.
+  ## Assume that the header can't contain any delimiter.
+  if (header_rows < 0)
+    header_rows = 0;
+    for i=1:length (file_content_rows)
+      if (isempty (regexp(file_content_rows{i}, delimiter, "once")))
+        header_rows++;
       else
-        # Data part has begun and therefore no more header rows can be found
+        ## Data part has begun and therefore no more header rows can be
+        ## found
         break;
       endif
     endfor
   endif
 
-  # Put the header rows in output.textdata.
-  if (headerRows > 0)
-    output.textdata   = fileContentRows(1:headerRows)';
+  ## Put the header rows in output.textdata.
+  if (header_rows > 0)
+    output.textdata   = file_content_rows (1:header_rows)';
   endif
 
-  # If space is the delimiter, then remove spaces in the beginning of each data row.
-  if strcmpi(delimiter, ' ')
-    for i=(headerRows+1):length(fileContentRows)
-      # strtrim does not only remove the leading spaces but also the tailing spaces, but that doesn't really matter.
-      fileContentRows{i} = strtrim(fileContentRows{i});
+  ## If space is the delimiter, then remove spaces in the beginning of
+  ## each data row.
+  if (strcmpi (delimiter, " "))
+    for i=(header_rows+1):length (file_content_rows)
+      ## strtrim does not only remove the leading spaces but also the
+      ## trailing spaces, but that doesn't really matter.
+      file_content_rows{i} = strtrim (file_content_rows{i});
     endfor
   endif
 
-  # Remove empty data rows. Go through them backwards so that you wont get out of bounds.
-  for i=length(fileContentRows):-1:(headerRows+1)
-    if (length(fileContentRows{i}) < 1)
-      fileContentRows = [fileContentRows(1:i-1), fileContentRows(i+1:length(fileContentRows))];
+  ## Remove empty data rows. Go through them backwards so that you won't
+  ## get out of bounds.
+  for i=length (file_content_rows):-1:(header_rows + 1)
+    if (length (file_content_rows{i}) < 1)
+      file_content_rows = [file_content_rows(1:i-1), \
+                           file_content_rows(i+1:length(file_content_rows))];
     endif
   endfor
 
-  # Count the number of data columns.
-  # If there are different number of columns, use the greatest value.
-  dataColumns = 0;
-  delimiterPattern = delimiter;
-  # If space is the delimiter, then multiple spaces should count as ONE delimiter. Also ignore leading spaces.
-  if (strcmpi(delimiter, ' '))
-    delimiterPattern = ' +';
+  ## Count the number of data columns. If rows have different numbers of
+  ## columns, use the greatest value.
+  data_columns = 0;
+  delimiter_pattern = delimiter;
+  ## If space is the delimiter, then multiple spaces should count as ONE
+  ## delimiter. Also ignore leading spaces.
+  if (strcmpi (delimiter, " "))
+    delimiter_pattern = ' +';
   endif
-  for i=(headerRows+1):length(fileContentRows)
-    dataColumns = max(dataColumns, length(regexp(fileContentRows{i}, delimiterPattern, 'split')));
+  for i=(header_rows+1):length(file_content_rows)
+    data_columns = max(data_columns, 
+                       length (regexp (file_content_rows{i},
+                                       delimiter_pattern, "split")));
   endfor
 
 
-  # Go through the data and put it in either output.data or output.textdata depending on if it is numeric or not.
-  output.data = NaN(length(fileContentRows)-headerRows, dataColumns);
-  for i=(headerRows+1):length(fileContentRows)
-    # Only use the row if it contains anything other than white-space characters.
-    if (length(regexp(fileContentRows{i}, '\S','match')) > 0)
-      rowData = regexp(fileContentRows{i}, delimiterPattern, 'split');
-      for j=1:length(rowData)
-        # Try to convert the column to a number, if it works put it in output.data, otherwise in output.textdata
-        if (length(str2num(rowData{j})) > 0)
-          output.data((i-headerRows),j) = str2num(rowData{j});
+  ## Go through the data and put it in either output.data or
+  ## output.textdata depending on if it is numeric or not.
+  output.data = NaN (length (file_content_rows) - header_rows, data_columns);
+  for i=(header_rows+1):length(file_content_rows)
+    ## Only use the row if it contains anything other than white-space
+    ## characters.
+    if (length (regexp (file_content_rows{i}, "\S","match")) > 0)
+      row_data = regexp (file_content_rows{i}, delimiter_pattern, "split");
+
+      for j=1:length(row_data)
+        ## Try to convert the column to a number, if it works put it in
+        ## output.data, otherwise in output.textdata
+        if (length(str2num(row_data{j})) > 0)
+          output.data ((i-header_rows), j) = str2num (row_data{j});
         else
-          output.textdata{i,j} = rowData{j};
+          output.textdata{i,j} = row_data{j};
         endif
       endfor
+
     endif
   endfor
 
-  # Check wether rowheaders or colheaders should be used
-  if ((headerRows == dataColumns) && (size(output.textdata,2) == 1))
+  ## Check whether rowheaders or colheaders should be used
+  if ((header_rows == data_columns) && (size (output.textdata, 2) == 1))
     output.rowheaders = output.textdata;
-  elseif (size(output.textdata,2) == dataColumns)
+  elseif (size (output.textdata, 2) == data_columns)
     output.colheaders = output.textdata(end,:);
   endif
 
-  # When delimiter = '\t' convert it to a tab, as is done in the Matlab version
-  if (strcmpi(delimiter, '\t'))
-    delimiter = sprintf('\t');
+  ## When delimiter = "\\t" convert it to a tab, as is done in the Matlab
+  ## version
+  if (strcmpi (delimiter, "\\t"))
+    delimiter = "\t";
   endif
 endfunction
--- a/scripts/io/module.mk
+++ b/scripts/io/module.mk
@@ -6,6 +6,7 @@
   io/csvwrite.m \
   io/dlmwrite.m \
   io/fileread.m \
+  io/importdata.m \
   io/is_valid_file_id.m \
   io/strread.m \
   io/textscan.m \