Mercurial > hg > octave-lyh
diff scripts/miscellaneous/unpack.m @ 6082:588721ac2140
[project @ 2006-10-25 03:46:17 by jwe]
author | jwe |
---|---|
date | Wed, 25 Oct 2006 03:46:17 +0000 |
parents | |
children | 0295db941ae7 |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/scripts/miscellaneous/unpack.m @@ -0,0 +1,266 @@ +## Copyright (C) 2006 Bill Denney +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +## 02110-1301, USA. + +## -*- texinfo -*- +## @deftypefn {Function File} {@var{files} =} unpack (@var{file}, @var{dir}) +## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir}, @var{filetype}) +## Unpack the archive @var{file} based on its extension to the directory +## @var{dir}. If @var{file} is a cellstr, then all files will be +## handled individually. If @var{dir} is not specified, it defaults to +## the current directory. It returns a list of @var{files} +## unpacked. If a directory is in the file list, then the +## @var{filetype} to unpack must also be specified. +## +## The @var{files} includes the entire path to the output files. +## @seealso{bzip2,bunzip2,tar,untar,gzip,gunzip,zip,unzip} +## @end deftypefn + +## Author: Bill Denney <denney@seas.upenn.edu> + +function filelist = unpack (file, directory, filetype) + + if (nargin < 1 || nargin > 3) + print_usage (); + endif + + if (nargin < 2) + directory = "."; + endif + if (nargin < 3) + filetype = ""; + endif + + if (ischar (file)) + if (isdir (file)) + if (isempty (filetype)) + error ("unpack: filetype must be given for a directory"); + elseif (! any (strcmpi (filetype, "gunzip"))) + error ("unpack: filetype must be gunzip for a directory"); + endif + else + [pathstr, name, ext] = fileparts (file); + + ## Check to see if it's .tar.gz, .tar.Z, etc. + if (any (strcmpi ({".gz" ".Z" ".bz2" ".bz"}, ext))) + [tmppathstr, tmpname, tmpext] = fileparts (name); + if (strcmpi (tmpext, ".tar")) + name = tmpname; + ext = strcat (tmpext, ext); + endif + endif + + ## If the file is a url, download it and then work with that + ## file. + if (! isempty (strfind (file, "://"))) + ## FIXME -- the above is not a perfect test for a url + urlfile = file; + ## FIXME -- should we name the file that we download with the + ## same file name as the url requests? + tmpfile = strcat (tmpnam (), ext); + [file, success, msg] = urlwrite (urlfile, tmpfile); + if (! success) + error ("unpack: could not get \"%s\": %s", urlfile, msg); + endif + endif + + endif + + ## canonicalize_file_name returns empty if the file isn't found, so + ## use that to check for existence + cfile = canonicalize_file_name (file); + + if (isempty (cfile)) + error ("unpack: file \"%s\" not found.", file); + else + file = cfile; + endif + + elseif (iscellstr (file)) + files = {}; + for i = 1:numel (file) + tmpfiles = unpack (file{i}, directory); + files = {files{:} tmpfiles{:}}; + endfor + + else + error ("unpack: invalid input file class, %s", class(file)); + endif + + ## Instructions on what to do for any extension. + ## + ## The field names are the file extension without periods. + ## The first cell is what is executed to unpack an archive verbosely. + ## The second cell is what is executed to unpack an archive quietly. + ## The third cell is the function to execute on output to get the + ## files list. + ## The fourth cell indicates if the files may need to be manually moved + ## (i.e. tar and unzip decompress into the current directory while + ## bzip2 and gzip decompress the file at its location). + persistent commandlist; + if (isempty (commandlist)) + commandlist.gz = {"gunzip -v -r \"%s\"", ... + "gunzip -r \"%s\"", ... + @__parse_gzip__, true}; + commandlist.z = commandlist.gz; + commandlist.bz2 = {"bunzip2 -v \"%s\"", ... + "bunzip2 \"%s\"", ... + @__parse_bzip2__, true}; + commandlist.bz = commandlist.bz2; + commandlist.tar = {"tar -x -v -f \"%s\"", ... + "tar -x -f \"%s\"", ... + @__parse_tar__, false}; + commandlist.targz = {"gunzip -c \"%s\" | tar -x -v", ... + "gunzip -c \"%s\" | tar -x", ... + @__parse_tar__, false}; + commandlist.tgz = commandlist.targz; + commandlist.tarbz2 = {"bunzip2 -c \"%s\" | tar -x -v", ... + "bunzip2 -c \"%s\" | tar -x", ... + @__parse_tar__, false}; + commandlist.tarbz = commandlist.tarbz2; + commandlist.tbz2 = commandlist.tarbz2; + commandlist.tbz = commandlist.tarbz2; + commandlist.zip = {"unzip \"%s\"", ... + "unzip -q \"%s\"", ... + @__parse_zip__, false}; + endif + + nodotext = ext(! ismember (ext, ".")); + + origdir = pwd (); + + if (isfield (commandlist, nodotext)) + [commandv, commandq, parser, move] = deal (commandlist.(nodotext){:}); + cstartdir = canonicalize_file_name (origdir); + cenddir = canonicalize_file_name (directory); + needmove = move && ! strcmp (cstartdir, cenddir); + if (nargout > 0 || needmove) + command = commandv; + else + command = commandq; + endif + else + warning ("unpack:filetype", "unrecognised file type, %s", ext); + files = file; + return; + endif + + ## Create the directory if necessary. + s = stat (directory); + if (isempty (s)) + [status, msg] = mkdir (directory); + if (! status) + error ("unpack: mkdir failed to create %s: %s", directory, msg); + endif + elseif (! S_ISDIR (s.mode)) + error ("unpack: %s: not a directory", directory); + endif + + unwind_protect + cd (directory); + [status, output] = system (sprintf (strcat (command " 2>&1"), file)); + unwind_protect_cleanup + cd (origdir); + end_unwind_protect + + if (status) + error ("unpack: unarchiving program exited with status: %d\n%s", + status, output); + endif + + if (needmove || nargout > 0) + ## Trim the last cr if needed. + ## FIXME -- will this need to change to a check for "\r\n" for windows? + if (output(length (output)) == "\n") + output(length (output)) = []; + endif + files = parser (cellstr (split (output, "\n")))'; + + ## Move files if necessary + if (needmove) + [st, msg, msgid] = movefile (files, directory); + if (! st) + error ("unpack: unable to move files to \"%s\": %s", + directory, msg); + endif + + ## Fix the names for the files since they were moved. + for i = 1:numel (files) + files{i} = strrep (files{i}, cstartdir, cenddir); + endfor + endif + + ## Return output if requested. + if (nargout > 0) + filelist = files; + endif + endif + +endfunction + +function files = __parse_zip__ (output) + ## Parse the output from zip and unzip. + + for i = 1:length (output) + files{i} = output{i}(14:length(output{i})); + endfor +endfunction + +function output = __parse_tar__ (output) + ## This is a noop, but it makes things simpler for other cases. +endfunction + +function files = __parse_gzip__ (output) + ## Parse the output from gzip and gunzip returning the files + ## commpressed (or decompressed). + + files = {}; + ## The middle ": " should indicate a good place to start looking for + ## the filename. + for i = 1:length (output) + colons = strfind (output{i}, ":"); + if (isempty (colons)) + warning ("unpack:parsing", "Unable to parse line (gzip missing colon):\n%s", output{i}); + else + midcolon = colons(ceil (length (colons)/2)); + thisstr = output{i}(midcolon+2:length(output{i})); + idx = index (thisstr, "with") + 5; + if (isempty (idx)) + warning ("unpack:parsing", "Unable to parse line (gzip missing with):\n%s", output{i}); + else + files{i} = thisstr(idx:length (thisstr)); + endif + endif + endfor +endfunction + +function files = __parse_bzip2__ (output) + ## Parse the output from bzip2 and bunzip2 returning the files + ## commpressed (or decompressed). + + files = {}; + for i = 1:length (output) + ## the -5 is to remove the ".bz2:" + endoffilename = rindex (output{i}, ": ") - 5; + if (isempty (endoffilename)) + warning ("unpack:parsing", "Unable to parse line:\n%s", output{i}); + else + files{i} = output{i}(3:endoffilename); + endif + endfor +endfunction