7017
|
1 ## Copyright (C) 2006, 2007 Bill Denney |
6082
|
2 ## |
|
3 ## This file is part of Octave. |
|
4 ## |
|
5 ## Octave is free software; you can redistribute it and/or modify it |
|
6 ## under the terms of the GNU General Public License as published by |
7016
|
7 ## the Free Software Foundation; either version 3 of the License, or (at |
|
8 ## your option) any later version. |
6082
|
9 ## |
|
10 ## Octave is distributed in the hope that it will be useful, but |
|
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 ## General Public License for more details. |
|
14 ## |
|
15 ## You should have received a copy of the GNU General Public License |
7016
|
16 ## along with Octave; see the file COPYING. If not, see |
|
17 ## <http://www.gnu.org/licenses/>. |
6082
|
18 |
|
19 ## -*- texinfo -*- |
|
20 ## @deftypefn {Function File} {@var{files} =} unpack (@var{file}, @var{dir}) |
|
21 ## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir}, @var{filetype}) |
|
22 ## Unpack the archive @var{file} based on its extension to the directory |
|
23 ## @var{dir}. If @var{file} is a cellstr, then all files will be |
|
24 ## handled individually. If @var{dir} is not specified, it defaults to |
|
25 ## the current directory. It returns a list of @var{files} |
|
26 ## unpacked. If a directory is in the file list, then the |
|
27 ## @var{filetype} to unpack must also be specified. |
|
28 ## |
|
29 ## The @var{files} includes the entire path to the output files. |
|
30 ## @seealso{bzip2,bunzip2,tar,untar,gzip,gunzip,zip,unzip} |
|
31 ## @end deftypefn |
|
32 |
|
33 ## Author: Bill Denney <denney@seas.upenn.edu> |
|
34 |
|
function filelist = unpack (file, directory, filetype)

  ## Unpack the archive FILE into DIRECTORY by running the external
  ## program that matches the file extension.
  ##
  ## Inputs:
  ##   file      - archive name, a name containing "://" (downloaded with
  ##               urlwrite first), a directory (only together with
  ##               filetype "gunzip"), or a cellstr of such names.
  ##   directory - destination directory; defaults to "." and is created
  ##               when it does not exist.
  ##   filetype  - only consulted when FILE is a directory; must then be
  ##               "gunzip".
  ##
  ## Output:
  ##   filelist  - cell array of the file names reported by the
  ##               unarchiving program (only set when requested).

  if (nargin < 1 || nargin > 3)
    print_usage ();
  endif

  if (nargin < 2)
    directory = ".";
  endif
  if (nargin < 3)
    filetype = "";
  endif

  if (! ischar (file) && ! iscellstr (file))
    error ("unpack: invalid input file class, %s", class(file));
  endif

  ## A list of files is handled one element at a time.  The filetype is
  ## forwarded so that directories inside the list can be processed, and
  ## we return right away because nothing below applies to a list.
  if (iscellstr (file))
    files = {};
    for i = 1:numel (file)
      tmpfiles = unpack (file{i}, directory, filetype);
      files = {files{:} tmpfiles{:}};
    endfor
    if (nargout > 0)
      filelist = files;
    endif
    return;
  endif

  if (isdir (file))
    if (isempty (filetype))
      error ("unpack: filetype must be given for a directory");
    elseif (! any (strcmpi (filetype, "gunzip")))
      error ("unpack: filetype must be gunzip for a directory");
    endif
    ## Only gunzip is accepted for directories, so select the gzip entry
    ## of the command table below.
    ext = ".gz";
  else
    [pathstr, name, ext] = fileparts (file);

    ## Check to see if it's .tar.gz, .tar.Z, etc.
    if (any (strcmpi ({".gz" ".Z" ".bz2" ".bz"}, ext)))
      [tmppathstr, tmpname, tmpext] = fileparts (name);
      if (strcmpi (tmpext, ".tar"))
        name = tmpname;
        ext = strcat (tmpext, ext);
      endif
    endif

    ## If the file is a url, download it and then work with that file.
    if (! isempty (strfind (file, "://")))
      ## FIXME -- the above is not a perfect test for a url
      urlfile = file;
      ## FIXME -- should we name the file that we download with the
      ## same file name as the url requests?
      tmpfile = strcat (tmpnam (), ext);
      [file, success, msg] = urlwrite (urlfile, tmpfile);
      if (! success)
        error ("unpack: could not get \"%s\": %s", urlfile, msg);
      endif
    endif
  endif

  ## canonicalize_file_name returns empty if the file isn't found, so
  ## use that to check for existence.
  cfile = canonicalize_file_name (file);
  if (isempty (cfile))
    error ("unpack: file \"%s\" not found.", file);
  endif
  file = cfile;

  ## Instructions on what to do for any extension.
  ##
  ## The field names are the lower-case file extension without periods.
  ## The first cell is what is executed to unpack an archive verbosely.
  ## The second cell is what is executed to unpack an archive quietly.
  ## The third cell is the function to execute on output to get the
  ## files list.
  ## The fourth cell indicates if the files may need to be manually moved
  ## (i.e. tar and unzip decompress into the current directory while
  ## bzip2 and gzip decompress the file at its location).
  persistent commandlist;
  if (isempty (commandlist))
    commandlist.gz = {"gzip -d -v -r \"%s\"", ...
                      "gzip -d -r \"%s\"", ...
                      @__parse_gzip__, true};
    commandlist.z = commandlist.gz;
    commandlist.bz2 = {"bzip2 -d -v \"%s\"", ...
                       "bzip2 -d \"%s\"", ...
                       @__parse_bzip2__, true};
    commandlist.bz = commandlist.bz2;
    commandlist.tar = {"tar -x -v -f \"%s\"", ...
                       "tar -x -f \"%s\"", ...
                       @__parse_tar__, false};
    commandlist.targz = {"gzip -d -c \"%s\" | tar -x -v", ...
                         "gzip -d -c \"%s\" | tar -x", ...
                         @__parse_tar__, false};
    commandlist.tgz = commandlist.targz;
    ## .tar.Z is recognised by the extension check above, so give it an
    ## entry as well.
    commandlist.tarz = commandlist.targz;
    commandlist.tarbz2 = {"bzip2 -d -c \"%s\" | tar -x -v", ...
                          "bzip2 -d -c \"%s\" | tar -x", ...
                          @__parse_tar__, false};
    commandlist.tarbz = commandlist.tarbz2;
    commandlist.tbz2 = commandlist.tarbz2;
    commandlist.tbz = commandlist.tarbz2;
    commandlist.zip = {"unzip \"%s\"", ...
                       "unzip -q \"%s\"", ...
                       @__parse_zip__, false};
  endif

  ## Field names are case sensitive while the extension test above is
  ## not, so fold the extension to lower case before the lookup.
  nodotext = tolower (ext(! ismember (ext, ".")));

  if (! isfield (commandlist, nodotext))
    warning ("unpack:filetype", "unrecognised file type, %s", ext);
    ## Nothing was unpacked; hand back the input file as a one-element
    ## list so that a caller asking for output gets a defined cellstr.
    if (nargout > 0)
      filelist = {file};
    endif
    return;
  endif

  [commandv, commandq, parser, move] = deal (commandlist.(nodotext){:});

  ## Create the directory if necessary.  This has to happen before the
  ## directory name is canonicalized, because canonicalize_file_name
  ## returns empty for a path that does not exist.
  s = stat (directory);
  if (isempty (s))
    [status, msg] = mkdir (directory);
    if (! status)
      error ("unpack: mkdir failed to create %s: %s", directory, msg);
    endif
  elseif (! S_ISDIR (s.mode))
    error ("unpack: %s: not a directory", directory);
  endif

  origdir = pwd ();
  cstartdir = canonicalize_file_name (origdir);
  cenddir = canonicalize_file_name (directory);
  needmove = move && ! strcmp (cstartdir, cenddir);

  ## The verbose command is needed whenever the program output has to be
  ## parsed for file names.
  wantlist = (nargout > 0 || needmove);
  if (wantlist)
    command = commandv;
  else
    command = commandq;
  endif

  unwind_protect
    cd (directory);
    [status, output] = system (sprintf (strcat (command, " 2>&1"), file));
  unwind_protect_cleanup
    cd (origdir);
  end_unwind_protect

  if (status)
    error ("unpack: unarchiving program exited with status: %d\n%s", ...
           status, output);
  endif

  if (wantlist)
    ## Trim the last cr if needed.
    ## FIXME -- will this need to change to a check for "\r\n" for windows?
    if (! isempty (output) && output(length (output)) == "\n")
      output(length (output)) = [];
    endif

    ## An archive may legitimately produce no output at all.
    if (isempty (output))
      files = {};
    else
      files = parser (cellstr (split (output, "\n")))';
    endif

    ## Move files if necessary
    if (needmove && ! isempty (files))
      [st, msg, msgid] = movefile (files, directory);
      if (! st)
        error ("unpack: unable to move files to \"%s\": %s", ...
               directory, msg);
      endif

      ## Fix the names for the files since they were moved.
      for i = 1:numel (files)
        files{i} = strrep (files{i}, cstartdir, cenddir);
      endfor
    endif

    ## Return output if requested.
    if (nargout > 0)
      filelist = files;
    endif
  endif

endfunction
|
214 |
|
function files = __parse_zip__ (output)
  ## Parse the output from zip and unzip.
  ##
  ## OUTPUT is a cell array with one line of program output per element.
  ## Each entry line starts with a 13 character action label (for
  ## example "  inflating: "), so the file name begins at column 14.
  ## The "Archive:" banner line printed by unzip is not a file entry and
  ## is skipped.  Returns a (possibly empty) row cell array of names.

  files = {};
  for i = 1:length (output)
    line = output{i};
    if (strncmp (line, "Archive:", 8))
      continue;
    endif
    files{end+1} = line(14:length (line));
  endfor
endfunction
|
222 |
|
function files = __parse_tar__ (output)
  ## tar already prints one bare file name per line, so the lines are
  ## handed back unchanged.  The function only exists so that every
  ## archive type has a parser with the same calling convention.
  files = output;
endfunction
|
226 |
|
function files = __parse_gzip__ (output)
  ## Parse the output from gzip and gunzip returning the files
  ## compressed (or decompressed).
  ##
  ## OUTPUT is a cell array with one line of program output per element.
  ## For each line the text following the word "with" that comes after
  ## the middle colon is taken as the file name.  Lines that cannot be
  ## parsed produce an "unpack:parsing" warning and are left out of the
  ## result, so the returned row cell array never contains empty holes.

  files = {};
  ## The middle ": " should indicate a good place to start looking for
  ## the filename.
  for i = 1:length (output)
    line = output{i};
    colons = strfind (line, ":");
    if (isempty (colons))
      warning ("unpack:parsing", ...
               "Unable to parse line (gzip missing colon):\n%s", line);
      continue;
    endif

    midcolon = colons(ceil (length (colons)/2));
    thisstr = line(midcolon+2:length (line));

    ## index returns 0 (not an empty matrix) when the text is absent.
    withpos = index (thisstr, "with");
    if (withpos == 0)
      warning ("unpack:parsing", ...
               "Unable to parse line (gzip missing with):\n%s", line);
      continue;
    endif

    ## Skip over "with " (4 characters plus the following blank).
    files{end+1} = thisstr(withpos+5:length (thisstr));
  endfor
endfunction
|
252 |
|
function files = __parse_bzip2__ (output)
  ## Parse the output from bzip2 and bunzip2 returning the files
  ## compressed (or decompressed).
  ##
  ## OUTPUT is a cell array with one line of program output per element.
  ## Each line is expected to look like "  NAME.bz2: STATUS": the name
  ## starts at column 3 and ends just before the last ": ".  A trailing
  ## ".bz2" or ".bz" is removed to obtain the decompressed name.  Lines
  ## without ": " produce an "unpack:parsing" warning and are left out
  ## of the returned row cell array.

  files = {};
  for i = 1:length (output)
    line = output{i};

    ## rindex returns 0 (not an empty matrix) when the text is absent.
    colonpos = rindex (line, ": ");
    if (colonpos == 0)
      warning ("unpack:parsing", "Unable to parse line:\n%s", line);
      continue;
    endif

    name = line(3:colonpos-1);
    n = length (name);
    if (n > 4 && strcmpi (name(n-3:n), ".bz2"))
      name = name(1:n-4);
    elseif (n > 3 && strcmpi (name(n-2:n), ".bz"))
      name = name(1:n-3);
    endif
    files{end+1} = name;
  endfor
endfunction
|
267 endfunction |