6082
|
1 ## Copyright (C) 2006 Bill Denney |
|
2 ## |
|
3 ## This file is part of Octave. |
|
4 ## |
|
5 ## Octave is free software; you can redistribute it and/or modify it |
|
6 ## under the terms of the GNU General Public License as published by |
|
7 ## the Free Software Foundation; either version 2, or (at your option) |
|
8 ## any later version. |
|
9 ## |
|
10 ## Octave is distributed in the hope that it will be useful, but |
|
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 ## General Public License for more details. |
|
14 ## |
|
15 ## You should have received a copy of the GNU General Public License |
|
16 ## along with Octave; see the file COPYING. If not, write to the Free |
|
17 ## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|
18 ## 02110-1301, USA. |
|
19 |
|
20 ## -*- texinfo -*- |
|
21 ## @deftypefn {Function File} {@var{files} =} unpack (@var{file}, @var{dir}) |
|
22 ## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir}, @var{filetype}) |
|
23 ## Unpack the archive @var{file} based on its extension to the directory |
|
24 ## @var{dir}. If @var{file} is a cellstr, then all files will be |
|
25 ## handled individually. If @var{dir} is not specified, it defaults to |
|
26 ## the current directory. It returns a list of @var{files} |
|
27 ## unpacked. If a directory is in the file list, then the |
|
28 ## @var{filetype} to unpack must also be specified. |
|
29 ## |
|
30 ## The @var{files} includes the entire path to the output files. |
|
31 ## @seealso{bzip2,bunzip2,tar,untar,gzip,gunzip,zip,unzip} |
|
32 ## @end deftypefn |
|
33 |
|
34 ## Author: Bill Denney <denney@seas.upenn.edu> |
|
35 |
|
function filelist = unpack (file, directory, filetype)
  ## Unpack the archive(s) named by FILE into DIRECTORY.
  ##
  ## FILE      - a character string naming an archive, a URL (anything
  ##             containing "://") or a directory, or a cellstr of such
  ##             names, each of which is unpacked in turn.
  ## DIRECTORY - destination directory; defaults to "." and is created
  ##             when it does not exist.
  ## FILETYPE  - only consulted when FILE is a directory, in which case
  ##             it must be "gunzip".
  ##
  ## FILELIST is assigned only when an output is requested; it is the
  ## cell array of names reported by the unarchiving program.  An
  ## unrecognised extension produces an "unpack:filetype" warning and
  ## FILE itself is returned (wrapped in a cell).

  if (nargin < 1 || nargin > 3)
    print_usage ();
  endif

  if (nargin < 2)
    directory = ".";
  endif
  if (nargin < 3)
    filetype = "";
  endif

  if (ischar (file))
    if (isdir (file))
      if (isempty (filetype))
        error ("unpack: filetype must be given for a directory");
      elseif (! any (strcmpi (filetype, "gunzip")))
        error ("unpack: filetype must be gunzip for a directory");
      endif
      ## Directories are handed to "gunzip -r", so select the gzip
      ## commands below.
      ext = ".gz";
    else
      [pathstr, name, ext] = fileparts (file);

      ## Check to see if it's .tar.gz, .tar.Z, etc.
      if (any (strcmpi ({".gz" ".Z" ".bz2" ".bz"}, ext)))
        [tmppathstr, tmpname, tmpext] = fileparts (name);
        if (strcmpi (tmpext, ".tar"))
          name = tmpname;
          ext = strcat (tmpext, ext);
        endif
      endif

      ## If the file is a url, download it and then work with that
      ## file.
      if (! isempty (strfind (file, "://")))
        ## FIXME -- the above is not a perfect test for a url
        urlfile = file;
        ## FIXME -- should we name the file that we download with the
        ## same file name as the url requests?
        tmpfile = strcat (tmpnam (), ext);
        [file, success, msg] = urlwrite (urlfile, tmpfile);
        if (! success)
          error ("unpack: could not get \"%s\": %s", urlfile, msg);
        endif
      endif

    endif

    ## canonicalize_file_name returns empty if the file isn't found, so
    ## use that to check for existence
    cfile = canonicalize_file_name (file);

    if (isempty (cfile))
      error ("unpack: file \"%s\" not found.", file);
    else
      file = cfile;
    endif

  elseif (iscellstr (file))
    ## Unpack every entry on its own and collect the results.  The
    ## recursion does all the work, so return as soon as it is finished
    ## instead of falling through to the single-file code below.
    files = {};
    for i = 1:numel (file)
      tmpfiles = unpack (file{i}, directory, filetype);
      files = {files{:} tmpfiles{:}};
    endfor

    if (nargout > 0)
      filelist = files;
    endif
    return;

  else
    error ("unpack: invalid input file class, %s", class(file));
  endif

  ## Instructions on what to do for any extension.
  ##
  ## The field names are the lower-case file extension without periods.
  ## The first cell is what is executed to unpack an archive verbosely.
  ## The second cell is what is executed to unpack an archive quietly.
  ## The third cell is the function to execute on output to get the
  ## files list.
  ## The fourth cell indicates if the files may need to be manually moved
  ## (i.e. tar and unzip decompress into the current directory while
  ## bzip2 and gzip decompress the file at its location).
  persistent commandlist;
  if (isempty (commandlist))
    commandlist.gz = {"gunzip -v -r \"%s\"", ...
                      "gunzip -r \"%s\"", ...
                      @__parse_gzip__, true};
    commandlist.z = commandlist.gz;
    commandlist.bz2 = {"bunzip2 -v \"%s\"", ...
                       "bunzip2 \"%s\"", ...
                       @__parse_bzip2__, true};
    commandlist.bz = commandlist.bz2;
    commandlist.tar = {"tar -x -v -f \"%s\"", ...
                       "tar -x -f \"%s\"", ...
                       @__parse_tar__, false};
    commandlist.targz = {"gunzip -c \"%s\" | tar -x -v", ...
                         "gunzip -c \"%s\" | tar -x", ...
                         @__parse_tar__, false};
    commandlist.tgz = commandlist.targz;
    ## ".tar.Z" is detected above, and gunzip also reads compress'ed
    ## data, so route it through the same pipeline as ".tar.gz".
    commandlist.tarz = commandlist.targz;
    commandlist.tarbz2 = {"bunzip2 -c \"%s\" | tar -x -v", ...
                          "bunzip2 -c \"%s\" | tar -x", ...
                          @__parse_tar__, false};
    commandlist.tarbz = commandlist.tarbz2;
    commandlist.tbz2 = commandlist.tarbz2;
    commandlist.tbz = commandlist.tarbz2;
    commandlist.zip = {"unzip \"%s\"", ...
                       "unzip -q \"%s\"", ...
                       @__parse_zip__, false};
  endif

  ## The extension tests above are case-insensitive, so the table lookup
  ## has to be as well (".Z", ".ZIP", ...).
  nodotext = lower (ext(! ismember (ext, ".")));

  if (! isfield (commandlist, nodotext))
    warning ("unpack:filetype", "unrecognised file type, %s", ext);
    if (nargout > 0)
      filelist = {file};
    endif
    return;
  endif

  [commandv, commandq, parser, move] = deal (commandlist.(nodotext){:});

  ## Create the directory if necessary.  This is done before the
  ## directory name is canonicalized, because canonicalize_file_name
  ## returns empty for a path that does not exist yet.
  s = stat (directory);
  if (isempty (s))
    [status, msg] = mkdir (directory);
    if (! status)
      error ("unpack: mkdir failed to create %s: %s", directory, msg);
    endif
  elseif (! S_ISDIR (s.mode))
    error ("unpack: %s: not a directory", directory);
  endif

  origdir = pwd ();
  cstartdir = canonicalize_file_name (origdir);
  cenddir = canonicalize_file_name (directory);
  needmove = move && ! strcmp (cstartdir, cenddir);

  ## The verbose command is needed whenever the list of files is used.
  if (nargout > 0 || needmove)
    command = commandv;
  else
    command = commandq;
  endif

  unwind_protect
    cd (directory);
    [status, output] = system (sprintf ([command " 2>&1"], file));
  unwind_protect_cleanup
    cd (origdir);
  end_unwind_protect

  if (status)
    error ("unpack: unarchiving program exited with status: %d\n%s",
           status, output);
  endif

  if (needmove || nargout > 0)
    ## Trim the last cr if needed.
    ## FIXME -- will this need to change to a check for "\r\n" for windows?
    if (! isempty (output) && output(length (output)) == "\n")
      output(length (output)) = [];
    endif

    ## Nothing printed means nothing to parse (and nothing to move).
    if (isempty (output))
      files = {};
    else
      files = parser (cellstr (split (output, "\n")))';
    endif

    ## Move files if necessary
    if (needmove && ! isempty (files))
      [st, msg, msgid] = movefile (files, directory);
      if (! st)
        error ("unpack: unable to move files to \"%s\": %s",
               directory, msg);
      endif

      ## Fix the names for the files since they were moved.
      for i = 1:numel (files)
        files{i} = strrep (files{i}, cstartdir, cenddir);
      endfor
    endif

    ## Return output if requested.
    if (nargout > 0)
      filelist = files;
    endif
  endif

endfunction
|
215 |
|
function files = __parse_zip__ (output)
  ## Parse the output from zip and unzip.
  ##
  ## OUTPUT is a cellstr holding one line of program output per element.
  ## FILES is a 1-by-N cellstr with the text found from column 14
  ## onwards of every entry line, i.e. whatever follows the fixed-width
  ## "  inflating: " style prefix.  The "Archive:" header line and lines
  ## too short to hold a name are skipped.  An empty OUTPUT gives an
  ## empty cell array.

  files = {};
  for i = 1:length (output)
    entry = output{i};
    len = length (entry);

    ## Too short to contain anything after the 13 character prefix.
    if (len < 14)
      continue;
    endif

    ## unzip starts its listing with "Archive:  <name>", which is not an
    ## extracted file.
    if (strcmp (entry(1:8), "Archive:"))
      continue;
    endif

    files{end+1} = entry(14:len);
  endfor
endfunction
|
223 |
|
function files = __parse_tar__ (output)
  ## Verbose tar output is already one file name per line, so the lines
  ## are handed back untouched.  The function only exists so that tar
  ## can be driven through the same parser interface as the other
  ## archive types.
  files = output;
endfunction
|
227 |
|
function files = __parse_gzip__ (output)
  ## Parse the output from gzip and gunzip returning the files
  ## compressed (or decompressed).
  ##
  ## OUTPUT is a cellstr holding one line of program output per element.
  ## FILES is a 1-by-N cellstr with the text that follows "with " on
  ## every line that could be parsed.  Lines without a colon or without
  ## "with" raise an "unpack:parsing" warning and are left out of FILES.

  files = {};
  ## The middle ": " should indicate a good place to start looking for
  ## the filename.
  for i = 1:length (output)
    thisline = output{i};
    colons = strfind (thisline, ":");
    if (isempty (colons))
      warning ("unpack:parsing",
               "Unable to parse line (gzip missing colon):\n%s", output{i});
      continue;
    endif

    midcolon = colons(ceil (length (colons)/2));
    thisstr = thisline(midcolon+2:length (thisline));

    ## strfind returns an empty matrix when "with" is absent, which
    ## makes the "not found" case detectable.
    withpos = strfind (thisstr, "with");
    if (isempty (withpos))
      warning ("unpack:parsing",
               "Unable to parse line (gzip missing with):\n%s", output{i});
      continue;
    endif

    ## Skip over "with " (5 characters) to reach the file name.  Append
    ## rather than index by i so that skipped lines leave no holes.
    files{end+1} = thisstr(withpos(1)+5:length (thisstr));
  endfor
endfunction
|
253 |
|
function files = __parse_bzip2__ (output)
  ## Parse the output from bzip2 and bunzip2 returning the files
  ## compressed (or decompressed).
  ##
  ## OUTPUT is a cellstr holding one line of program output per element,
  ## each expected to look like "  NAME.bz2: done".
  ## FILES is a 1-by-N cellstr of output file names derived from NAME.
  ## Lines without ": " raise an "unpack:parsing" warning and are left
  ## out of FILES.

  files = {};
  for i = 1:length (output)
    entry = output{i};
    seps = strfind (entry, ": ");
    if (isempty (seps))
      warning ("unpack:parsing", "Unable to parse line:\n%s", output{i});
      continue;
    endif

    ## The archive name sits between the two leading blanks and the
    ## last ": ".
    archive = entry(3:seps(length (seps))-1);
    n = length (archive);

    ## Derive the decompressed name the way bunzip2 documents it:
    ## .bz2 and .bz are dropped, .tbz2 and .tbz become .tar, and any
    ## other name gets ".out" appended.
    if (n > 5 && strcmpi (archive(n-4:n), ".tbz2"))
      fname = [archive(1:n-5) ".tar"];
    elseif (n > 4 && strcmpi (archive(n-3:n), ".tbz"))
      fname = [archive(1:n-4) ".tar"];
    elseif (n > 4 && strcmpi (archive(n-3:n), ".bz2"))
      fname = archive(1:n-4);
    elseif (n > 3 && strcmpi (archive(n-2:n), ".bz"))
      fname = archive(1:n-3);
    else
      fname = [archive ".out"];
    endif

    ## Append rather than index by i so that skipped lines leave no holes.
    files{end+1} = fname;
  endfor
endfunction
|
268 endfunction |