Mercurial > hg > octave-lyh
annotate scripts/testfun/speed.m @ 11188:4cb1522e4d0f
Use function handle as input to cellfun,
rather than quoted function name or anonymous function wrapper.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Wed, 03 Nov 2010 17:20:56 -0700 |
parents | a4f482e66b65 |
children | 87f258202b0f |
rev | line source |
---|---|
8920 | 1 ## Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, |
2 ## 2009 Paul Kienzle | |
7016 | 3 ## |
4 ## This file is part of Octave. | |
5589 | 5 ## |
7016 | 6 ## Octave is free software; you can redistribute it and/or modify it |
7 ## under the terms of the GNU General Public License as published by | |
8 ## the Free Software Foundation; either version 3 of the License, or (at | |
9 ## your option) any later version. | |
5589 | 10 ## |
7016 | 11 ## Octave is distributed in the hope that it will be useful, but |
12 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 ## General Public License for more details. | |
5589 | 15 ## |
16 ## You should have received a copy of the GNU General Public License | |
7016 | 17 ## along with Octave; see the file COPYING. If not, see |
18 ## <http://www.gnu.org/licenses/>. | |
5589 | 19 |
20 ## -*- texinfo -*- | |
10793
be55736a0783
Grammarcheck the documentation from m-files.
Rik <octave@nomad.inbox5.com>
parents:
10791
diff
changeset
|
21 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}) |
5798 | 22 ## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{}) |
5589 | 23 ## |
24 ## Determine the execution time of an expression for various @var{n}. | |
25 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n}, | |
26 ## an initialization expression is computed to create whatever data | |
5798 | 27 ## are needed for the test. If a second expression is given, the |
28 ## execution times of the two expressions will be compared. Called | |
29 ## without output arguments the results are presented graphically. | |
5589 | 30 ## |
31 ## @table @code | |
32 ## @item @var{f} | |
33 ## The expression to evaluate. | |
34 ## | |
35 ## @item @var{max_n} | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
36 ## The maximum test length to run. Default value is 100. Alternatively, |
5798 | 37 ## use @code{[min_n,max_n]} or for complete control, @code{[n1,n2,@dots{},nk]}. |
5589 | 38 ## |
39 ## @item @var{init} | |
40 ## Initialization expression for function argument values. Use @var{k} | |
41 ## for the test number and @var{n} for the size of the test. This should | |
10791
3140cb7a05a1
Add spellchecker scripts for Octave and run spellcheck of documentation
Rik <octave@nomad.inbox5.com>
parents:
10549
diff
changeset
|
42 ## compute values for all variables used by @var{f}. Note that init will |
8507 | 43 ## be evaluated first for @math{k = 0}, so things which are constant throughout |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
44 ## the test can be computed then. The default value is @code{@var{x} = |
10791
3140cb7a05a1
Add spellchecker scripts for Octave and run spellcheck of documentation
Rik <octave@nomad.inbox5.com>
parents:
10549
diff
changeset
|
45 ## randn (@var{n}, 1)}. |
5589 | 46 ## |
47 ## @item @var{f2} | |
48 ## An alternative expression to evaluate, so the speed of the two | |
10791
3140cb7a05a1
Add spellchecker scripts for Octave and run spellcheck of documentation
Rik <octave@nomad.inbox5.com>
parents:
10549
diff
changeset
|
49 ## can be compared. The default is @code{[]}. |
5589 | 50 ## |
51 ## @item @var{tol} | |
52 ## If @var{tol} is @code{Inf}, then no comparison will be made between the | |
53 ## results of expression @var{f} and expression @var{f2}. Otherwise, | |
54 ## expression @var{f} should produce a value @var{v} and expression @var{f2} | |
10791
3140cb7a05a1
Add spellchecker scripts for Octave and run spellcheck of documentation
Rik <octave@nomad.inbox5.com>
parents:
10549
diff
changeset
|
55 ## should produce a value @var{v2}, and these will be compared using |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
56 ## @code{assert(@var{v},@var{v2},@var{tol})}. If @var{tol} is positive, |
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
57 ## the tolerance is assumed to be absolute. If @var{tol} is negative, |
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
58 ## the tolerance is assumed to be relative. The default is @code{eps}. |
5589 | 59 ## |
5798 | 60 ## @item @var{order} |
61 ## The time complexity of the expression @code{O(a n^p)}. This | |
62 ## is a structure with fields @code{a} and @code{p}. | |
5589 | 63 ## |
5798 | 64 ## @item @var{n} |
7001 | 65 ## The values @var{n} for which the expression was calculated and |
5798 | 66 ## the execution time was greater than zero. |
5589 | 67 ## |
5798 | 68 ## @item @var{T_f} |
69 ## The nonzero execution times recorded for the expression @var{f} in seconds. | |
70 ## | |
71 ## @item @var{T_f2} | |
72 ## The nonzero execution times recorded for the expression @var{f2} in seconds. | |
73 ## If it is needed, the mean time ratio is just @code{mean(T_f./T_f2)}. | |
74 ## | |
5589 | 75 ## @end table |
76 ## | |
5798 | 77 ## The slope of the execution time graph shows the approximate |
78 ## power of the asymptotic running time @code{O(n^p)}. This | |
79 ## power is plotted for the region over which it is approximated | |
80 ## (the latter half of the graph). The estimated power is not | |
81 ## very accurate, but should be sufficient to determine the | |
82 ## general order of your algorithm. It should indicate if for | |
83 ## example your implementation is unexpectedly @code{O(n^2)} | |
84 ## rather than @code{O(n)} because it extends a vector each | |
10791
3140cb7a05a1
Add spellchecker scripts for Octave and run spellcheck of documentation
Rik <octave@nomad.inbox5.com>
parents:
10549
diff
changeset
|
85 ## time through the loop rather than pre-allocating one which is |
5798 | 86 ## big enough. For example, in the current version of Octave, |
87 ## the following is not the expected @code{O(n)}: | |
5589 | 88 ## |
5798 | 89 ## @example |
8507 | 90 ## speed ("for i = 1:n, y@{i@} = x(i); end", "", [1000,10000]) |
5798 | 91 ## @end example |
92 ## | |
10846
a4f482e66b65
Grammarcheck more of the documentation.
Rik <octave@nomad.inbox5.com>
parents:
10793
diff
changeset
|
93 ## @noindent |
5798 | 94 ## but it is if you preallocate the cell array @code{y}: |
5589 | 95 ## |
96 ## @example | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
97 ## @group |
8507 | 98 ## speed ("for i = 1:n, y@{i@} = x(i); end", ... |
99 ## "x = rand (n, 1); y = cell (size (x));", [1000, 10000]) | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
100 ## @end group |
5798 | 101 ## @end example |
102 ## | |
103 ## An attempt is made to approximate the cost of the individual | |
104 ## operations, but it is wildly inaccurate. You can improve the | |
105 ## stability somewhat by doing more work for each @code{n}. For | |
106 ## example: | |
107 ## | |
108 ## @example | |
8507 | 109 ## speed ("airy(x)", "x = rand (n, 10)", [10000, 100000]) |
5589 | 110 ## @end example |
111 ## | |
5798 | 112 ## When comparing a new and original expression, the line on the |
113 ## speedup ratio graph should be larger than 1 if the new expression | |
114 ## is faster. Better algorithms have a shallow slope. Generally, | |
115 ## vectorizing an algorithm will not change the slope of the execution | |
116 ## time graph, but it will shift it relative to the original. For | |
117 ## example: | |
118 ## | |
119 ## @example | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
120 ## @group |
8507 | 121 ## speed ("v = sum (x)", "", [10000, 100000], ... |
122 ## "v = 0; for i = 1:length (x), v += x(i); end") | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
123 ## @end group |
5798 | 124 ## @end example |
125 ## | |
5589 | 126 ## A more complex example: if you had an original version of @code{xcorr} |
127 ## using for loops and another version using an FFT, you could compare the | |
128 ## run speed for various lags (first call below), or for a fixed lag with | |
129 ## varying vector lengths (second call below): | |
130 ## | |
131 ## @example | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
132 ## @group |
8507 | 133 ## speed ("xcorr (x, n)", "x = rand (128, 1);", 100, ... |
134 ## "xcorr_orig (x, n)", -100*eps) | |
135 ## speed ("xcorr (x, 15)", "x = rand (20+n, 1);", 100, ... | |
136 ## "xcorr_orig (x, 15)", -100*eps) | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
137 ## @end group |
5589 | 138 ## @end example |
139 ## | |
7001 | 140 ## Assuming one of the two versions is in @var{xcorr_orig}, this |
5589 | 141 ## would compare their speed and their output values. Note that the |
142 ## FFT version is not exact, so we specify an acceptable tolerance on | |
143 ## the comparison @code{100*eps}, and the errors should be computed | |
144 ## relatively, as @code{abs((@var{x} - @var{y})./@var{y})} rather than | |
145 ## absolutely as @code{abs(@var{x} - @var{y})}. | |
146 ## | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
147 ## Type @code{example('speed')} to see some real examples. Note that, for |
5589 | 148 ## obscure reasons, you can't run examples 1 and 2 directly using |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
149 ## @code{demo('speed')}. Instead, use @code{eval(example('speed',1))} |
5589 | 150 ## and @code{eval(example('speed',2))}. |
151 ## @end deftypefn | |
152 | |
8202
cf59d542f33e
replace all TODOs and XXXs with FIXMEs
Jaroslav Hajek <highegg@gmail.com>
parents:
7540
diff
changeset
|
153 ## FIXME: consider two dimensional speedup surfaces for functions like kron. |
function [__order, __test_n, __tnew, __torig] ...
      = speed (__f1, __init, __max_n, __f2, __tol)

  ## Time the expression __f1 (and, optionally, __f2) for a range of
  ## problem sizes n, fit a power law to the measured times, and either
  ## return the measurements or plot them.
  ##
  ## Inputs:
  ##   __f1     string holding the expression to time.
  ##   __init   string holding the initialization code that is run before
  ##            each test; default "x = randn(n, 1);".
  ##   __max_n  scalar maximum size, a pair [min_n, max_n], or an explicit
  ##            vector of sizes; default 100.
  ##   __f2     optional string holding a second expression to compare
  ##            against; default [].
  ##   __tol    tolerance passed to assert() when comparing the results of
  ##            __f1 and __f2; default eps.  Inf disables the comparison.
  ##
  ## Outputs (plots are drawn only when no output is requested):
  ##   __order  struct with fields p and a from the fit  t ~ a * n^p.
  ##   __test_n sizes whose measured time was not (nearly) zero.
  ##   __tnew   best measured times for __f1, in seconds.
  ##   __torig  best measured times for __f2, in seconds.
  ##
  ## The user-supplied strings are run with eval() inside this function.
  ## State that is live during those evals uses "__"-prefixed names,
  ## presumably to avoid clashing with variables in the user's
  ## expressions; n and k are set deliberately for those expressions.

  ## The function takes at most five arguments.
  if (nargin < 1 || nargin > 5)
    print_usage ();
  endif

  if (nargin < 2 || isempty (__init))
    __init = "x = randn(n, 1);";
  endif

  if (nargin < 3 || isempty (__max_n))
    __max_n = 100;
  endif

  if (nargin < 4)
    __f2 = [];
  endif

  if (nargin < 5 || isempty (__tol))
    __tol = eps;
  endif

  __numtests = 15;

  ## Let user specify range of n.
  if (isscalar (__max_n))
    __min_n = 1;
    assert (__max_n > __min_n);
    __test_n = logspace (0, log10 (__max_n), __numtests);
  elseif (length (__max_n) == 2)
    __min_n = __max_n(1);
    __max_n = __max_n(2);
    assert (__min_n >= 1);
    __test_n = logspace (log10 (__min_n), log10 (__max_n), __numtests);
  else
    __test_n = __max_n;
  endif
  ## Force n to be an integer.
  __test_n = unique (round (__test_n));
  assert (__test_n >= 1);

  __torig = __tnew = zeros (size (__test_n));

  disp (cstrcat ("testing ", __f1, "\ninit: ", __init));

  ## Make sure the functions are freshly loaded by evaluating them at
  ## test_n(1); first have to initialize the args though.
  n = 1;
  k = 0;
  eval (cstrcat (__init, ";"));
  if (! isempty (__f2))
    eval (cstrcat (__f2, ";"));
  endif
  eval (cstrcat (__f1, ";"));

  ## Run the tests.
  for k = 1:length (__test_n)
    n = __test_n(k);
    eval (cstrcat (__init, ";"));

    printf ("n%i = %i  ",k, n);
    fflush (stdout);

    ## The result is captured from "ans", so the comparison below is only
    ## meaningful when the expression is a plain expression, not an
    ## assignment.
    eval (cstrcat ("__t = time();", __f1, "; __v1=ans; __t = time()-__t;"));
    if (__t < 0.25)
      ## Short runs are noisy: repeat twice more and keep the best time.
      eval (cstrcat ("__t2 = time();", __f1, "; __t2 = time()-__t2;"));
      eval (cstrcat ("__t3 = time();", __f1, "; __t3 = time()-__t3;"));
      __t = min ([__t, __t2, __t3]);
    endif
    __tnew(k) = __t;

    if (! isempty (__f2))
      eval (cstrcat ("__t = time();", __f2, "; __v2=ans; __t = time()-__t;"));
      if (__t < 0.25)
        eval (cstrcat ("__t2 = time();", __f2, "; __t2 = time()-__t2;"));
        eval (cstrcat ("__t3 = time();", __f2, "; __t3 = time()-__t3;"));
        ## Keep the best of the three runs, exactly as is done for __f1;
        ## otherwise the two expressions are not timed on equal terms.
        __t = min ([__t, __t2, __t3]);
      endif
      __torig(k) = __t;
      if (! isinf(__tol))
        assert (__v1, __v2, __tol);
      endif
    endif
  endfor

  ## Drop times of zero.
  if (! isempty (__f2))
    zidx = (__tnew < 100*eps | __torig < 100*eps);
    __test_n(zidx) = [];
    __tnew(zidx) = [];
    __torig(zidx) = [];
  else
    zidx = (__tnew < 100*eps);
    __test_n(zidx) = [];
    __tnew(zidx) = [];
  endif

  ## Without any usable measurement the fit below would fail with an
  ## obscure indexing error, so report the real cause instead.
  if (isempty (__test_n))
    error ("speed: all measured execution times were zero; increase the test size");
  endif

  ## Approximate time complexity and return it if requested.
  ## The fit uses only the latter half of the remaining test sizes.
  tailidx = ceil(length(__test_n)/2):length(__test_n);
  p = polyfit (log (__test_n(tailidx)), log (__tnew(tailidx)), 1);
  if (nargout > 0)
    __order.p = p(1);
    __order.a = exp (p(2));
  endif

  ## Plot the data if no output is requested.
  doplot = (nargout == 0);

  if (doplot)
    figure;
  endif

  if (doplot && ! isempty (__f2))
    subplot (1, 2, 1);
    ## ";" delimits the legend key in the format string, so any ";" in the
    ## expressions is replaced by ".".
    semilogx (__test_n, __torig./__tnew,
              cstrcat ("-*r;", strrep (__f1, ";", "."), "/",
                       strrep (__f2, ";", "."), ";"),
              __test_n, __tnew./__torig,
              cstrcat ("-*g;", strrep (__f2, ";", "."), "/",
                       strrep (__f1, ";", "."), ";"));
    xlabel ("test length");
    title (__f1);
    ylabel ("speedup ratio");

    subplot (1, 2, 2);
    loglog (__test_n, __tnew*1000,
            cstrcat ("*-g;", strrep (__f1, ";", "."), ";"),
            __test_n, __torig*1000,
            cstrcat ("*-r;", strrep (__f2,";","."), ";"));

    xlabel ("test length");
    ylabel ("best execution time (ms)");
    title (cstrcat ("init: ", __init));

    ratio = mean (__torig ./ __tnew);
    printf ("\n\nMean runtime ratio = %.3g for '%s' vs '%s'\n",
            ratio, __f2, __f1);

  elseif (doplot)

    loglog (__test_n, __tnew*1000, "*-g;execution time;");
    xlabel ("test length");
    ylabel ("best execution time (ms)");
    title (cstrcat (__f1, "  init: ", __init));

  endif

  if (doplot)

    ## Plot time complexity approximation (using milliseconds).
    order = sprintf ("O(n^%g)", round (10*p(1))/10);
    v = polyval (p, log (__test_n(tailidx)));

    ## Overlay the fit on the execution-time axes instead of replacing
    ## the curve that was just drawn there.
    hold on;
    loglog (__test_n(tailidx), exp(v)*1000, sprintf ("b;%s;", order));
    hold off;

    ## Get base time to 1 digit of accuracy.
    dt = exp (p(2));
    scale = 10^floor (log10 (dt));
    dt = floor (dt/scale)*scale;
    ## Use a name other than "time" so the time() function is not shadowed.
    if (log10 (dt) >= -0.5)
      time_str = sprintf ("%g s", dt);
    elseif (log10 (dt) >= -3.5)
      time_str = sprintf ("%g ms", dt*1e3);
    elseif (log10 (dt) >= -6.5)
      time_str = sprintf ("%g us", dt*1e6);
    else
      time_str = sprintf ("%g ns", dt*1e9);
    endif

    ## Display nicely formatted complexity.
    printf ("\nFor %s:\n", __f1);
    printf ("  asymptotic power: %s\n", order);
    printf ("  approximate time per operation: %s\n", time_str);

  endif

endfunction
329 | |
%!demo if 1
%!  ## Demo 1: compare growing a vector inside a loop (build_orig) with
%!  ## filling a preallocated vector (build).
%!  ## NOTE(review): the "if 1 ... endif" wrapper presumably exists so the
%!  ## demo can define functions inline; confirm before removing it.
%!  function x = build_orig(n)
%!    ## extend the target vector on the fly
%!    for i=0:n-1, x([1:10]+i*10) = 1:10; endfor
%!  endfunction
%!  function x = build(n)
%!    ## preallocate the target vector
%!    x = zeros(1, n*10);
%!    for i=0:n-1, x([1:10]+i*10) = 1:10; endfor
%!  endfunction
%!
%!  disp("-----------------------");
%!  type build_orig;
%!  disp("-----------------------");
%!  type build;
%!  disp("-----------------------");
%!
%!  disp("Preallocated vector test.\nThis takes a little while...");
%!  speed('build(n)', '', 1000, 'build_orig(n)');
%!  clear build build_orig
%!  disp("Note how much faster it is to pre-allocate a vector.");
%!  disp("Notice the peak speedup ratio.");
%! endif
357 | |
%!demo if 1
%!  ## Demo 2: compare growing a vector inside a loop (build_orig) with a
%!  ## fully vectorized construction (build).
%!  ## NOTE(review): the "if 1 ... endif" wrapper presumably exists so the
%!  ## demo can define functions inline; confirm before removing it.
%!  function x = build_orig(n)
%!    for i=0:n-1, x([1:10]+i*10) = 1:10; endfor
%!  endfunction
%!  function x = build(n)
%!    idx = [1:10]';
%!    x = idx(:,ones(1,n));
%!    x = reshape(x, 1, n*10);
%!  endfunction
%!
%!  disp("-----------------------");
%!  type build_orig;
%!  disp("-----------------------");
%!  type build;
%!  disp("-----------------------");
%!
%!  disp("Vectorized test. This takes a little while...");
%!  speed('build(n)', '', 1000, 'build_orig(n)');
%!  clear build build_orig
%!  disp("-----------------------");
%!  disp("This time, the for loop is done away with entirely.");
%!  disp("Notice how much bigger the speedup is than in example 1.");
%! endif