5589
|
1 ## Copyright (C) 2000-2001 Paul Kienzle |
|
2 ## |
|
3 ## This program is free software; you can redistribute it and/or modify |
|
4 ## it under the terms of the GNU General Public License as published by |
|
5 ## the Free Software Foundation; either version 2 of the License, or |
|
6 ## (at your option) any later version. |
|
7 ## |
|
8 ## This program is distributed in the hope that it will be useful, |
|
9 ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 ## GNU General Public License for more details. |
|
12 ## |
|
13 ## You should have received a copy of the GNU General Public License |
|
14 ## along with this program; if not, write to the Free Software |
|
15 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|
16 ## 02110-1301 USA |
|
17 |
|
18 ## -*- texinfo -*- |
|
19 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}, @var{err}) |
|
20 ## @deftypefnx {Function File} {@var{r} =} speed (@dots{}) |
|
21 ## |
|
22 ## Determine the execution time of an expression for various @var{n}. |
|
23 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n}, |
|
24 ## an initialization expression is computed to create whatever data |
|
25 ## are needed for the test. Called without output arguments the data |
|
26 ## is presented graphically. Called with an output argument @var{r}, |
|
27 ## the speedup ratio is returned instead of displaying it graphically. |
|
28 ## |
|
29 ## @table @code |
|
30 ## @item @var{f} |
|
31 ## The expression to evaluate. |
|
32 ## |
|
33 ## @item @var{max_n} |
|
34 ## The maximum test length to run. Default value is 100. |
|
35 ## |
|
36 ## @item @var{init} |
|
37 ## Initialization expression for function argument values. Use @var{k} |
|
38 ## for the test number and @var{n} for the size of the test. This should |
|
39 ## compute values for all variables used by the expressions. Note that init will |
|
40 ## be evaluated first for k=0, so things which are constant throughout |
|
41 ## the test can be computed then. The default value is @code{@var{x} = |
|
42 ## randn (@var{n}, 1);}. |
|
43 ## |
|
44 ## @item @var{f2} |
|
45 ## An alternative expression to evaluate, so the speed of the two |
|
46 ## can be compared. Default is @code{[]}. |
|
47 ## |
|
48 ## @item @var{tol} |
|
49 ## If @var{tol} is @code{Inf}, then no comparison will be made between the |
|
50 ## results of expression @var{f} and expression @var{f2}. Otherwise, |
|
51 ## expression @var{f} should produce a value @var{v} and expression @var{f2} |
|
52 ## should produce a value @var{v2}, and these shall be compared using |
|
53 ## @code{assert(@var{v},@var{v2},@var{tol},@var{err})}. The default is |
|
54 ## @code{eps}. |
|
55 ## @end table |
|
56 ## |
|
57 ## Some global variables are also referenced. Choose values suitable to |
|
58 ## your machine and your work style. |
|
59 ## |
|
60 ## @table @code |
|
61 ## @item speed_test_plot |
|
62 ## If true, plot a nice speed comparison graph. Default is true. |
|
63 ## |
|
64 ## @item speed_test_numtests |
|
65 ## Number of vector lengths to test. The default is 25. |
|
66 ## @end table |
|
67 ## |
|
68 ## Some comments on the graphs. The line on the speedup ratio graph |
|
69 ## should be larger than 1 if your function is faster. The slope on |
|
70 ## the runtime graph shows you the O(f) speed characteristics. Where it |
|
71 ## is flat, execution time is O(1). Where it is sloping, execution time |
|
72 ## is O(n^m), with steeper slopes for larger @var{m}. Generally vectorizing |
|
73 ## a function will not change the slope of the run-time graph, but it |
|
74 ## will shift it relative to the original. |
|
75 ## |
|
76 ## A simple example is |
|
77 ## |
|
78 ## @example |
|
79 ## speed("strrep(s,x,y)", "s=blanks(n);x=' ';y='b';", 100) |
|
80 ## @end example |
|
81 ## |
|
82 ## A more complex example, if you had an original version of @code{xcorr} |
|
83 ## using for loops and another version using an FFT, you could compare the |
|
84 ## run speed for various lags, or for a fixed lag with varying |
|
85 ## vector lengths, as follows: |
|
86 ## |
|
87 ## @example |
|
88 ## speed("v=xcorr(x,n)", "x=rand(128,1);", 100, ... |
|
89 ## "v2=xcorr_orig(x,n)", 100*eps,'rel') |
|
90 ## speed("v=xcorr(x,15)", "x=rand(20+n,1);", 100, ... |
|
91 ## "v2=xcorr_orig(x,15)", 100*eps,'rel') |
|
92 ## @end example |
|
93 ## |
|
94 ## Assuming one of the two versions is in @var{xcorr_orig}, this would |
|
95 ## compare their speed and their output values. Note that the |
|
96 ## FFT version is not exact, so we specify an acceptable tolerance on |
|
97 ## the comparison @code{100*eps}, and the errors should be computed |
|
98 ## relatively, as @code{abs((@var{x} - @var{y})./@var{y})} rather than |
|
99 ## absolutely as @code{abs(@var{x} - @var{y})}. |
|
100 ## |
|
101 ## Type @code{example('speed')} to see some real examples. Note for |
|
102 ## obscure reasons, you can't run examples 1 and 2 directly using |
|
103 ## @code{demo('speed')}. Instead use, @code{eval(example('speed',1))} |
|
104 ## and @code{eval(example('speed',2))}. |
|
105 ## @end deftypefn |
|
106 |
|
107 ## TODO: consider two dimensional speedup surfaces for functions like kron. |
|
function __ratio_r = speed (__f1, __init, __max_n, __f2, __tol, __err)
  ## Time the expression __f1 (and, optionally, the alternative __f2)
  ## over a log-spaced set of test sizes, then return, print or plot
  ## the result.
  ##
  ##   __f1    expression to time (string)
  ##   __init  expression run before each test to build its data;
  ##           it may use n (test size) and k (test number)
  ##   __max_n largest test size (default 100)
  ##   __f2    alternative expression to compare against (default [])
  ##   __tol   tolerance handed to assert; Inf disables the comparison
  ##   __err   extra argument handed to assert (default [])
  ##
  ## With one output argument the function returns the mean of
  ## torig ./ tnew over the tests whose times are measurable when __f2
  ## is given, and the mean run time of __f1 otherwise.
  ##
  ## Every internal variable carries a "__" prefix because the user
  ## expressions are eval'ed in this function's own workspace; only
  ## n and k are deliberately exposed to them.

  if nargin < 1 || nargin > 6,
    usage("speed(f, init, max_n, f2, tol, err)");
  endif
  if nargin < 2 || isempty(__init),
    __init = "x = randn(n, 1);";
  endif
  if nargin < 3 || isempty(__max_n), __max_n = 100; endif
  if nargin < 4, __f2 = []; endif
  if nargin < 5 || isempty(__tol), __tol = eps; endif
  if nargin < 6 || isempty(__err), __err = []; endif

  ## User-tunable settings; the initializers only apply when the
  ## globals have not been given a value yet.
  global speed_test_plot = 1;
  global speed_test_numtests = 25;

  ## Rounding the log-spaced sizes produces duplicates for small n,
  ## so keep each size once.
  __test_n = unique(round(logspace(0,log10(__max_n),speed_test_numtests)));
  __torig = __tnew = zeros (size(__test_n)) ;

  disp (["testing..........", __f1, "\ninit: ", __init]);

  ## make sure the functions are freshly loaded by evaluating them at
  ## test_n(1); first have to initialize the args though.
  n=1; k=0;
  eval ([__init, ";"]);
  if !isempty(__f2), eval ([__f2, ";"]); endif
  eval ([__f1, ";"]);

  ## run the tests
  for k=1:length(__test_n)
    if (k > 1)
      ## the data for k == 1 (n == 1) was already built above
      n=__test_n(k);
      eval ([__init, ";"]);
    endif

    printf ("n%i=%i  ",k, n) ; fflush(1);

    ## The value compared later is whatever the expression leaves in
    ## "ans".  Short runs are repeated and the best time is kept to
    ## reduce timer noise.
    eval (["__t=time();", __f1, "; __v1=ans; __t = time()-__t;"]);
    if (__t < 0.25)
      eval (["__t2=time();", __f1, "; __t2 = time()-__t2;"]);
      eval (["__t3=time();", __f1, "; __t3 = time()-__t3;"]);
      __t = min([__t,__t2,__t3]);
    endif
    __tnew(k) = __t;

    if !isempty(__f2)
      eval (["__t=time();", __f2, "; __v2=ans; __t = time()-__t;"]);
      if (__t < 0.25)
        eval (["__t2=time();", __f2, "; __t2 = time()-__t2;"]);
        eval (["__t3=time();", __f2, "; __t3 = time()-__t3;"]);
        ## use the best of three here as well, so both expressions
        ## are measured the same way
        __t = min([__t,__t2,__t3]);
      endif
      __torig(k) = __t;
      if !isinf(__tol)
        assert(__v1,__v2,__tol,__err);
      endif
    endif

  end

  if !isempty(__f2),
    # Don't keep zero times
    __idx = find ( __tnew>sqrt(eps) & __torig>sqrt(eps) ) ;
    __ratio = mean (__torig(__idx) ./ __tnew(__idx));
    if (nargout == 1)
      __ratio_r = __ratio;
    else
      printf ("\nmean runtime ratio of %s / %s : %g\n", __f2, __f1, __ratio);
    endif
  else
    if (nargout == 1)
      __ratio_r = mean(__tnew);
    else
      printf ("\nmean runtime: %g\n", mean(__tnew));
    endif
  endif

  if (speed_test_plot && nargout == 0 && !isempty(__f2))

    ## left panel: ratio of the two run times, in both directions
    subplot(121);
    xlabel("test length");
    title (__f1);
    ylabel("speedup ratio");
    ## ";" delimits the legend inside a plot format string, so any ";"
    ## in the expressions is replaced by "."
    semilogx ( __test_n(__idx), __torig(__idx)./__tnew(__idx) ,
               ["-*r;", strrep(__f1,";","."), "/", strrep(__f2,";","."), ";"],
               __test_n(__idx), __tnew(__idx)./__torig(__idx) ,
               ["-*g;", strrep(__f2,";","."), "/", strrep(__f1,";","."), ";"]);

    ## right panel: run times of both expressions
    subplot (122);

    ## convert best execution time to milliseconds.
    __torig = 1000*__torig;
    __tnew = 1000*__tnew;

    ylabel ("best execution time (ms)");
    title (["init: ", __init]);
    loglog ( __test_n (__idx), __tnew (__idx), ["*-g;", strrep(__f1,";","."), ";" ],
             __test_n (__idx), __torig (__idx), ["*-r;", strrep(__f2,";","."), ";"])
    ## clear the labels again and return to a single plot
    title (""); xlabel (""); ylabel (""); oneplot();
  elseif (speed_test_plot && nargout == 0)
    ## single expression: plot its run time only
    __tnew = 1000*__tnew;
    xlabel("test length");
    ylabel ("best execution time (ms)");
    title ([__f1, "  init: ", __init]);
    loglog ( __test_n, __tnew, "*-g;;");
    title (""); xlabel (""); ylabel (""); oneplot();
  endif

endfunction
|
214 |
|
%!demo if 1
%!  ## Demo 1: growing a vector inside a loop versus preallocating it.
%!  function x = build_orig(n)
%!    ## extend the target vector on the fly
%!    for i=0:n-1, x([1:10]+i*10) = 1:10; endfor
%!  endfunction
%!  function x = build(n)
%!    ## preallocate the target vector
%!    x = zeros(1, n*10);
%!    try
%!      if (prefer_column_vectors), x = x.'; endif
%!    catch
%!    end
%!    for i=0:n-1, x([1:10]+i*10) = 1:10; endfor
%!  endfunction
%!
%!  disp("-----------------------");
%!  type build_orig;
%!  disp("-----------------------");
%!  type build;
%!  disp("-----------------------");
%!
%!  disp("Preallocated vector test.\nThis takes a little while...");
%!  ## speed() takes (f, init, max_n, f2): the new code goes first, the
%!  ## reference code fourth; an empty init selects the default.
%!  speed('build(n)', '', 1000, 'build_orig(n)');
%!  clear build build_orig
%!  disp("Note how much faster it is to pre-allocate a vector.");
%!  disp("Notice the peak speedup ratio.");
%! endif
|
243 |
|
%!demo if 1
%!  ## Demo 2: building a vector with a loop versus fully vectorized code.
%!  function x = build_orig(n)
%!    for i=0:n-1, x([1:10]+i*10) = 1:10; endfor
%!  endfunction
%!  function x = build(n)
%!    ## replicate the column 1:10 n times by indexing, then flatten
%!    idx = [1:10]';
%!    x = idx(:,ones(1,n));
%!    x = reshape(x, 1, n*10);
%!    try
%!      if (prefer_column_vectors), x = x.'; endif
%!    catch
%!    end
%!  endfunction
%!
%!  disp("-----------------------");
%!  type build_orig;
%!  disp("-----------------------");
%!  type build;
%!  disp("-----------------------");
%!
%!  disp("Vectorized test. This takes a little while...");
%!  ## speed() takes (f, init, max_n, f2): the new code goes first, the
%!  ## reference code fourth; an empty init selects the default.
%!  speed('build(n)', '', 1000, 'build_orig(n)');
%!  clear build build_orig
%!  disp("-----------------------");
%!  disp("This time, the for loop is done away with entirely.");
%!  disp("Notice how much bigger the speedup is then in example 1.");
%! endif