Mercurial > hg > octave-lyh
comparison scripts/testfun/speed.m @ 11314:87f258202b0f
speed.m: Overhaul documentation string.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Mon, 06 Dec 2010 17:03:00 -0800 |
parents | a4f482e66b65 |
children | cc7f30d3fd01 |
comparison
equal
deleted
inserted
replaced
11313:988d2bd6bacd | 11314:87f258202b0f |
---|---|
19 | 19 |
20 ## -*- texinfo -*- | 20 ## -*- texinfo -*- |
21 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}) | 21 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}) |
22 ## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{}) | 22 ## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{}) |
23 ## | 23 ## |
24 ## Determine the execution time of an expression for various @var{n}. | 24 ## Determine the execution time of an expression (@var{f}) for various input |
25 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n}, | 25 ## values (@var{n}). The @var{n} are log-spaced from 1 to @var{max_n}. For |
26 ## an initialization expression is computed to create whatever data | 26 ## each @var{n}, an initialization expression (@var{init}) is computed to |
27 ## are needed for the test. If a second expression is given, the | 27 ## create any data needed for the test. If a second expression (@var{f2}) is |
28 ## execution times of the two expressions will be compared. Called | 28 ## given then the execution times of the two expressions are compared. When |
29 ## without output arguments the results are presented graphically. | 29 ## called without output arguments the results are displayed graphically. |
30 ## | 30 ## |
31 ## @table @code | 31 ## @table @code |
32 ## @item @var{f} | 32 ## @item @var{f} |
33 ## The expression to evaluate. | 33 ## The expression to evaluate. |
34 ## | 34 ## |
35 ## @item @var{max_n} | 35 ## @item @var{max_n} |
36 ## The maximum test length to run. Default value is 100. Alternatively, | 36 ## The maximum test length to run. Default value is 100. Alternatively, |
37 ## use @code{[min_n,max_n]} or for complete control, @code{[n1,n2,@dots{},nk]}. | 37 ## use @code{[min_n, max_n]} or specify the @var{n} exactly with |
38 ## @code{[n1, n2, @dots{}, nk]}. | |
38 ## | 39 ## |
39 ## @item @var{init} | 40 ## @item @var{init} |
40 ## Initialization expression for function argument values. Use @var{k} | 41 ## Initialization expression for function argument values. Use @var{k} |
41 ## for the test number and @var{n} for the size of the test. This should | 42 ## for the test number and @var{n} for the size of the test. This should |
42 ## compute values for all variables used by @var{f}. Note that init will | 43 ## compute values for all variables used by @var{f}. Note that @var{init} will |
43 ## be evaluated first for @math{k = 0}, so things which are constant throughout | 44 ## be evaluated first for @math{k = 0}, so things which are constant throughout |
44 ## the test can be computed then. The default value is @code{@var{x} = | 45 ## the test series can be computed once. The default value is |
45 ## randn (@var{n}, 1)}. | 46 ## @code{@var{x} = randn (@var{n}, 1)}. |
46 ## | 47 ## |
47 ## @item @var{f2} | 48 ## @item @var{f2} |
48 ## An alternative expression to evaluate, so the speed of the two | 49 ## An alternative expression to evaluate, so that the speed of two |
49 ## can be compared. The default is @code{[]}. | 50 ## expressions can be directly compared. The default is @code{[]}. |
50 ## | 51 ## |
51 ## @item @var{tol} | 52 ## @item @var{tol} |
52 ## If @var{tol} is @code{Inf}, then no comparison will be made between the | 53 ## Tolerance used to compare the results of expression @var{f} and expression |
53 ## results of expression @var{f} and expression @var{f2}. Otherwise, | 54 ## @var{f2}. If @var{tol} is positive, the tolerance is an absolute one. |
54 ## expression @var{f} should produce a value @var{v} and expression @var{f2} | 55 ## If @var{tol} is negative, the tolerance is a relative one. The default is |
55 ## should produce a value @var{v2}, and these will be compared using | 56 ## @code{eps}. If @var{tol} is @code{Inf}, then no comparison will be made. |
56 ## @code{assert(@var{v},@var{v2},@var{tol})}. If @var{tol} is positive, | |
57 ## the tolerance is assumed to be absolute. If @var{tol} is negative, | |
58 ## the tolerance is assumed to be relative. The default is @code{eps}. | |
59 ## | 57 ## |
60 ## @item @var{order} | 58 ## @item @var{order} |
61 ## The time complexity of the expression @code{O(a n^p)}. This | 59 ## The time complexity of the expression @math{O(a*n^p)}. This |
62 ## is a structure with fields @code{a} and @code{p}. | 60 ## is a structure with fields @code{a} and @code{p}. |
63 ## | 61 ## |
64 ## @item @var{n} | 62 ## @item @var{n} |
65 ## The values @var{n} for which the expression was calculated and | 63 ## The values @var{n} for which the expression was calculated AND |
66 ## the execution time was greater than zero. | 64 ## the execution time was greater than zero. |
67 ## | 65 ## |
68 ## @item @var{T_f} | 66 ## @item @var{T_f} |
69 ## The nonzero execution times recorded for the expression @var{f} in seconds. | 67 ## The nonzero execution times recorded for the expression @var{f} in seconds. |
70 ## | 68 ## |
71 ## @item @var{T_f2} | 69 ## @item @var{T_f2} |
72 ## The nonzero execution times recorded for the expression @var{f2} in seconds. | 70 ## The nonzero execution times recorded for the expression @var{f2} in seconds. |
73 ## If it is needed, the mean time ratio is just @code{mean(T_f./T_f2)}. | 71 ## If required, the mean time ratio is simply @code{mean (T_f./T_f2)}. |
74 ## | 72 ## |
75 ## @end table | 73 ## @end table |
76 ## | 74 ## |
77 ## The slope of the execution time graph shows the approximate | 75 ## The slope of the execution time graph shows the approximate |
78 ## power of the asymptotic running time @code{O(n^p)}. This | 76 ## power of the asymptotic running time @math{O(n^p)}. This |
79 ## power is plotted for the region over which it is approximated | 77 ## power is plotted for the region over which it is approximated |
80 ## (the latter half of the graph). The estimated power is not | 78 ## (the latter half of the graph). The estimated power is not |
81 ## very accurate, but should be sufficient to determine the | 79 ## very accurate, but should be sufficient to determine the |
82 ## general order of your algorithm. It should indicate if for | 80 ## general order of an algorithm. It should indicate if, for |
83 ## example your implementation is unexpectedly @code{O(n^2)} | 81 ## example, the implementation is unexpectedly @math{O(n^2)} |
84 ## rather than @code{O(n)} because it extends a vector each | 82 ## rather than @math{O(n)} because it extends a vector each |
85 ## time through the loop rather than pre-allocating one which is | 83 ## time through the loop rather than pre-allocating storage. |
86 ## big enough. For example, in the current version of Octave, | 84 ## In the current version of Octave, the following is not the |
87 ## the following is not the expected @code{O(n)}: | 85 ## expected @math{O(n)}. |
88 ## | 86 ## |
89 ## @example | 87 ## @example |
90 ## speed ("for i = 1:n, y@{i@} = x(i); end", "", [1000,10000]) | 88 ## speed ("for i = 1:n, y@{i@} = x(i); endfor", "", [1000, 10000]) |
91 ## @end example | 89 ## @end example |
92 ## | 90 ## |
93 ## @noindent | 91 ## @noindent |
94 ## but it is if you preallocate the cell array @code{y}: | 92 ## But it is if you preallocate the cell array @code{y}: |
95 ## | 93 ## |
96 ## @example | 94 ## @example |
97 ## @group | 95 ## @group |
98 ## speed ("for i = 1:n, y@{i@} = x(i); end", ... | 96 ## speed ("for i = 1:n, y@{i@} = x(i); endfor", ... |
99 ## "x = rand (n, 1); y = cell (size (x));", [1000, 10000]) | 97 ## "x = rand (n, 1); y = cell (size (x));", [1000, 10000]) |
100 ## @end group | 98 ## @end group |
101 ## @end example | 99 ## @end example |
102 ## | 100 ## |
103 ## An attempt is made to approximate the cost of the individual | 101 ## An attempt is made to approximate the cost of individual |
104 ## operations, but it is wildly inaccurate. You can improve the | 102 ## operations, but it is wildly inaccurate. You can improve the |
105 ## stability somewhat by doing more work for each @code{n}. For | 103 ## stability somewhat by doing more work for each @code{n}. For |
106 ## example: | 104 ## example: |
107 ## | 105 ## |
108 ## @example | 106 ## @example |
109 ## speed ("airy(x)", "x = rand (n, 10)", [10000, 100000]) | 107 ## speed ("airy(x)", "x = rand (n, 10)", [10000, 100000]) |
110 ## @end example | 108 ## @end example |
111 ## | 109 ## |
112 ## When comparing a new and original expression, the line on the | 110 ## When comparing two different expressions (@var{f}, @var{f2}), the slope |
113 ## speedup ratio graph should be larger than 1 if the new expression | 111 ## of the line on the speedup ratio graph should be larger than 1 if the new |
114 ## is faster. Better algorithms have a shallow slope. Generally, | 112 ## expression is faster. Better algorithms have a shallow slope. Generally, |
115 ## vectorizing an algorithm will not change the slope of the execution | 113 ## vectorizing an algorithm will not change the slope of the execution |
116 ## time graph, but it will shift it relative to the original. For | 114 ## time graph, but will shift it relative to the original. For |
117 ## example: | 115 ## example: |
118 ## | 116 ## |
119 ## @example | 117 ## @example |
120 ## @group | 118 ## @group |
121 ## speed ("v = sum (x)", "", [10000, 100000], ... | 119 ## speed ("v = sum (x)", "", [10000, 100000], ... |
122 ## "v = 0; for i = 1:length (x), v += x(i); end") | 120 ## "v = 0; for i = 1:length (x), v += x(i); end") |
123 ## @end group | 121 ## @end group |
124 ## @end example | 122 ## @end example |
125 ## | 123 ## |
126 ## A more complex example, if you had an original version of @code{xcorr} | 124 ## The following is a more complex example. If there was an original version |
127 ## using for loops and another version using an FFT, you could compare the | 125 ## of @code{xcorr} using for loops and a second version using an FFT, then |
128 ## run speed for various lags as follows, or for a fixed lag with varying | 126 ## one could compare the run speed for various lags as follows, or for a fixed |
129 ## vector lengths as follows: | 127 ## lag with varying vector lengths as follows: |
130 ## | 128 ## |
131 ## @example | 129 ## @example |
132 ## @group | 130 ## @group |
133 ## speed ("v = xcorr (x, n)", "x = rand (128, 1);", 100, | 131 ## speed ("v = xcorr (x, n)", "x = rand (128, 1);", 100, |
134 ## "v2 = xcorr_orig (x, n)", -100*eps) | 132 ## "v2 = xcorr_orig (x, n)", -100*eps) |
135 ## speed ("v = xcorr (x, 15)", "x = rand (20+n, 1);", 100, | 133 ## speed ("v = xcorr (x, 15)", "x = rand (20+n, 1);", 100, |
136 ## "v2 = xcorr_orig (x, n)", -100*eps) | 134 ## "v2 = xcorr_orig (x, n)", -100*eps) |
137 ## @end group | 135 ## @end group |
138 ## @end example | 136 ## @end example |
139 ## | 137 ## |
140 ## Assuming one of the two versions is in @var{xcorr_orig}, this | 138 ## Assuming one of the two versions is in xcorr_orig, this |
141 ## would compare their speed and their output values. Note that the | 139 ## would compare their speed and their output values. Note that the |
142 ## FFT version is not exact, so we specify an acceptable tolerance on | 140 ## FFT version is not exact, so we specify an acceptable tolerance on |
143 ## the comparison @code{100*eps}, and the errors should be computed | 141 ## the comparison @code{100*eps}, and that the errors should be computed |
144 ## relatively, as @code{abs((@var{x} - @var{y})./@var{y})} rather than | 142 ## relatively, as @code{abs ((@var{x} - @var{y}) ./ @var{y})} rather than |
145 ## absolutely as @code{abs(@var{x} - @var{y})}. | 143 ## absolutely as @code{abs (@var{x} - @var{y})}. |
146 ## | 144 ## |
147 ## Type @code{example('speed')} to see some real examples. Note for | 145 ## Type @code{example('speed')} to see some real examples. Note that for |
148 ## obscure reasons, you can't run examples 1 and 2 directly using | 146 ## obscure reasons, examples 1 and 2 can not be run directly using |
149 ## @code{demo('speed')}. Instead use, @code{eval(example('speed',1))} | 147 ## @code{demo('speed')}. Instead use, @code{eval ( example('speed', 1) )} |
150 ## and @code{eval(example('speed',2))}. | 148 ## or @code{eval ( example('speed', 2) )}. |
151 ## @end deftypefn | 149 ## @end deftypefn |
152 | 150 |
153 ## FIXME: consider two dimensional speedup surfaces for functions like kron. | 151 ## FIXME: consider two dimensional speedup surfaces for functions like kron. |
154 function [__order, __test_n, __tnew, __torig] ... | 152 function [__order, __test_n, __tnew, __torig] ... |
155 = speed (__f1, __init, __max_n, __f2, __tol) | 153 = speed (__f1, __init, __max_n, __f2, __tol) |
373 %! type build_orig; | 371 %! type build_orig; |
374 %! disp("-----------------------"); | 372 %! disp("-----------------------"); |
375 %! type build; | 373 %! type build; |
376 %! disp("-----------------------"); | 374 %! disp("-----------------------"); |
377 %! | 375 %! |
378 %! disp("Vectorized test. This takes a little while..."); | 376 %! disp("Vectorized test.\nThis takes a little while..."); |
379 %! speed('build(n)', '', 1000, 'build_orig(n)'); | 377 %! speed('build(n)', '', 1000, 'build_orig(n)'); |
380 %! clear build build_orig | 378 %! clear build build_orig |
381 %! disp("-----------------------"); | 379 %! disp("-----------------------"); |
382 %! disp("This time, the for loop is done away with entirely."); | 380 %! disp("This time, the for loop is done away with entirely."); |
383 %! disp("Notice how much bigger the speedup is then in example 1."); | 381 %! disp("Notice how much bigger the speedup is than in example 1."); |
384 %! endif | 382 %! endif |