comparison scripts/testfun/speed.m @ 11314:87f258202b0f

speed.m: Overhaul documentation string.
author Rik <octave@nomad.inbox5.com>
date Mon, 06 Dec 2010 17:03:00 -0800
parents a4f482e66b65
children cc7f30d3fd01

--- scripts/testfun/speed.m	11313:988d2bd6bacd
+++ scripts/testfun/speed.m	11314:87f258202b0f
@@ -19,137 +19,135 @@
 
 ## -*- texinfo -*-
 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol})
 ## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{})
 ##
-## Determine the execution time of an expression for various @var{n}.
-## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n},
-## an initialization expression is computed to create whatever data
-## are needed for the test. If a second expression is given, the
-## execution times of the two expressions will be compared. Called
-## without output arguments the results are presented graphically.
+## Determine the execution time of an expression (@var{f}) for various input
+## values (@var{n}). The @var{n} are log-spaced from 1 to @var{max_n}. For
+## each @var{n}, an initialization expression (@var{init}) is computed to
+## create any data needed for the test. If a second expression (@var{f2}) is
+## given then the execution times of the two expressions are compared. When
+## called without output arguments the results are displayed graphically.
 ##
 ## @table @code
 ## @item @var{f}
 ## The expression to evaluate.
 ##
 ## @item @var{max_n}
 ## The maximum test length to run. Default value is 100. Alternatively,
-## use @code{[min_n,max_n]} or for complete control, @code{[n1,n2,@dots{},nk]}.
+## use @code{[min_n, max_n]} or specify the @var{n} exactly with
+## @code{[n1, n2, @dots{}, nk]}.
 ##
 ## @item @var{init}
 ## Initialization expression for function argument values. Use @var{k}
 ## for the test number and @var{n} for the size of the test. This should
-## compute values for all variables used by @var{f}. Note that init will
+## compute values for all variables used by @var{f}. Note that @var{init} will
 ## be evaluated first for @math{k = 0}, so things which are constant throughout
-## the test can be computed then. The default value is @code{@var{x} =
-## randn (@var{n}, 1)}.
+## the test series can be computed once. The default value is
+## @code{@var{x} = randn (@var{n}, 1)}.
 ##
 ## @item @var{f2}
-## An alternative expression to evaluate, so the speed of the two
-## can be compared. The default is @code{[]}.
+## An alternative expression to evaluate, so that the speed of two
+## expressions can be directly compared. The default is @code{[]}.
 ##
 ## @item @var{tol}
-## If @var{tol} is @code{Inf}, then no comparison will be made between the
-## results of expression @var{f} and expression @var{f2}. Otherwise,
-## expression @var{f} should produce a value @var{v} and expression @var{f2}
-## should produce a value @var{v2}, and these will be compared using
-## @code{assert(@var{v},@var{v2},@var{tol})}. If @var{tol} is positive,
-## the tolerance is assumed to be absolute. If @var{tol} is negative,
-## the tolerance is assumed to be relative. The default is @code{eps}.
+## Tolerance used to compare the results of expression @var{f} and expression
+## @var{f2}. If @var{tol} is positive, the tolerance is an absolute one.
+## If @var{tol} is negative, the tolerance is a relative one. The default is
+## @code{eps}. If @var{tol} is @code{Inf}, then no comparison will be made.
 ##
 ## @item @var{order}
-## The time complexity of the expression @code{O(a n^p)}. This
+## The time complexity of the expression @math{O(a*n^p)}. This
 ## is a structure with fields @code{a} and @code{p}.
 ##
 ## @item @var{n}
-## The values @var{n} for which the expression was calculated and
+## The values @var{n} for which the expression was calculated AND
 ## the execution time was greater than zero.
 ##
 ## @item @var{T_f}
 ## The nonzero execution times recorded for the expression @var{f} in seconds.
 ##
 ## @item @var{T_f2}
 ## The nonzero execution times recorded for the expression @var{f2} in seconds.
-## If it is needed, the mean time ratio is just @code{mean(T_f./T_f2)}.
+## If required, the mean time ratio is simply @code{mean (T_f./T_f2)}.
 ##
 ## @end table
 ##
 ## The slope of the execution time graph shows the approximate
-## power of the asymptotic running time @code{O(n^p)}. This
+## power of the asymptotic running time @math{O(n^p)}. This
 ## power is plotted for the region over which it is approximated
 ## (the latter half of the graph). The estimated power is not
 ## very accurate, but should be sufficient to determine the
-## general order of your algorithm. It should indicate if for
-## example your implementation is unexpectedly @code{O(n^2)}
-## rather than @code{O(n)} because it extends a vector each
-## time through the loop rather than pre-allocating one which is
-## big enough. For example, in the current version of Octave,
-## the following is not the expected @code{O(n)}:
+## general order of an algorithm. It should indicate if, for
+## example, the implementation is unexpectedly @math{O(n^2)}
+## rather than @math{O(n)} because it extends a vector each
+## time through the loop rather than pre-allocating storage.
+## In the current version of Octave, the following is not the
+## expected @math{O(n)}.
 ##
 ## @example
-## speed ("for i = 1:n, y@{i@} = x(i); end", "", [1000,10000])
+## speed ("for i = 1:n, y@{i@} = x(i); endfor", "", [1000, 10000])
 ## @end example
 ##
 ## @noindent
-## but it is if you preallocate the cell array @code{y}:
+## But it is if you preallocate the cell array @code{y}:
 ##
 ## @example
 ## @group
-## speed ("for i = 1:n, y@{i@} = x(i); end", ...
+## speed ("for i = 1:n, y@{i@} = x(i); endfor", ...
 ##        "x = rand (n, 1); y = cell (size (x));", [1000, 10000])
 ## @end group
 ## @end example
 ##
-## An attempt is made to approximate the cost of the individual
+## An attempt is made to approximate the cost of individual
 ## operations, but it is wildly inaccurate. You can improve the
 ## stability somewhat by doing more work for each @code{n}. For
 ## example:
 ##
 ## @example
 ## speed ("airy(x)", "x = rand (n, 10)", [10000, 100000])
 ## @end example
 ##
-## When comparing a new and original expression, the line on the
-## speedup ratio graph should be larger than 1 if the new expression
-## is faster. Better algorithms have a shallow slope. Generally,
+## When comparing two different expressions (@var{f}, @var{f2}), the slope
+## of the line on the speedup ratio graph should be larger than 1 if the new
+## expression is faster. Better algorithms have a shallow slope. Generally,
 ## vectorizing an algorithm will not change the slope of the execution
-## time graph, but it will shift it relative to the original. For
+## time graph, but will shift it relative to the original. For
 ## example:
 ##
 ## @example
 ## @group
 ## speed ("v = sum (x)", "", [10000, 100000], ...
 ##        "v = 0; for i = 1:length (x), v += x(i); end")
 ## @end group
 ## @end example
 ##
-## A more complex example, if you had an original version of @code{xcorr}
-## using for loops and another version using an FFT, you could compare the
-## run speed for various lags as follows, or for a fixed lag with varying
-## vector lengths as follows:
+## The following is a more complex example. If there was an original version
+## of @code{xcorr} using for loops and a second version using an FFT, then
+## one could compare the run speed for various lags as follows, or for a fixed
+## lag with varying vector lengths as follows:
 ##
 ## @example
 ## @group
 ## speed ("v = xcorr (x, n)", "x = rand (128, 1);", 100,
 ##        "v2 = xcorr_orig (x, n)", -100*eps)
 ## speed ("v = xcorr (x, 15)", "x = rand (20+n, 1);", 100,
 ##        "v2 = xcorr_orig (x, n)", -100*eps)
 ## @end group
 ## @end example
 ##
-## Assuming one of the two versions is in @var{xcorr_orig}, this
+## Assuming one of the two versions is in xcorr_orig, this
 ## would compare their speed and their output values. Note that the
 ## FFT version is not exact, so we specify an acceptable tolerance on
-## the comparison @code{100*eps}, and the errors should be computed
-## relatively, as @code{abs((@var{x} - @var{y})./@var{y})} rather than
-## absolutely as @code{abs(@var{x} - @var{y})}.
+## the comparison @code{100*eps}, and that the errors should be computed
+## relatively, as @code{abs ((@var{x} - @var{y}) ./ @var{y})} rather than
+## absolutely as @code{abs (@var{x} - @var{y})}.
 ##
-## Type @code{example('speed')} to see some real examples. Note for
-## obscure reasons, you can't run examples 1 and 2 directly using
-## @code{demo('speed')}. Instead use, @code{eval(example('speed',1))}
-## and @code{eval(example('speed',2))}.
+## Type @code{example('speed')} to see some real examples. Note that for
+## obscure reasons, examples 1 and 2 can not be run directly using
+## @code{demo('speed')}. Instead use, @code{eval ( example('speed', 1) )}
+## or @code{eval ( example('speed', 2) )}.
 ## @end deftypefn
 
 ## FIXME: consider two dimensional speedup surfaces for functions like kron.
 function [__order, __test_n, __tnew, __torig] ...
     = speed (__f1, __init, __max_n, __f2, __tol)
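
The revised docstring above documents the optional output arguments (order, n,
T_f, T_f2) and the tolerance handling. As a rough illustration of how they fit
together, the following Octave sketch (not part of this changeset; the variable
names, test sizes, and tolerance are illustrative) captures the outputs and
reports the fitted complexity and mean speedup:

    ## Compare the built-in sum against an explicit accumulation loop.
    ## f assigns v and f2 assigns v2, so the results are checked with
    ## assert (v, v2, tol); a negative tol requests a relative comparison.
    ## The tolerance is deliberately generous to absorb accumulated
    ## floating-point rounding differences between the two summations.
    [order, n, T_f, T_f2] = speed ("v = sum (x)", "x = rand (n, 1);", ...
                                   [1000, 100000], ...
                                   "v2 = 0; for i = 1:length (x), v2 += x(i); endfor", ...
                                   -1e6*eps);
    printf ("estimated complexity of sum: O(n^%.2f)\n", order.p);
    printf ("mean speedup over the loop:  %.1f\n", mean (T_f2 ./ T_f));

Per the docstring, results are only plotted when speed is called without output
arguments, so a call like this one leaves the raw timings in T_f and T_f2 for
separate inspection.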
@@ -373,12 +371,12 @@
 %! type build_orig;
 %! disp("-----------------------");
 %! type build;
 %! disp("-----------------------");
 %!
-%! disp("Vectorized test. This takes a little while...");
+%! disp("Vectorized test.\nThis takes a little while...");
 %! speed('build(n)', '', 1000, 'build_orig(n)');
 %! clear build build_orig
 %! disp("-----------------------");
 %! disp("This time, the for loop is done away with entirely.");
-%! disp("Notice how much bigger the speedup is then in example 1.");
+%! disp("Notice how much bigger the speedup is than in example 1.");
 %! endif
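
For anyone copying the preallocation examples out of the texinfo block above,
note that @{ and @} are texinfo escapes for literal braces. A directly runnable
form is sketched below (timings and the exact growth behavior will vary with
the Octave version; the docstring's claim applies to the Octave of that era):

    ## Growing the cell array y inside the loop: not the expected O(n).
    speed ("for i = 1:n, y{i} = x(i); endfor", "", [1000, 10000]);
    ## Preallocating y in the init expression: the expected O(n).
    speed ("for i = 1:n, y{i} = x(i); endfor", ...
           "x = rand (n, 1); y = cell (size (x));", [1000, 10000]);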