Mercurial > hg > octave-jordi
comparison scripts/testfun/speed.m @ 5798:7e7ed81f5566
[project @ 2006-05-09 17:24:33 by jwe]
author | jwe |
---|---|
date | Tue, 09 May 2006 17:24:34 +0000 |
parents | f812a0680d05 |
children | 34f96dd5441b |
comparison
equal
deleted
inserted
replaced
5797:11fcab4c461d | 5798:7e7ed81f5566 |
---|---|
1 ## Copyright (C) 2000-2001 Paul Kienzle | 1 ## Copyright (C) 2000-2006 Paul Kienzle |
2 ## | 2 ## |
3 ## This program is free software; you can redistribute it and/or modify | 3 ## This program is free software; you can redistribute it and/or modify |
4 ## it under the terms of the GNU General Public License as published by | 4 ## it under the terms of the GNU General Public License as published by |
5 ## the Free Software Foundation; either version 2 of the License, or | 5 ## the Free Software Foundation; either version 2 of the License, or |
6 ## (at your option) any later version. | 6 ## (at your option) any later version. |
14 ## along with this program; if not, write to the Free Software | 14 ## along with this program; if not, write to the Free Software |
15 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | 15 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
16 ## 02110-1301 USA | 16 ## 02110-1301 USA |
17 | 17 |
18 ## -*- texinfo -*- | 18 ## -*- texinfo -*- |
19 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}, @var{err}) | 19 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}) |
20 ## @deftypefnx {Function File} {@var{r} =} speed (@dots{}) | 20 ## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{}) |
21 ## | 21 ## |
22 ## Determine the execution time of an expression for various @var{n}. | 22 ## Determine the execution time of an expression for various @var{n}. |
23 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n}, | 23 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n}, |
24 ## an initialization expression is computed to create whatever data | 24 ## an initialization expression is computed to create whatever data |
25 ## are needed for the test. Called without output arguments the data | 25 ## are needed for the test. If a second expression is given, the |
26 ## is presented graphically. Called with an output argument @var{r}, | 26 ## execution times of the two expressions will be compared. Called |
27 ## the speedup ratio is returned instead of displaying it graphically. | 27 ## without output arguments the results are presented graphically. |
28 ## | 28 ## |
29 ## @table @code | 29 ## @table @code |
30 ## @item @var{f} | 30 ## @item @var{f} |
31 ## The expression to evaluate. | 31 ## The expression to evaluate. |
32 ## | 32 ## |
33 ## @item @var{max_n} | 33 ## @item @var{max_n} |
34 ## The maximum test length to run. Default value is 100. | 34 ## The maximum test length to run. Default value is 100. Alternatively, |
35 ## use @code{[min_n,max_n]} or, for complete control, @code{[n1,n2,@dots{},nk]}. | |
35 ## | 36 ## |
36 ## @item @var{init} | 37 ## @item @var{init} |
37 ## Initialization expression for function argument values. Use @var{k} | 38 ## Initialization expression for function argument values. Use @var{k} |
38 ## for the test number and @var{n} for the size of the test. This should | 39 ## for the test number and @var{n} for the size of the test. This should |
39 ## compute values for all variables listed in args. Note that init will | 40 ## compute values for all variables listed in args. Note that init will |
48 ## @item @var{tol} | 49 ## @item @var{tol} |
49 ## If @var{tol} is @code{Inf}, then no comparison will be made between the | 50 ## If @var{tol} is @code{Inf}, then no comparison will be made between the |
50 ## results of expression @var{f} and expression @var{f2}. Otherwise, | 51 ## results of expression @var{f} and expression @var{f2}. Otherwise, |
51 ## expression @var{f} should produce a value @var{v} and expression @var{f2} | 52 ## expression @var{f} should produce a value @var{v} and expression @var{f2} |
52 ## should produce a value @var{v2}, and these shall be compared using | 53 ## should produce a value @var{v2}, and these shall be compared using |
53 ## @code{assert(@var{v},@var{v2},@var{tol},@var{err})}. The default is | 54 ## @code{assert(@var{v},@var{v2},@var{tol})}. The default is |
54 ## @code{eps}. | 55 ## @code{eps}. |
56 ## | |
57 ## @item @var{order} | |
58 ## The time complexity of the expression @code{O(a n^p)}. This | |
59 ## is a structure with fields @code{a} and @code{p}. | |
60 ## | |
61 ## @item @var{n} | |
62 ## The values @var{n} for which the expression was calculated and the | |
63 ## execution time was greater than zero. | |
64 ## | |
65 ## @item @var{T_f} | |
66 ## The nonzero execution times recorded for the expression @var{f} in seconds. | |
67 ## | |
68 ## @item @var{T_f2} | |
69 ## The nonzero execution times recorded for the expression @var{f2} in seconds. | |
70 ## If it is needed, the mean time ratio is just @code{mean(T_f./T_f2)}. | |
71 ## | |
55 ## @end table | 72 ## @end table |
56 ## | 73 ## |
57 ## Some global variables are also referenced. Choose values suitable to | 74 ## The slope of the execution time graph shows the approximate |
58 ## your machine and your work style. | 75 ## power of the asymptotic running time @code{O(n^p)}. This |
59 ## | 76 ## power is plotted for the region over which it is approximated |
60 ## @table @code | 77 ## (the latter half of the graph). The estimated power is not |
61 ## @item speed_test_plot | 78 ## very accurate, but should be sufficient to determine the |
62 ## If true, plot a nice speed comparison graph. Default is true. | 79 ## general order of your algorithm. It should indicate if for |
63 ## | 80 ## example your implementation is unexpectedly @code{O(n^2)} |
64 ## @item speed_test_numtests | 81 ## rather than @code{O(n)} because it extends a vector each |
65 ## Number of vector lengths to test. The default is 25. | 82 ## time through the loop rather than preallocating one which is |
66 ## @end table | 83 ## big enough. For example, in the current version of Octave, |
67 ## | 84 ## the following is not the expected @code{O(n)}: |
68 ## Some comments on the graphs. The line on the speedup ratio graph | 85 ## |
69 ## should be larger than 1 if your function is faster. The slope on | 86 ## @example |
70 ## the runtime graph shows you the O(f) speed characteristics. Where it | 87 ## speed("for i=1:n,y@{i@}=x(i); end", "", [1000,10000]) |
71 ## is flat, execution time is O(1). Where it is sloping, execution time | 88 ## @end example |
72 ## is O(n^m), with steeper slopes for larger @var{n}. Generally vectorizing | 89 ## |
73 ## a function will not change the slope of the run-time graph, but it | 90 ## but it is if you preallocate the cell array @code{y}: |
74 ## will shift it relative to the original. | 91 ## |
75 ## | 92 ## @example |
76 ## A simple example is | 93 ## speed("for i=1:n,y@{i@}=x(i);end", ... |
77 ## | 94 ## "x=rand(n,1);y=cell(size(x));", [1000,10000]) |
78 ## @example | 95 ## @end example |
79 ## speed("strrep(s,x,y)", "s=blanks(n);x=' ';y='b';", 100) | 96 ## |
80 ## @end example | 97 ## An attempt is made to approximate the cost of the individual |
81 ## | 98 ## operations, but it is wildly inaccurate. You can improve the |
99 ## stability somewhat by doing more work for each @code{n}. For | |
100 ## example: | |
101 ## | |
102 ## @example | |
103 ## speed("airy(x)", "x=rand(n,10)", [10000,100000]) | |
104 ## @end example | |
105 ## | |
106 ## When comparing a new and original expression, the line on the | |
107 ## speedup ratio graph should be larger than 1 if the new expression | |
108 ## is faster. Better algorithms have a shallower slope. Generally, | |
109 ## vectorizing an algorithm will not change the slope of the execution | |
110 ## time graph, but it will shift it relative to the original. For | |
111 ## example: | |
112 ## | |
113 ## @example | |
114 ## speed("v=sum(x)", "", [10000,100000], ... | |
115 ## "v=0;for i=1:length(x),v+=x(i);end") | |
116 ## @end example | |
117 ## | |
82 ## A more complex example, if you had an original version of @code{xcorr} | 118 ## A more complex example, if you had an original version of @code{xcorr} |
83 ## using for loops and another version using an FFT, you could compare the | 119 ## using for loops and another version using an FFT, you could compare the |
84 ## run speed for various lags as follows, or for a fixed lag with varying | 120 ## run speed for various lags as follows, or for a fixed lag with varying |
85 ## vector lengths as follows: | 121 ## vector lengths as follows: |
86 ## | 122 ## |
103 ## @code{demo('speed')}. Instead, use @code{eval(example('speed',1))} | 139 ## @code{demo('speed')}. Instead, use @code{eval(example('speed',1))} |
104 ## and @code{eval(example('speed',2))}. | 140 ## and @code{eval(example('speed',2))}. |
105 ## @end deftypefn | 141 ## @end deftypefn |
106 | 142 |
107 ## TODO: consider two dimensional speedup surfaces for functions like kron. | 143 ## TODO: consider two dimensional speedup surfaces for functions like kron. |
108 function __ratio_r = speed (__f1, __init, __max_n, __f2, __tol, __err) | 144 function [__order, __test_n, __tnew, __torig] ... |
145 = speed (__f1, __init, __max_n, __f2, __tol) | |
109 if nargin < 1 || nargin > 6, | 146 if nargin < 1 || nargin > 6, |
110 usage("speed_test(f, init, max_n, f2, tol, err)"); | 147 usage("speed_test(f, init, max_n, f2, tol)"); |
111 endif | 148 endif |
112 if nargin < 2 || isempty(__init), | 149 if nargin < 2 || isempty(__init), |
113 __init = "x = randn(n, 1);"; | 150 __init = "x = randn(n, 1);"; |
114 endif | 151 endif |
115 if nargin < 3 || isempty(__max_n), __max_n = 100; endif | 152 if nargin < 3 || isempty(__max_n), __max_n = 100; endif |
116 if nargin < 4, __f2 = []; endif | 153 if nargin < 4, __f2 = []; endif |
117 if nargin < 5 || isempty(__tol), __tol = eps; endif | 154 if nargin < 5 || isempty(__tol), __tol = eps; endif |
118 if nargin < 6 || isempty(__err), __err = []; endif | 155 |
119 | 156 __numtests = 15; |
120 global speed_test_plot = 1; | 157 |
121 global speed_test_numtests = 25; | 158 ## Let user specify range of n |
122 | 159 if isscalar(__max_n) |
123 __test_n = uniq(round(logspace(0,log10(__max_n),speed_test_numtests))); | 160 __min_n = 1; |
161 assert(__max_n > __min_n); | |
162 __test_n = logspace(0,log10(__max_n),__numtests); | |
163 elseif length(__max_n) == 2 | |
164 __min_n = __max_n(1); | |
165 __max_n = __max_n(2); | |
166 assert(__min_n >= 1); | |
167 __test_n = logspace(log10(__min_n),log10(__max_n),__numtests); | |
168 else | |
169 __test_n = __max_n; | |
170 endif | |
171 __test_n = unique(round(__test_n)); # Force n to be an integer | |
172 assert(__test_n >= 1); | |
173 | |
124 __torig = __tnew = zeros (size(__test_n)) ; | 174 __torig = __tnew = zeros (size(__test_n)) ; |
125 | 175 |
126 disp (["testing..........", __f1, "\ninit: ", __init]); | 176 disp (["testing ", __f1, "\ninit: ", __init]); |
127 | 177 |
128 ## make sure the functions are freshly loaded by evaluating them at | 178 ## make sure the functions are freshly loaded by evaluating them at |
129 ## test_n(1); firt have to initialize the args though. | 179 ## test_n(1); first have to initialize the args though. |
130 n=1; k=0; | 180 n=1; k=0; |
131 eval ([__init, ";"]); | 181 eval ([__init, ";"]); |
132 if !isempty(__f2), eval ([__f2, ";"]); endif | 182 if !isempty(__f2), eval ([__f2, ";"]); endif |
133 eval ([__f1, ";"]); | 183 eval ([__f1, ";"]); |
134 | 184 |
135 ## run the tests | 185 ## run the tests |
136 for k=1:length(__test_n) | 186 for k=1:length(__test_n) |
137 if (k > 1) | 187 n=__test_n(k); |
138 n=__test_n(k); | 188 eval ([__init, ";"]); |
139 eval ([__init, ";"]); | |
140 endif | |
141 | 189 |
142 printf ("n%i=%i ",k, n) ; fflush(1); | 190 printf ("n%i=%i ",k, n) ; fflush(1); |
143 | |
144 eval (["__t=time();", __f1, "; __v1=ans; __t = time()-__t;"]); | 191 eval (["__t=time();", __f1, "; __v1=ans; __t = time()-__t;"]); |
145 if (__t < 0.25) | 192 if (__t < 0.25) |
146 eval (["__t2=time();", __f1, "; __t2 = time()-__t2;"]); | 193 eval (["__t2=time();", __f1, "; __t2 = time()-__t2;"]); |
147 eval (["__t3=time();", __f1, "; __t3 = time()-__t3;"]); | 194 eval (["__t3=time();", __f1, "; __t3 = time()-__t3;"]); |
148 __t = min([__t,__t2,__t3]); | 195 __t = min([__t,__t2,__t3]); |
155 eval (["__t2=time();", __f2, "; __t2 = time()-__t2;"]); | 202 eval (["__t2=time();", __f2, "; __t2 = time()-__t2;"]); |
156 eval (["__t3=time();", __f2, "; __t3 = time()-__t3;"]); | 203 eval (["__t3=time();", __f2, "; __t3 = time()-__t3;"]); |
157 endif | 204 endif |
158 __torig(k) = __t; | 205 __torig(k) = __t; |
159 if !isinf(__tol) | 206 if !isinf(__tol) |
160 assert(__v1,__v2,__tol,__err); | 207 assert(__v1,__v2,__tol); |
161 endif | 208 endif |
162 endif | 209 endif |
163 | 210 |
164 end | 211 endfor |
165 | 212 |
166 if !isempty(__f2), | 213 ## Drop times of zero |
167 # Don't keep zero times | 214 if !isempty(__f2) |
168 idx = find ( __tnew>sqrt(eps) & __torig>sqrt(eps) ) ; | 215 zidx = ( __tnew < 100*eps | __torig < 100*eps ) ; |
169 ratio = mean (__torig(idx) ./ __tnew(idx)); | 216 __test_n(zidx) = []; |
170 if (nargout == 1) | 217 __tnew(zidx) = []; |
171 __ratio_r = ratio; | 218 __torig(zidx) = []; |
172 else | |
173 printf ("\nmean runtime ratio of %s / %s : %g\n", __f2, __f1, ratio); | |
174 endif | |
175 else | 219 else |
176 if (nargout == 1) | 220 zidx = ( __tnew < 100*eps ) ; |
177 _ratio_r = mean(__tnew); | 221 __test_n(zidx) = []; |
178 else | 222 __tnew(zidx) = []; |
179 printf ("\nmean runtime: %g\n", mean(__tnew)); | 223 endif |
180 endif | 224 |
181 endif | 225 ## Approximate time complexity and return it if requested |
182 | 226 tailidx = [ceil(length(__test_n)/2):length(__test_n)]; |
183 if (speed_test_plot && nargout == 0 && !isempty(__f2)) | 227 p = polyfit(log(__test_n(tailidx)),log(__tnew(tailidx)), 1); |
228 if nargout > 0, | |
229 __order.p = p(1); | |
230 __order.a = exp(p(2)); | |
231 endif | |
232 | |
233 | |
234 ## Plot the data if no output is requested. | |
235 doplot = (nargout == 0); | |
236 | |
237 if doplot && !isempty(__f2) | |
238 | |
184 | 239 |
185 subplot(121); | 240 subplot(121); |
186 xlabel("test length"); | 241 xlabel("test length"); |
187 title (__f1); | 242 title (__f1); |
188 ylabel("speedup ratio"); | 243 ylabel("speedup ratio"); |
189 semilogx ( __test_n(idx), __torig(idx)./__tnew(idx) , | 244 semilogx ( __test_n, __torig./__tnew, |
190 ["-*r;", strrep(__f1,";","."), "/", strrep(__f2,";","."), ";"], | 245 ["-*r;", strrep(__f1,";","."), "/", strrep(__f2,";","."), ";"], |
191 __test_n(idx), __tnew(idx)./__torig(idx) , | 246 __test_n, __tnew./__torig, |
192 ["-*g;", strrep(__f2,";","."), "/", strrep(__f1,";","."), ";"]); | 247 ["-*g;", strrep(__f2,";","."), "/", strrep(__f1,";","."), ";"]); |
193 subplot (122); | 248 subplot (122); |
194 | 249 |
195 ## convert best execution time to milliseconds. | |
196 __torig = 1000*__torig; | |
197 __tnew = 1000*__tnew; | |
198 | |
199 ylabel ("best execution time (ms)"); | 250 ylabel ("best execution time (ms)"); |
200 title (["init: ", __init]); | 251 title (["init: ", __init]); |
201 loglog ( __test_n (idx), __tnew (idx), ["*-g;", strrep(__f1,";","."), ";" ], | 252 loglog ( __test_n, __tnew*1000, ["*-g;", strrep(__f1,";","."), ";" ], |
202 __test_n (idx), __torig (idx), ["*-r;", strrep(__f2,";","."), ";"]) | 253 __test_n, __torig*1000, ["*-r;", strrep(__f2,";","."), ";"]) |
203 title (""); xlabel (""); ylabel (""); oneplot(); | 254 |
204 elseif (speed_test_plot && nargout == 0) | 255 ratio = mean (__torig ./ __tnew); |
205 __tnew = 1000*__tnew; | 256 printf ("\n\nMean runtime ratio = %.3g for '%s' vs '%s'\n", ... |
257 ratio, __f2, __f1); | |
258 | |
259 elseif doplot | |
260 | |
261 subplot(111); | |
206 xlabel("test length"); | 262 xlabel("test length"); |
207 ylabel ("best execution time (ms)"); | 263 ylabel ("best execution time (ms)"); |
208 title ([__f1, " init: ", __init]); | 264 title ([__f1, " init: ", __init]); |
209 loglog ( __test_n, __tnew, "*-g;;"); | 265 loglog ( __test_n, __tnew*1000, "*-g;execution time;"); |
210 title (""); xlabel (""); ylabel (""); oneplot(); | 266 |
211 endif | 267 endif |
212 | 268 |
269 if doplot | |
270 | |
271 ## Plot time complexity approximation (using milliseconds). | |
272 order = sprintf("O(n^%g)",round(10*p(1))/10); | |
273 v = polyval(p,log(__test_n(tailidx))); | |
274 hold on; | |
275 loglog(__test_n(tailidx), exp(v)*1000, sprintf("b;%s;",order)); | |
276 hold off; | |
277 | |
278 ## Get base time to 1 digit of accuracy | |
279 dt = exp(p(2)); | |
280 dt = floor(dt/10^floor(log10(dt)))*10^floor(log10(dt)); | |
281 if log10(dt) >= -0.5, time = sprintf("%g s", dt); | |
282 elseif log10(dt) >= -3.5, time = sprintf("%g ms", dt*1e3); | |
283 elseif log10(dt) >= -6.5, time = sprintf("%g us", dt*1e6); | |
284 else time = sprintf("%g ns", dt*1e9); | |
285 endif | |
286 | |
287 ## Display nicely formatted complexity. | |
288 printf ("\nFor %s:\n",__f1); | |
289 printf (" asymptotic power: %s\n", order); | |
290 printf (" approximate time per operation: %s\n", time); | |
291 | |
292 endif | |
293 | |
213 endfunction | 294 endfunction |
214 | 295 |
215 %!demo if 1 | 296 %!demo if 1 |
216 %! function x = build_orig(n) | 297 %! function x = build_orig(n) |
217 %! ## extend the target vector on the fly | 298 %! ## extend the target vector on the fly |