comparison scripts/testfun/speed.m @ 5798:7e7ed81f5566

[project @ 2006-05-09 17:24:33 by jwe]
author jwe
date Tue, 09 May 2006 17:24:34 +0000
parents f812a0680d05
children 34f96dd5441b
comparison
equal deleted inserted replaced
5797:11fcab4c461d 5798:7e7ed81f5566
1 ## Copyright (C) 2000-2001 Paul Kienzle 1 ## Copyright (C) 2000-2006 Paul Kienzle
2 ## 2 ##
3 ## This program is free software; you can redistribute it and/or modify 3 ## This program is free software; you can redistribute it and/or modify
4 ## it under the terms of the GNU General Public License as published by 4 ## it under the terms of the GNU General Public License as published by
5 ## the Free Software Foundation; either version 2 of the License, or 5 ## the Free Software Foundation; either version 2 of the License, or
6 ## (at your option) any later version. 6 ## (at your option) any later version.
14 ## along with this program; if not, write to the Free Software 14 ## along with this program; if not, write to the Free Software
15 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 15 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 ## 02110-1301 USA 16 ## 02110-1301 USA
17 17
18 ## -*- texinfo -*- 18 ## -*- texinfo -*-
19 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol}, @var{err}) 19 ## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol})
20 ## @deftypefnx {Function File} {@var{r} =} speed (@dots{}) 20 ## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{})
21 ## 21 ##
22 ## Determine the execution time of an expression for various @var{n}. 22 ## Determine the execution time of an expression for various @var{n}.
23 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n}, 23 ## The @var{n} are log-spaced from 1 to @var{max_n}. For each @var{n},
24 ## an initialization expression is computed to create whatever data 24 ## an initialization expression is computed to create whatever data
25 ## are needed for the test. Called without output arguments the data 25 ## are needed for the test. If a second expression is given, the
26 ## is presented graphically. Called with an output argument @var{r}, 26 ## execution times of the two expressions will be compared. Called
27 ## the speedup ratio is returned instead of displaying it graphically. 27 ## without output arguments the results are presented graphically.
28 ## 28 ##
29 ## @table @code 29 ## @table @code
30 ## @item @var{f} 30 ## @item @var{f}
31 ## The expression to evaluate. 31 ## The expression to evaluate.
32 ## 32 ##
33 ## @item @var{max_n} 33 ## @item @var{max_n}
34 ## The maximum test length to run. Default value is 100. 34 ## The maximum test length to run. Default value is 100. Alternatively,
35 ## use @code{[min_n,max_n]} or for complete control, @code{[n1,n2,@dots{},nk]}.
35 ## 36 ##
36 ## @item @var{init} 37 ## @item @var{init}
37 ## Initialization expression for function argument values. Use @var{k} 38 ## Initialization expression for function argument values. Use @var{k}
38 ## for the test number and @var{n} for the size of the test. This should 39 ## for the test number and @var{n} for the size of the test. This should
39 ## compute values for all variables listed in args. Note that init will 40 ## compute values for all variables listed in args. Note that init will
48 ## @item @var{tol} 49 ## @item @var{tol}
49 ## If @var{tol} is @code{Inf}, then no comparison will be made between the 50 ## If @var{tol} is @code{Inf}, then no comparison will be made between the
50 ## results of expression @var{f} and expression @var{f2}. Otherwise, 51 ## results of expression @var{f} and expression @var{f2}. Otherwise,
51 ## expression @var{f} should produce a value @var{v} and expression @var{f2} 52 ## expression @var{f} should produce a value @var{v} and expression @var{f2}
52 ## should produce a value @var{v2}, and these shall be compared using 53 ## should produce a value @var{v2}, and these shall be compared using
53 ## @code{assert(@var{v},@var{v2},@var{tol},@var{err})}. The default is 54 ## @code{assert(@var{v},@var{v2},@var{tol})}. The default is
54 ## @code{eps}. 55 ## @code{eps}.
56 ##
57 ## @item @var{order}
58 ## The time complexity of the expression @code{O(a n^p)}. This
59 ## is a structure with fields @code{a} and @code{p}.
60 ##
61 ## @item @var{n}
62 ## The values @var{n} for which the expression was calculated and the
63 ## execution time was greater than zero.
64 ##
65 ## @item @var{T_f}
66 ## The nonzero execution times recorded for the expression @var{f} in seconds.
67 ##
68 ## @item @var{T_f2}
69 ## The nonzero execution times recorded for the expression @var{f2} in seconds.
70 ## If it is needed, the mean time ratio is just @code{mean(T_f./T_f2)}.
71 ##
55 ## @end table 72 ## @end table
56 ## 73 ##
57 ## Some global variables are also referenced. Choose values suitable to 74 ## The slope of the execution time graph shows the approximate
58 ## your machine and your work style. 75 ## power of the asymptotic running time @code{O(n^p)}. This
59 ## 76 ## power is plotted for the region over which it is approximated
60 ## @table @code 77 ## (the latter half of the graph). The estimated power is not
61 ## @item speed_test_plot 78 ## very accurate, but should be sufficient to determine the
62 ## If true, plot a nice speed comparison graph. Default is true. 79 ## general order of your algorithm. It should indicate if for
63 ## 80 ## example your implementation is unexpectedly @code{O(n^2)}
64 ## @item speed_test_numtests 81 ## rather than @code{O(n)} because it extends a vector each
65 ## Number of vector lengths to test. The default is 25. 82 ## time through the loop rather than preallocating one which is
66 ## @end table 83 ## big enough. For example, in the current version of Octave,
67 ## 84 ## the following is not the expected @code{O(n)}:
68 ## Some comments on the graphs. The line on the speedup ratio graph 85 ##
69 ## should be larger than 1 if your function is faster. The slope on 86 ## @example
70 ## the runtime graph shows you the O(f) speed characteristics. Where it 87 ## speed("for i=1:n,y@{i@}=x(i); end", "", [1000,10000])
71 ## is flat, execution time is O(1). Where it is sloping, execution time 88 ## @end example
72 ## is O(n^m), with steeper slopes for larger @var{n}. Generally vectorizing 89 ##
73 ## a function will not change the slope of the run-time graph, but it 90 ## but it is if you preallocate the cell array @code{y}:
74 ## will shift it relative to the original. 91 ##
75 ## 92 ## @example
76 ## A simple example is 93 ## speed("for i=1:n,y@{i@}=x(i);end", ...
77 ## 94 ## "x=rand(n,1);y=cell(size(x));", [1000,10000])
78 ## @example 95 ## @end example
79 ## speed("strrep(s,x,y)", "s=blanks(n);x=' ';y='b';", 100) 96 ##
80 ## @end example 97 ## An attempt is made to approximate the cost of the individual
81 ## 98 ## operations, but it is wildly inaccurate. You can improve the
99 ## stability somewhat by doing more work for each @code{n}. For
100 ## example:
101 ##
102 ## @example
103 ## speed("airy(x)", "x=rand(n,10)", [10000,100000])
104 ## @end example
105 ##
106 ## When comparing a new and original expression, the line on the
107 ## speedup ratio graph should be larger than 1 if the new expression
108 ## is faster. Better algorithms have a shallow slope. Generally,
109 ## vectorizing an algorithm will not change the slope of the execution
110 ## time graph, but it will shift it relative to the original. For
111 ## example:
112 ##
113 ## @example
114 ## speed("v=sum(x)", "", [10000,100000], ...
115 ## "v=0;for i=1:length(x),v+=x(i);end")
116 ## @end example
117 ##
82 ## A more complex example, if you had an original version of @code{xcorr} 118 ## A more complex example, if you had an original version of @code{xcorr}
83 ## using for loops and another version using an FFT, you could compare the 119 ## using for loops and another version using an FFT, you could compare the
84 ## run speed for various lags, or for a fixed lag with varying 120 ## run speed for various lags, or for a fixed lag with varying
85 ## vector lengths, as follows: 121 ## vector lengths, as follows:
86 ## 122 ##
103 ## @code{demo('speed')}. Instead, use @code{eval(example('speed',1))} 139 ## @code{demo('speed')}. Instead, use @code{eval(example('speed',1))}
104 ## and @code{eval(example('speed',2))}. 140 ## and @code{eval(example('speed',2))}.
105 ## @end deftypefn 141 ## @end deftypefn
106 142
107 ## TODO: consider two dimensional speedup surfaces for functions like kron. 143 ## TODO: consider two dimensional speedup surfaces for functions like kron.
108 function __ratio_r = speed (__f1, __init, __max_n, __f2, __tol, __err) 144 function [__order, __test_n, __tnew, __torig] ...
145 = speed (__f1, __init, __max_n, __f2, __tol)
109 if nargin < 1 || nargin > 6, 146 if nargin < 1 || nargin > 6,
110 usage("speed_test(f, init, max_n, f2, tol, err)"); 147 usage("speed_test(f, init, max_n, f2, tol)");
111 endif 148 endif
112 if nargin < 2 || isempty(__init), 149 if nargin < 2 || isempty(__init),
113 __init = "x = randn(n, 1);"; 150 __init = "x = randn(n, 1);";
114 endif 151 endif
115 if nargin < 3 || isempty(__max_n), __max_n = 100; endif 152 if nargin < 3 || isempty(__max_n), __max_n = 100; endif
116 if nargin < 4, __f2 = []; endif 153 if nargin < 4, __f2 = []; endif
117 if nargin < 5 || isempty(__tol), __tol = eps; endif 154 if nargin < 5 || isempty(__tol), __tol = eps; endif
118 if nargin < 6 || isempty(__err), __err = []; endif 155
119 156 __numtests = 15;
120 global speed_test_plot = 1; 157
121 global speed_test_numtests = 25; 158 ## Let user specify range of n
122 159 if isscalar(__max_n)
123 __test_n = uniq(round(logspace(0,log10(__max_n),speed_test_numtests))); 160 __min_n = 1;
161 assert(__max_n > __min_n);
162 __test_n = logspace(0,log10(__max_n),__numtests);
163 elseif length(__max_n) == 2
164 __min_n = __max_n(1);
165 __max_n = __max_n(2);
166 assert(__min_n >= 1);
167 __test_n = logspace(log10(__min_n),log10(__max_n),__numtests);
168 else
169 __test_n = __max_n;
170 endif
171 __test_n = unique(round(__test_n)); # Force n to be an integer
172 assert(__test_n >= 1);
173
124 __torig = __tnew = zeros (size(__test_n)) ; 174 __torig = __tnew = zeros (size(__test_n)) ;
125 175
126 disp (["testing..........", __f1, "\ninit: ", __init]); 176 disp (["testing ", __f1, "\ninit: ", __init]);
127 177
128 ## make sure the functions are freshly loaded by evaluating them at 178 ## make sure the functions are freshly loaded by evaluating them at
129 ## test_n(1); firt have to initialize the args though. 179 ## test_n(1); first have to initialize the args though.
130 n=1; k=0; 180 n=1; k=0;
131 eval ([__init, ";"]); 181 eval ([__init, ";"]);
132 if !isempty(__f2), eval ([__f2, ";"]); endif 182 if !isempty(__f2), eval ([__f2, ";"]); endif
133 eval ([__f1, ";"]); 183 eval ([__f1, ";"]);
134 184
135 ## run the tests 185 ## run the tests
136 for k=1:length(__test_n) 186 for k=1:length(__test_n)
137 if (k > 1) 187 n=__test_n(k);
138 n=__test_n(k); 188 eval ([__init, ";"]);
139 eval ([__init, ";"]);
140 endif
141 189
142 printf ("n%i=%i ",k, n) ; fflush(1); 190 printf ("n%i=%i ",k, n) ; fflush(1);
143
144 eval (["__t=time();", __f1, "; __v1=ans; __t = time()-__t;"]); 191 eval (["__t=time();", __f1, "; __v1=ans; __t = time()-__t;"]);
145 if (__t < 0.25) 192 if (__t < 0.25)
146 eval (["__t2=time();", __f1, "; __t2 = time()-__t2;"]); 193 eval (["__t2=time();", __f1, "; __t2 = time()-__t2;"]);
147 eval (["__t3=time();", __f1, "; __t3 = time()-__t3;"]); 194 eval (["__t3=time();", __f1, "; __t3 = time()-__t3;"]);
148 __t = min([__t,__t2,__t3]); 195 __t = min([__t,__t2,__t3]);
155 eval (["__t2=time();", __f2, "; __t2 = time()-__t2;"]); 202 eval (["__t2=time();", __f2, "; __t2 = time()-__t2;"]);
156 eval (["__t3=time();", __f2, "; __t3 = time()-__t3;"]); 203 eval (["__t3=time();", __f2, "; __t3 = time()-__t3;"]);
157 endif 204 endif
158 __torig(k) = __t; 205 __torig(k) = __t;
159 if !isinf(__tol) 206 if !isinf(__tol)
160 assert(__v1,__v2,__tol,__err); 207 assert(__v1,__v2,__tol);
161 endif 208 endif
162 endif 209 endif
163 210
164 end 211 endfor
165 212
166 if !isempty(__f2), 213 ## Drop times of zero
167 # Don't keep zero times 214 if !isempty(__f2)
168 idx = find ( __tnew>sqrt(eps) & __torig>sqrt(eps) ) ; 215 zidx = ( __tnew < 100*eps | __torig < 100*eps ) ;
169 ratio = mean (__torig(idx) ./ __tnew(idx)); 216 __test_n(zidx) = [];
170 if (nargout == 1) 217 __tnew(zidx) = [];
171 __ratio_r = ratio; 218 __torig(zidx) = [];
172 else
173 printf ("\nmean runtime ratio of %s / %s : %g\n", __f2, __f1, ratio);
174 endif
175 else 219 else
176 if (nargout == 1) 220 zidx = ( __tnew < 100*eps ) ;
177 _ratio_r = mean(__tnew); 221 __test_n(zidx) = [];
178 else 222 __tnew(zidx) = [];
179 printf ("\nmean runtime: %g\n", mean(__tnew)); 223 endif
180 endif 224
181 endif 225 ## Approximate time complexity and return it if requested
182 226 tailidx = [ceil(length(__test_n)/2):length(__test_n)];
183 if (speed_test_plot && nargout == 0 && !isempty(__f2)) 227 p = polyfit(log(__test_n(tailidx)),log(__tnew(tailidx)), 1);
228 if nargout > 0,
229 __order.p = p(1);
230 __order.a = exp(p(2));
231 endif
232
233
234 ## Plot the data if no output is requested.
235 doplot = (nargout == 0);
236
237 if doplot && !isempty(__f2)
238
184 239
185 subplot(121); 240 subplot(121);
186 xlabel("test length"); 241 xlabel("test length");
187 title (__f1); 242 title (__f1);
188 ylabel("speedup ratio"); 243 ylabel("speedup ratio");
189 semilogx ( __test_n(idx), __torig(idx)./__tnew(idx) , 244 semilogx ( __test_n, __torig./__tnew,
190 ["-*r;", strrep(__f1,";","."), "/", strrep(__f2,";","."), ";"], 245 ["-*r;", strrep(__f1,";","."), "/", strrep(__f2,";","."), ";"],
191 __test_n(idx), __tnew(idx)./__torig(idx) , 246 __test_n, __tnew./__torig,
192 ["-*g;", strrep(__f2,";","."), "/", strrep(__f1,";","."), ";"]); 247 ["-*g;", strrep(__f2,";","."), "/", strrep(__f1,";","."), ";"]);
193 subplot (122); 248 subplot (122);
194 249
195 ## convert best execution time to milliseconds.
196 __torig = 1000*__torig;
197 __tnew = 1000*__tnew;
198
199 ylabel ("best execution time (ms)"); 250 ylabel ("best execution time (ms)");
200 title (["init: ", __init]); 251 title (["init: ", __init]);
201 loglog ( __test_n (idx), __tnew (idx), ["*-g;", strrep(__f1,";","."), ";" ], 252 loglog ( __test_n, __tnew*1000, ["*-g;", strrep(__f1,";","."), ";" ],
202 __test_n (idx), __torig (idx), ["*-r;", strrep(__f2,";","."), ";"]) 253 __test_n, __torig*1000, ["*-r;", strrep(__f2,";","."), ";"])
203 title (""); xlabel (""); ylabel (""); oneplot(); 254
204 elseif (speed_test_plot && nargout == 0) 255 ratio = mean (__torig ./ __tnew);
205 __tnew = 1000*__tnew; 256 printf ("\n\nMean runtime ratio = %.3g for '%s' vs '%s'\n", ...
257 ratio, __f2, __f1);
258
259 elseif doplot
260
261 subplot(111);
206 xlabel("test length"); 262 xlabel("test length");
207 ylabel ("best execution time (ms)"); 263 ylabel ("best execution time (ms)");
208 title ([__f1, " init: ", __init]); 264 title ([__f1, " init: ", __init]);
209 loglog ( __test_n, __tnew, "*-g;;"); 265 loglog ( __test_n, __tnew*1000, "*-g;execution time;");
210 title (""); xlabel (""); ylabel (""); oneplot(); 266
211 endif 267 endif
212 268
269 if doplot
270
271 ## Plot time complexity approximation (using milliseconds).
272 order = sprintf("O(n^%g)",round(10*p(1))/10);
273 v = polyval(p,log(__test_n(tailidx)));
274 hold on;
275 loglog(__test_n(tailidx), exp(v)*1000, sprintf("b;%s;",order));
276 hold off;
277
278 ## Get base time to 1 digit of accuracy
279 dt = exp(p(2));
280 dt = floor(dt/10^floor(log10(dt)))*10^floor(log10(dt));
281 if log10(dt) >= -0.5, time = sprintf("%g s", dt);
282 elseif log10(dt) >= -3.5, time = sprintf("%g ms", dt*1e3);
283 elseif log10(dt) >= -6.5, time = sprintf("%g us", dt*1e6);
284 else time = sprintf("%g ns", dt*1e9);
285 endif
286
287 ## Display nicely formatted complexity.
288 printf ("\nFor %s:\n",__f1);
289 printf (" asymptotic power: %s\n", order);
290 printf (" approximate time per operation: %s\n", time);
291
292 endif
293
213 endfunction 294 endfunction
214 295
215 %!demo if 1 296 %!demo if 1
216 %! function x = build_orig(n) 297 %! function x = build_orig(n)
217 %! ## extend the target vector on the fly 298 %! ## extend the target vector on the fly