Mercurial > hg > octave-lyh
view scripts/testfun/speed.m @ 14363:f3d52523cde1
Use Octave coding conventions in all m-file %!test blocks
* wavread.m, acosd.m, acot.m, acotd.m, acoth.m, acsc.m, acscd.m, acsch.m,
asec.m, asecd.m, asech.m, asind.m, atand.m, cosd.m, cot.m, cotd.m, coth.m,
csc.m, cscd.m, csch.m, sec.m, secd.m, sech.m, sind.m, tand.m, accumarray.m,
accumdim.m, bitcmp.m, bitget.m, bitset.m, blkdiag.m, cart2pol.m, cart2sph.m,
celldisp.m, chop.m, circshift.m, colon.m, common_size.m, cplxpair.m,
cumtrapz.m, curl.m, dblquad.m, deal.m, divergence.m, flipdim.m, fliplr.m,
flipud.m, genvarname.m, gradient.m, idivide.m, int2str.m, interp1.m,
interp1q.m, interp2.m, interp3.m, interpft.m, interpn.m, isa.m, isdir.m,
isequal.m, isequalwithequalnans.m, issquare.m, logspace.m, nargchk.m,
narginchk.m, nargoutchk.m, nextpow2.m, nthargout.m, num2str.m, pol2cart.m,
polyarea.m, postpad.m, prepad.m, profile.m, profshow.m, quadgk.m, quadv.m,
randi.m, rat.m, repmat.m, rot90.m, rotdim.m, shift.m, shiftdim.m, sph2cart.m,
structfun.m, trapz.m, triplequad.m, convhull.m, dsearch.m, dsearchn.m,
griddata3.m, griddatan.m, rectint.m, tsearchn.m, __makeinfo__.m, doc.m,
get_first_help_sentence.m, help.m, type.m, unimplemented.m, which.m, imread.m,
imwrite.m, dlmwrite.m, fileread.m, is_valid_file_id.m, strread.m, textread.m,
textscan.m, commutation_matrix.m, cond.m, condest.m, cross.m,
duplication_matrix.m, expm.m, housh.m, isdefinite.m, ishermitian.m,
issymmetric.m, logm.m, normest.m, null.m, onenormest.m, orth.m, planerot.m,
qzhess.m, rank.m, rref.m, trace.m, vech.m, ans.m, bincoeff.m, bug_report.m,
bzip2.m, comma.m, compare_versions.m, computer.m, edit.m, fileparts.m,
fullfile.m, getfield.m, gzip.m, info.m, inputname.m, isappdata.m, isdeployed.m,
ismac.m, ispc.m, isunix.m, list_primes.m, ls.m, mexext.m, namelengthmax.m,
news.m, orderfields.m, paren.m, recycle.m, rmappdata.m, semicolon.m,
setappdata.m, setfield.m, substruct.m, symvar.m, ver.m, version.m,
warning_ids.m, xor.m, fminbnd.m, fsolve.m, fzero.m, lsqnonneg.m, optimset.m,
pqpnonneg.m, sqp.m, matlabroot.m, __gnuplot_drawnow__.m,
__plt_get_axis_arg__.m, ancestor.m, cla.m, clf.m, close.m, colorbar.m,
colstyle.m, comet3.m, contourc.m, figure.m, gca.m, gcbf.m, gcbo.m, gcf.m,
ginput.m, graphics_toolkit.m, gtext.m, hggroup.m, hist.m, hold.m, isfigure.m,
ishghandle.m, ishold.m, isocolors.m, isonormals.m, isosurface.m, isprop.m,
legend.m, line.m, loglog.m, loglogerr.m, meshgrid.m, ndgrid.m, newplot.m,
orient.m, patch.m, plot3.m, plotyy.m, __print_parse_opts__.m, quiver3.m,
refreshdata.m, ribbon.m, semilogx.m, semilogxerr.m, semilogy.m, stem.m,
stem3.m, subplot.m, title.m, uigetfile.m, view.m, whitebg.m, compan.m, conv.m,
deconv.m, mkpp.m, mpoles.m, pchip.m, poly.m, polyaffine.m, polyder.m,
polyfit.m, polygcd.m, polyint.m, polyout.m, polyval.m, polyvalm.m, ppder.m,
ppint.m, ppjumps.m, ppval.m, residue.m, roots.m, spline.m, intersect.m,
ismember.m, powerset.m, setdiff.m, setxor.m, union.m, unique.m,
autoreg_matrix.m, bartlett.m, blackman.m, detrend.m, fftconv.m, fftfilt.m,
fftshift.m, freqz.m, hamming.m, hanning.m, ifftshift.m, sinc.m, sinetone.m,
sinewave.m, unwrap.m, bicg.m, bicgstab.m, gmres.m, gplot.m, nonzeros.m, pcg.m,
pcr.m, spaugment.m, spconvert.m, spdiags.m, speye.m, spfun.m, spones.m,
sprand.m, sprandsym.m, spstats.m, spy.m, svds.m, treelayout.m, bessel.m,
beta.m, betaln.m, factor.m, factorial.m, isprime.m, lcm.m, legendre.m,
nchoosek.m, nthroot.m, perms.m, pow2.m, primes.m, reallog.m, realpow.m,
realsqrt.m, hadamard.m, hankel.m, hilb.m, invhilb.m, magic.m, rosser.m,
vander.m, __finish__.m, center.m, cloglog.m, corr.m, cov.m, gls.m, histc.m,
iqr.m, kendall.m, kurtosis.m, logit.m, mahalanobis.m, mean.m, meansq.m,
median.m, mode.m, moment.m, ols.m, ppplot.m, prctile.m, probit.m, quantile.m,
range.m, ranks.m, run_count.m, runlength.m, skewness.m, spearman.m,
statistics.m, std.m, table.m, var.m, zscore.m, betacdf.m, betainv.m, betapdf.m,
betarnd.m, binocdf.m, binoinv.m, binopdf.m, binornd.m, cauchy_cdf.m,
cauchy_inv.m, cauchy_pdf.m, cauchy_rnd.m, chi2cdf.m, chi2inv.m, chi2pdf.m,
chi2rnd.m, discrete_cdf.m, discrete_inv.m, discrete_pdf.m, discrete_rnd.m,
empirical_cdf.m, empirical_inv.m, empirical_pdf.m, empirical_rnd.m, expcdf.m,
expinv.m, exppdf.m, exprnd.m, fcdf.m, finv.m, fpdf.m, frnd.m, gamcdf.m,
gaminv.m, gampdf.m, gamrnd.m, geocdf.m, geoinv.m, geopdf.m, geornd.m,
hygecdf.m, hygeinv.m, hygepdf.m, hygernd.m, kolmogorov_smirnov_cdf.m,
laplace_cdf.m, laplace_inv.m, laplace_pdf.m, laplace_rnd.m, logistic_cdf.m,
logistic_inv.m, logistic_pdf.m, logistic_rnd.m, logncdf.m, logninv.m,
lognpdf.m, lognrnd.m, nbincdf.m, nbininv.m, nbinpdf.m, nbinrnd.m, normcdf.m,
norminv.m, normpdf.m, normrnd.m, poisscdf.m, poissinv.m, poisspdf.m,
poissrnd.m, stdnormal_cdf.m, stdnormal_inv.m, stdnormal_pdf.m, stdnormal_rnd.m,
tcdf.m, tinv.m, tpdf.m, trnd.m, unidcdf.m, unidinv.m, unidpdf.m, unidrnd.m,
unifcdf.m, unifinv.m, unifpdf.m, unifrnd.m, wblcdf.m, wblinv.m, wblpdf.m,
wblrnd.m, kolmogorov_smirnov_test.m, kruskal_wallis_test.m, base2dec.m,
bin2dec.m, blanks.m, cstrcat.m, deblank.m, dec2base.m, dec2bin.m, dec2hex.m,
findstr.m, hex2dec.m, index.m, isletter.m, mat2str.m, rindex.m, str2num.m,
strcat.m, strjust.m, strmatch.m, strsplit.m, strtok.m, strtrim.m, strtrunc.m,
substr.m, validatestring.m, demo.m, example.m, fail.m, speed.m, addtodate.m,
asctime.m, clock.m, ctime.m, date.m, datenum.m, datetick.m, datevec.m,
eomday.m, etime.m, is_leap_year.m, now.m:
Use Octave coding conventions in all m-file %!test blocks
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Mon, 13 Feb 2012 07:29:44 -0800 |
parents | 11949c9795a0 |
children | 5d3a684236b0 |
line wrap: on
line source
## Copyright (C) 2000-2012 Paul Kienzle
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol})
## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{})
##
## Determine the execution time of an expression (@var{f}) for various input
## values (@var{n}). The @var{n} are log-spaced from 1 to @var{max_n}. For
## each @var{n}, an initialization expression (@var{init}) is computed to
## create any data needed for the test. If a second expression (@var{f2}) is
## given then the execution times of the two expressions are compared. When
## called without output arguments the results are printed to stdout and
## displayed graphically.
##
## @table @code
## @item @var{f}
## The code expression to evaluate.
##
## @item @var{max_n}
## The maximum test length to run. The default value is 100. Alternatively,
## use @code{[min_n, max_n]} or specify the @var{n} exactly with
## @code{[n1, n2, @dots{}, nk]}.
##
## @item @var{init}
## Initialization expression for function argument values. Use @var{k}
## for the test number and @var{n} for the size of the test. This should
## compute values for all variables used by @var{f}.
## Note that @var{init} will
## be evaluated first for @math{k = 0}, so things which are constant throughout
## the test series can be computed once. The default value is
## @code{@var{x} = randn (@var{n}, 1)}.
##
## @item @var{f2}
## An alternative expression to evaluate, so that the speed of two
## expressions can be directly compared. The default is @code{[]}.
##
## @item @var{tol}
## Tolerance used to compare the results of expression @var{f} and expression
## @var{f2}. If @var{tol} is positive, the tolerance is an absolute one.
## If @var{tol} is negative, the tolerance is a relative one. The default is
## @code{eps}. If @var{tol} is @code{Inf}, then no comparison will be made.
##
## @item @var{order}
## The time complexity of the expression @math{O(a*n^p)}. This
## is a structure with fields @code{a} and @code{p}.
##
## @item @var{n}
## The values @var{n} for which the expression was calculated @strong{AND}
## the execution time was greater than zero.
##
## @item @var{T_f}
## The nonzero execution times recorded for the expression @var{f} in seconds.
##
## @item @var{T_f2}
## The nonzero execution times recorded for the expression @var{f2} in seconds.
## If required, the mean time ratio is simply @code{mean (T_f ./ T_f2)}.
##
## @end table
##
## The slope of the execution time graph shows the approximate
## power of the asymptotic running time @math{O(n^p)}. This
## power is plotted for the region over which it is approximated
## (the latter half of the graph). The estimated power is not
## very accurate, but should be sufficient to determine the
## general order of an algorithm. It should indicate if, for
## example, the implementation is unexpectedly @math{O(n^2)}
## rather than @math{O(n)} because it extends a vector each
## time through the loop rather than pre-allocating storage.
## In the current version of Octave, the following is not the
## expected @math{O(n)}.
##
## @example
## speed ("for i = 1:n, y@{i@} = x(i); endfor", "", [1000, 10000])
## @end example
##
## @noindent
## But it is if you preallocate the cell array @code{y}:
##
## @example
## @group
## speed ("for i = 1:n, y@{i@} = x(i); endfor", ...
##        "x = rand (n, 1); y = cell (size (x));", [1000, 10000])
## @end group
## @end example
##
## An attempt is made to approximate the cost of individual
## operations, but it is wildly inaccurate. You can improve the
## stability somewhat by doing more work for each @code{n}. For
## example:
##
## @example
## speed ("airy(x)", "x = rand (n, 10)", [10000, 100000])
## @end example
##
## When comparing two different expressions (@var{f}, @var{f2}), the slope
## of the line on the speedup ratio graph should be larger than 1 if the new
## expression is faster. Better algorithms have a shallow slope. Generally,
## vectorizing an algorithm will not change the slope of the execution
## time graph, but will shift it relative to the original. For
## example:
##
## @example
## @group
## speed ("sum (x)", "", [10000, 100000], ...
##        "v = 0; for i = 1:length (x), v += x(i); endfor")
## @end group
## @end example
##
## The following is a more complex example. If there was an original version
## of @code{xcorr} using for loops and a second version using an FFT, then
## one could compare the run speed for various lags (first call below), or
## for a fixed lag with varying vector lengths (second call below):
##
## @example
## @group
## speed ("xcorr (x, n)", "x = rand (128, 1);", 100, ...
##        "xcorr_orig (x, n)", -100*eps)
## speed ("xcorr (x, 15)", "x = rand (20+n, 1);", 100, ...
##        "xcorr_orig (x, 15)", -100*eps)
## @end group
## @end example
##
## Assuming one of the two versions is in xcorr_orig, this
## would compare their speed and their output values. Note that the
## FFT version is not exact, so one must specify an acceptable tolerance on
## the comparison @code{100*eps}.
## In this case, the comparison should be
## computed relatively, as @code{abs ((@var{x} - @var{y}) ./ @var{y})} rather
## than absolutely as @code{abs (@var{x} - @var{y})}.
##
## Type @kbd{example ("speed")} to see some real examples or
## @kbd{demo ("speed")} to run them.
## @end deftypefn

## FIXME: consider two dimensional speedup surfaces for functions like kron.

## Implementation notes:
##  * The user-supplied expressions are run with eval inside this function's
##    workspace.  Every internal variable that is live while user code runs
##    therefore carries a "__" prefix so that it is unlikely to collide with
##    names used by the expressions.  Only "n" (test size) and "k" (test
##    number) are deliberately exposed, as documented above.
##  * Each expression is timed with time(); runs shorter than 0.25 s are
##    repeated twice more and the minimum of the three timings is kept.
function [__order, __test_n, __tnew, __torig] = speed (__f1, __init, __max_n = 100, __f2 = "", __tol = eps)

  ## The function declares exactly five inputs.
  if (nargin < 1 || nargin > 5)
    print_usage ();
  endif

  if (nargin < 2 || isempty (__init))
    __init = "x = randn (n, 1)";
  endif

  if (isempty (__max_n))
    __max_n = 100;
  endif

  __numtests = 15;

  ## Let user specify range of n.
  if (isscalar (__max_n))
    ## Scalar: log-spaced sizes from 1 to max_n.
    __min_n = 1;
    assert (__max_n > __min_n);
    __test_n = logspace (0, log10 (__max_n), __numtests);
  elseif (length (__max_n) == 2)
    ## Two elements: [min_n, max_n], log-spaced in between.
    [__min_n, __max_n] = deal (__max_n(1), __max_n(2));
    assert (__min_n >= 1);
    assert (__max_n > __min_n);
    __test_n = logspace (log10 (__min_n), log10 (__max_n), __numtests);
  else
    ## Anything else: use the given sizes verbatim.
    assert (all (__max_n > 0));
    __test_n = __max_n;
  endif

  ## Force n to be an integer.  unique() also sorts and removes the
  ## duplicates that rounding can introduce.
  __test_n = unique (round (__test_n));
  assert (__test_n >= 1);

  __torig = __tnew = zeros (size (__test_n));

  ## Print and plot the data if no output is requested.
  ## "__"-prefixed because it is read inside the timing loop, after user
  ## code has been evaluated in this workspace.
  __do_display = (nargout == 0);

  if (__do_display)
    disp (cstrcat ("testing ", __f1, "\ninit: ", __init));
  endif

  ## Add semicolon closure to all code fragments in case user has not done so.
  __init = cstrcat (__init, ";");
  __f1 = cstrcat (__f1, ";");
  if (! isempty (__f2))
    __f2 = cstrcat (__f2, ";");
  endif

  ## Make sure the functions are freshly loaded by evaluating them at
  ## test_n(1); first have to initialize the args though.
  n = 1;
  k = 0;
  eval (__init);
  eval (__f1);
  if (! isempty (__f2))
    eval (__f2);
  endif

  ## Run the tests.
  for k = 1:length (__test_n)
    n = __test_n(k);
    eval (__init);

    if (__do_display)
      printf ("n%i = %i ", k, n);
      fflush (stdout);
    endif

    ## Time the new expression.
    ## NOTE(review): the result is captured from "ans"; an expression that
    ## does not assign "ans" presumably leaves a stale or undefined value
    ## here -- confirm before relying on the comparison below.
    eval (cstrcat ("__t = time();", __f1, "__v1=ans; __t = time()-__t;"));
    if (__t < 0.25)
      ## Short run: repeat twice and keep the best of three.
      eval (cstrcat ("__t2 = time();", __f1, "__t2 = time()-__t2;"));
      eval (cstrcat ("__t3 = time();", __f1, "__t3 = time()-__t3;"));
      __t = min ([__t, __t2, __t3]);
    endif
    __tnew(k) = __t;

    ## Time the reference expression, if one was given.
    if (! isempty (__f2))
      eval (cstrcat ("__t = time();", __f2, "__v2=ans; __t = time()-__t;"));
      if (__t < 0.25)
        eval (cstrcat ("__t2 = time();", __f2, "__t2 = time()-__t2;"));
        eval (cstrcat ("__t3 = time();", __f2, "__t3 = time()-__t3;"));
        __t = min ([__t, __t2, __t3]);
      endif
      __torig(k) = __t;
      ## An infinite tolerance disables the result comparison.
      if (! isinf(__tol))
        assert (__v1, __v2, __tol);
      endif
    endif
  endfor

  ## Drop times of zero.
  if (isempty (__f2))
    zidx = (__tnew < 100*eps);
    __test_n(zidx) = [];
    __tnew(zidx) = [];
  else
    zidx = (__tnew < 100*eps | __torig < 100*eps);
    __test_n(zidx) = [];
    __tnew(zidx) = [];
    __torig(zidx) = [];
  endif

  if (isempty (__test_n))
    error (["speed: All running times were zero.\n", ...
            "error: speed: Choose larger MAX_N or do more work per function evaluation"]);
  endif

  ## Approximate time complexity and return it if requested.
  ## A straight line is fitted to log(time) versus log(n) over the latter
  ## half of the measurements.
  tailidx = ceil (length (__test_n)/2):length (__test_n);
  p = polyfit (log (__test_n(tailidx)), log (__tnew(tailidx)), 1);
  if (nargout > 0)
    __order.p = p(1);
    __order.a = exp (p(2));
  endif

  if (__do_display)
    figure;
    ## Strip semicolon added to code fragments before displaying
    __init(end) = "";
    __f1(end) = "";
    if (! isempty (__f2))
      __f2(end) = "";
    endif
  endif

  if (__do_display && isempty (__f2))
    ## Single expression: plot execution time only.
    loglog (__test_n, __tnew*1000, "*-g;execution time;");
    xlabel ("test length");
    ylabel ("best execution time (ms)");
    title ({__f1, cstrcat("init: ", __init)});

  elseif (__do_display)
    ## Two expressions: speedup ratios on the left, raw times on the right.
    ## ";" is replaced by "." in the legend text because ";" delimits the
    ## key inside the plot format string.
    subplot (1, 2, 1);
    semilogx (__test_n, __torig./__tnew, ...
              cstrcat ("-*r;", strrep (__f1, ";", "."), " / ", ...
                       strrep (__f2, ";", "."), ";"), ...
              __test_n, __tnew./__torig, ...
              cstrcat ("-*g;", strrep (__f2, ";", "."), " / ", ...
                       strrep (__f1, ";", "."), ";"));
    title ("Speedup Ratio");
    xlabel ("test length");
    ylabel ("speedup ratio");

    subplot (1, 2, 2);
    loglog (__test_n, __tnew*1000, ...
            cstrcat ("*-g;", strrep (__f1, ";", "."), ";"), ...
            __test_n, __torig*1000, ...
            cstrcat ("*-r;", strrep (__f2,";","."), ";"));
    title ({"Execution Times", cstrcat("init: ", __init)});
    xlabel ("test length");
    ylabel ("best execution time (ms)");

    ratio = mean (__torig ./ __tnew);
    printf ("\n\nMean runtime ratio = %.3g for '%s' vs '%s'\n", ...
            ratio, __f2, __f1);
  endif

  if (__do_display)
    ## Plot time complexity approximation (using milliseconds).
    figure;   # Open second plot window

    ## Round the fitted power to one decimal for display.
    order = round (10*p(1))/10;
    if (order >= 0.1)
      order = sprintf ("O(n^%g)", order);
    else
      order = "O(1)";
    endif
    v = polyval (p, log (__test_n(tailidx)));

    loglog (__test_n(tailidx), exp(v)*1000, sprintf ("b;%s;", order));
    title ({"Time Complexity", __f1});
    xlabel ("test length");

    ## Get base time to 1 digit of accuracy.
    dt = exp (p(2));
    dt = floor (dt/10^floor(log10(dt)))*10^floor(log10(dt));
    ## Pick a unit for the base time.  The string is not called "time" so
    ## that it does not shadow the time() function used for the timings.
    if (log10 (dt) >= -0.5)
      time_str = sprintf ("%g s", dt);
    elseif (log10 (dt) >= -3.5)
      time_str = sprintf ("%g ms", dt*1e3);
    elseif (log10 (dt) >= -6.5)
      time_str = sprintf ("%g us", dt*1e6);
    else
      time_str = sprintf ("%g ns", dt*1e9);
    endif

    ## Display nicely formatted complexity.
    printf ("\nFor %s:\n", __f1);
    printf (" asymptotic power: %s\n", order);
    printf (" approximate time per operation: %s\n", time_str);
  endif

endfunction


%% FIXME: Demos with declared functions do not work. See bug #31815.
%% A workaround has been hacked by not declaring the functions
%% but using eval to create them in the proper context.
%% Unfortunately, we can't remove them from the user's workspace
%% because of another bug (#34497).

%% Demo 1: growing a vector inside a loop versus preallocating it.
%!demo
%! fstr_build_orig = cstrcat (
%!   "function x = build_orig (n)\n",
%!   " ## extend the target vector on the fly\n",
%!   " for i=0:n-1, x([1:100]+i*100) = 1:100; endfor\n",
%!   "endfunction");
%! fstr_build = cstrcat (
%!   "function x = build (n)\n",
%!   " ## preallocate the target vector\n",
%!   " x = zeros (1, n*100);\n",
%!   " for i=0:n-1, x([1:100]+i*100) = 1:100; endfor\n",
%!   "endfunction");
%!
%! disp ("-----------------------");
%! disp (fstr_build_orig);
%! disp ("-----------------------");
%! disp (fstr_build);
%! disp ("-----------------------");
%!
%! ## Eval functions strings to create them in the current context
%! eval (fstr_build_orig);
%! eval (fstr_build);
%!
%! disp ("Preallocated vector test.\nThis takes a little while...");
%! speed("build (n)", "", 1000, "build_orig (n)");
%! clear -f build build_orig
%! disp ("-----------------------");
%! disp ("Note how much faster it is to pre-allocate a vector.");
%! disp ("Notice the peak speedup ratio.");

%% Demo 2: the same loop versus a fully vectorized construction.
%!demo
%! fstr_build_orig = cstrcat (
%!   "function x = build_orig (n)\n",
%!   " for i=0:n-1, x([1:100]+i*100) = 1:100; endfor\n",
%!   "endfunction");
%! fstr_build = cstrcat (
%!   "function x = build (n)\n",
%!   " idx = [1:100]';\n",
%!   " x = idx(:,ones(1,n));\n",
%!   " x = reshape (x, 1, n*100);\n",
%!   "endfunction");
%!
%! disp ("-----------------------");
%! disp (fstr_build_orig);
%! disp ("-----------------------");
%! disp (fstr_build);
%! disp ("-----------------------");
%!
%! ## Eval functions strings to create them in the current context
%! eval (fstr_build_orig);
%! eval (fstr_build);
%!
%! disp ("Vectorized test.\nThis takes a little while...");
%! speed("build (n)", "", 1000, "build_orig (n)");
%! clear -f build build_orig
%! disp ("-----------------------");
%! disp ("This time, the for loop is done away with entirely.");
%! disp ("Notice how much bigger the speedup is than in example 1.");

%% Single-expression call: check the shape and type of every output.
%!test
%! [order, n, T_f1, T_f2] = speed ("airy (x)", "x = rand (n, 10)", [100, 1000]);
%! assert (isstruct (order));
%! assert (size (order), [1, 1]);
%! assert (fieldnames (order), {"p"; "a"});
%! assert (isnumeric (n));
%! assert (length (n) > 10);
%! assert (isnumeric (T_f1));
%! assert (size (T_f1), size (n));
%! assert (isnumeric (T_f2));
%! assert (length (T_f2) > 10);

%% This test is known to fail on operating systems with low resolution timers
%% such as MinGW
%!xtest
%! [order, n, T_f1, T_f2] = speed ("sum (x)", "", [100, 1000], "v = 0; for i = 1:length (x), v += x(i); endfor");
%! assert (isstruct (order));
%! assert (size (order), [1, 1]);
%! assert (fieldnames (order), {"p"; "a"});
%! assert (isnumeric (n));
%! assert (length (n) > 10);
%! assert (isnumeric (T_f1));
%! assert (size (T_f1), size (n));
%! assert (isnumeric (T_f2));
%! assert (length (T_f2) > 10);

%% Test input validation
%!error speed ()
%!error speed (1, 2, 3, 4, 5, 6, 7)