Mercurial > hg > octave-jordi
changeset 5587:b4cb3f93c1e1
[project @ 2006-01-04 22:03:38 by dbateman]
author | dbateman |
---|---|
date | Wed, 04 Jan 2006 22:03:38 +0000 |
parents | d37b96139376 |
children | 79ec73a1ff15 |
files | liboctave/CSparse.cc liboctave/ChangeLog liboctave/Sparse-op-defs.h liboctave/dSparse.cc liboctave/sparse-sort.cc |
diffstat | 5 files changed, 94 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/CSparse.cc +++ b/liboctave/CSparse.cc @@ -44,6 +44,8 @@ #include "sparse-util.h" #include "SparseCmplxCHOL.h" +#include "oct-sort.h" + // Fortran functions we call. extern "C" {
--- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,7 +1,18 @@ +2006-01-04 David Bateman <dbateman@free.fr> + + * Spars-op-defs.h (SPARSE_SPARSE_MUL): Previous change resulted in + elements not being sorted in return matrix. Sort them, and make + solver select between two algorithms to further improve the + performance. + * dSparse.cc: include oct-sort.h. + * CSparse.cc: ditto. + * sparse-sort.cc: Instantiate octave_sort<octave_idx_type>. + 2005-12-28 David Bateman <dbateman@free.fr> - * Sparse-op-defs.h (SPARSE_SPARSE_MUL): Improved algorithm that is faster in - all cases, and significantly so for low density or small oder problems. + * Sparse-op-defs.h (SPARSE_SPARSE_MUL): Improved algorithm that is + faster in all cases, and significantly so for low density or small + order problems. 2005-11-30 John W. Eaton <jwe@octave.org>
--- a/liboctave/Sparse-op-defs.h +++ b/liboctave/Sparse-op-defs.h @@ -1560,12 +1560,12 @@ octave_idx_type col = a.ridx(j); \ for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \ { \ - OCTAVE_QUIT; \ if (w[m.ridx(k)] < i + 1) \ { \ w[m.ridx(k)] = i + 1; \ nel++; \ } \ + OCTAVE_QUIT; \ } \ } \ } \ @@ -1580,35 +1580,85 @@ OCTAVE_LOCAL_BUFFER (EL_TYPE, Xcol, nr); \ \ RET_TYPE retval (nr, a_nc, nel); \ - \ octave_idx_type ii = 0; \ - \ - retval.xcidx(0) = 0; \ - for (octave_idx_type i = 0; i < a_nc ; i++) \ + /* The optimal break-point as estimated from simulations */ \ + /* Note that Mergesort is O(nz log(nz)) while searching all */ \ + /* values is O(nr), where nz here is non-zero per row of */ \ + /* length nr. The test itself was then derived from the */ \ + /* simulation with random square matrices and the observation */ \ + /* of the number of non-zero elements in the output matrix */ \ + /* it was found that the breakpoints were */ \ + /* nr: 500 1000 2000 5000 10000 */ \ + /* nz: 6 25 97 585 2202 */ \ + /* The below is a simplication of the 'polyfit'-ed parameters */ \ + /* to these breakpoints */ \ + if (nr > 43000 || ((nr * nr) * double(a_nc)) / 43000 > nel) \ { \ - for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + octave_idx_type *ri = retval.xridx(); \ + octave_sort<octave_idx_type> sort; \ + \ + retval.xcidx(0) = 0; \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ { \ - octave_idx_type col = a.ridx(j); \ - EL_TYPE tmpval = a.data(j); \ - for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ { \ - OCTAVE_QUIT; \ - octave_idx_type row = m.ridx(k); \ - if (w[row] < i + 1) \ - { \ - w[row] = i + 1; \ - retval.xridx(ii++) = row; \ - Xcol[row] = tmpval * m.data(k); \ - } \ - else \ - Xcol[row] += tmpval * m.data(k); \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + retval.xridx(ii++) = row;\ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ } \ - } \ - retval.xcidx(i+1) = ii; \ - for (octave_idx_type k = retval.xcidx(i); k < retval.xcidx(i+1); k++) \ - retval.xdata(k) = Xcol[retval.xridx(k)]; \ + sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \ + for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \ + retval.xdata(k) = Xcol[retval.xridx(k)]; \ + retval.xcidx(i+1) = ii; \ + } \ + retval.maybe_compress (true);\ + } \ + else \ + { \ + retval.xcidx(0) = 0; \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ + { \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + { \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ + } \ + for (octave_idx_type k = 0; k < nr; k++) \ + if (w[k] == i + 1 && Xcol[k] != 0.) \ + { \ + retval.xdata(ii) = Xcol[k]; \ + retval.xridx(ii++) = k; \ + } \ + retval.xcidx(i+1) = ii; \ + } \ + retval.maybe_compress ();\ } \ - retval.maybe_compress (true); \ return retval; \ } \ }
--- a/liboctave/dSparse.cc +++ b/liboctave/dSparse.cc @@ -45,6 +45,8 @@ #include "sparse-util.h" #include "SparsedbleCHOL.h" +#include "oct-sort.h" + // Fortran functions we call. extern "C" {
--- a/liboctave/sparse-sort.cc +++ b/liboctave/sparse-sort.cc @@ -50,6 +50,9 @@ // Instantiate the sparse sorting class template class octave_sort<octave_sparse_sort_idxl *>; +// Instantiate the sorting class of octave_idx_type, need in MUL macro +template class octave_sort<octave_idx_type>; + /* ;;; Local Variables: *** ;;; mode: C++ ***