Mercurial > hg > octave-jordi

--- a/liboctave/CSparse.cc
+++ b/liboctave/CSparse.cc
@@ -44,6 +44,8 @@
 #include "sparse-util.h"
 #include "SparseCmplxCHOL.h"

+#include "oct-sort.h"
+
 // Fortran functions we call.
 extern "C"
 {
--- a/liboctave/ChangeLog
+++ b/liboctave/ChangeLog
@@ -1,7 +1,18 @@
+2006-01-04  David Bateman  <dbateman@free.fr>
+
+	* Spars-op-defs.h (SPARSE_SPARSE_MUL): Previous change resulted in
+	elements not being sorted in return matrix. Sort them, and make
+	solver select between two algorithms to further improve the
+	performance.
+	* dSparse.cc: include oct-sort.h.
+	* CSparse.cc: ditto.
+	* sparse-sort.cc: Instantiate octave_sort<octave_idx_type>.
+
 2005-12-28  David Bateman  <dbateman@free.fr>

-	* Sparse-op-defs.h (SPARSE_SPARSE_MUL): Improved algorithm that is faster in
-	all cases, and significantly so for low density or small oder problems.
+	* Sparse-op-defs.h (SPARSE_SPARSE_MUL): Improved algorithm that is
+	faster in all cases, and significantly so for low density or small
+	order problems.

 2005-11-30  John W. Eaton  <jwe@octave.org>
--- a/liboctave/Sparse-op-defs.h
+++ b/liboctave/Sparse-op-defs.h
@@ -1560,12 +1560,12 @@
               octave_idx_type  col = a.ridx(j); \
               for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \
 		{ \
-		  OCTAVE_QUIT; \
 		  if (w[m.ridx(k)] < i + 1) \
                     { \
 		      w[m.ridx(k)] = i + 1; \
 		      nel++; \
 		    } \
+	          OCTAVE_QUIT; \
 		} \
 	    } \
 	} \
@@ -1580,35 +1580,85 @@
           OCTAVE_LOCAL_BUFFER (EL_TYPE, Xcol, nr); \
           \
 	  RET_TYPE retval (nr, a_nc, nel); \
-	  \
 	  octave_idx_type ii = 0; \
-	  \
-	  retval.xcidx(0) = 0; \
-	  for (octave_idx_type i = 0; i < a_nc ; i++) \
+	  /* The optimal break-point as estimated from simulations */ \
+	  /* Note that Mergesort is O(nz log(nz)) while searching all */ \
+	  /* values is O(nr), where nz here is non-zero per row of */ \
+	  /* length nr. The test itself was then derived from the */ \
+	  /* simulation with random square matrices and the observation */ \
+	  /* of the number of non-zero elements in the output matrix */ \
+	  /* it was found that the breakpoints were */ \
+	  /*   nr: 500  1000  2000  5000 10000 */ \
+	  /*   nz:   6    25    97   585  2202 */ \
+	  /* The below is a simplication of the 'polyfit'-ed parameters */ \
+	  /* to these breakpoints */ \
+	  if (nr > 43000 || ((nr * nr) * double(a_nc)) / 43000 > nel) \
 	    { \
-	      for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
+	      octave_idx_type *ri = retval.xridx(); \
+	      octave_sort<octave_idx_type> sort; \
+	      \
+	      retval.xcidx(0) = 0; \
+	      for (octave_idx_type i = 0; i < a_nc ; i++) \
 		{ \
-                  octave_idx_type col = a.ridx(j); \
-                  EL_TYPE tmpval = a.data(j); \
-		  for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \
+		  for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
 		    { \
-                      OCTAVE_QUIT; \
-		      octave_idx_type row = m.ridx(k); \
-		      if (w[row] < i + 1) \
-		        { \
-		          w[row] = i + 1; \
-		          retval.xridx(ii++) = row; \
-		          Xcol[row] = tmpval * m.data(k); \
-		        } \
-		      else \
-		        Xcol[row] += tmpval * m.data(k); \
+		      octave_idx_type col = a.ridx(j); \
+		      EL_TYPE tmpval = a.data(j); \
+		      for (octave_idx_type k = m.cidx(col) ; \
+			   k < m.cidx(col+1); k++) \
+			{ \
+			  OCTAVE_QUIT; \
+			  octave_idx_type row = m.ridx(k); \
+			  if (w[row] < i + 1) \
+			    { \
+			      w[row] = i + 1; \
+			      retval.xridx(ii++) = row;\
+			      Xcol[row] = tmpval * m.data(k); \
+			    } \
+			  else \
+			    Xcol[row] += tmpval * m.data(k); \
+			} \
 		    } \
-		} \
-	      retval.xcidx(i+1) = ii; \
-	      for (octave_idx_type k = retval.xcidx(i); k < retval.xcidx(i+1); k++) \
-		retval.xdata(k) = Xcol[retval.xridx(k)]; \
+	          sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \
+	          for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \
+		    retval.xdata(k) = Xcol[retval.xridx(k)]; \
+		  retval.xcidx(i+1) = ii; \
+		}  \
+	      retval.maybe_compress (true);\
+	    }				   \
+	  else \
+	    { \
+	      retval.xcidx(0) = 0; \
+	      for (octave_idx_type i = 0; i < a_nc ; i++) \
+		{ \
+		  for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
+		    { \
+		      octave_idx_type col = a.ridx(j); \
+		      EL_TYPE tmpval = a.data(j); \
+		      for (octave_idx_type k = m.cidx(col) ; \
+			   k < m.cidx(col+1); k++) \
+			{ \
+			  OCTAVE_QUIT; \
+			  octave_idx_type row = m.ridx(k); \
+			  if (w[row] < i + 1) \
+			    { \
+			      w[row] = i + 1; \
+			      Xcol[row] = tmpval * m.data(k); \
+			    } \
+			  else \
+			    Xcol[row] += tmpval * m.data(k); \
+			} \
+		    } \
+		  for (octave_idx_type k = 0; k < nr; k++) \
+		    if (w[k] == i + 1 && Xcol[k] != 0.) \
+		      { \
+		        retval.xdata(ii) = Xcol[k]; \
+		        retval.xridx(ii++) = k; \
+		      } \
+		  retval.xcidx(i+1) = ii; \
+		}  \
+	      retval.maybe_compress ();\
 	    } \
-	  retval.maybe_compress (true); \
 	  return retval; \
 	} \
     }
--- a/liboctave/dSparse.cc
+++ b/liboctave/dSparse.cc
@@ -45,6 +45,8 @@
 #include "sparse-util.h"
 #include "SparsedbleCHOL.h"

+#include "oct-sort.h"
+
 // Fortran functions we call.
 extern "C"
 {
--- a/liboctave/sparse-sort.cc
+++ b/liboctave/sparse-sort.cc
@@ -50,6 +50,9 @@
 // Instantiate the sparse sorting class
 template class octave_sort<octave_sparse_sort_idxl *>;

+// Instantiate the sorting class of octave_idx_type, need in MUL macro
+template class octave_sort<octave_idx_type>;
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***