Mercurial > hg > octave-lyh
changeset 10804:3d5c6b84ddaf
speed-up fixes to dlmread
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Tue, 20 Jul 2010 12:50:54 +0200 |
parents | 75780a2b0417 |
children | 8c858a1a2079 |
files | src/ChangeLog src/DLD-FUNCTIONS/dlmread.cc |
diffstat | 2 files changed, 95 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +2010-07-20 Jaroslav Hajek <highegg@gmail.com> + + * DLD-FUNCTIONS/dlmread.cc (Fdlmread): Use octave_idx_type instead of + unsigned long. Over-allocate by a factor 2 to reduce copying. + Attempt to estimate number of columns from first line. + Hoist tmp_stream out of the loop and reset it for each substring. + 2010-07-17 Rik <octave@nomad.inbox5.com> * data.cc (repelems): Add more documentation about new function.
--- a/src/DLD-FUNCTIONS/dlmread.cc +++ b/src/DLD-FUNCTIONS/dlmread.cc @@ -1,6 +1,7 @@ /* Copyright (C) 2008, 2009 Jonathan Stickel +Copyright (C) 2010 Jaroslav Hajek This file is part of Octave. @@ -29,6 +30,7 @@ #include <cctype> #include <fstream> +#include <limits> #include "file-ops.h" #include "lo-ieee.h" @@ -38,8 +40,10 @@ #include "oct-obj.h" #include "utils.h" +static const octave_idx_type idx_max = std::numeric_limits<octave_idx_type>::max (); + static bool -read_cell_spec (std::istream& is, unsigned long& row, unsigned long& col) +read_cell_spec (std::istream& is, octave_idx_type& row, octave_idx_type& col) { bool stat = false; @@ -76,8 +80,8 @@ static bool parse_range_spec (const octave_value& range_spec, - unsigned long& rlo, unsigned long& clo, - unsigned long& rup, unsigned long& cup) + octave_idx_type& rlo, octave_idx_type& clo, + octave_idx_type& rup, octave_idx_type& cup) { bool stat = true; @@ -116,8 +120,8 @@ stat = false; } - rup = ULONG_MAX - 1; - cup = ULONG_MAX - 1; + rup = idx_max - 1; + cup = idx_max - 1; } else { @@ -139,10 +143,10 @@ { ColumnVector range(range_spec.vector_value ()); // double --> unsigned int - rlo = static_cast<unsigned long> (range(0)); - clo = static_cast<unsigned long> (range(1)); - rup = static_cast<unsigned long> (range(2)); - cup = static_cast<unsigned long> (range(3)); + rlo = static_cast<octave_idx_type> (range(0)); + clo = static_cast<octave_idx_type> (range(1)); + rup = static_cast<octave_idx_type> (range(2)); + cup = static_cast<octave_idx_type> (range(3)); } else stat = false; @@ -215,7 +219,7 @@ } // Take a subset if a range was given. - unsigned long r0 = 0, c0 = 0, r1 = ULONG_MAX-1, c1 = ULONG_MAX-1; + octave_idx_type r0 = 0, c0 = 0, r1 = idx_max-1, c1 = idx_max-1; if (nargin > 2) { if (nargin == 3) @@ -225,17 +229,20 @@ } else if (nargin == 4) { - r0 = args(2).ulong_value (); - c0 = args(3).ulong_value (); + r0 = args(2).idx_type_value (); + c0 = args(3).idx_type_value (); if (error_state) return retval; } + + if (r0 < 0 || c0 < 0) + error ("dlmread: left & top must not be negative"); } if (!error_state) { - unsigned long i = 0, j = 0, r = 1, c = 1, rmax = 0, cmax = 0; + octave_idx_type i = 0, j = 0, r = 1, c = 1, rmax = 0, cmax = 0; Matrix rdata; ComplexMatrix cdata; @@ -243,15 +250,17 @@ bool iscmplx = false; bool sepflag = false; - unsigned long maxrows = r1 - r0; + octave_idx_type maxrows = r1 - r0; std::string line; // Skip the r0 leading lines as these might be a header. - for (unsigned long m = 0; m < r0; m++) + for (octave_idx_type m = 0; m < r0; m++) getline (file, line); r1 -= r0; + std::istringstream tmp_stream; + // Read in the data one field at a time, growing the data matrix // as needed. while (getline (file, line)) @@ -290,6 +299,40 @@ } } + if (cmax == 0) + { + // Try to estimate the number of columns. + size_t pos1 = 0; + do + { + size_t pos2 = line.find_first_of (sep, pos1); + + if (sepflag && pos2 != std::string::npos) + // Treat consecutive separators as one. + { + pos2 = line.find_first_not_of (sep, pos2); + if (pos2 != std::string::npos) + pos2 -= 1; + else + pos2 = line.length () - 1; + } + + cmax++; + + if (pos2 != std::string::npos) + pos1 = pos2 + 1; + else + pos1 = std::string::npos; + + } + while (pos1 != std::string::npos); + + if (iscmplx) + cdata.resize (rmax, cmax); + else + rdata.resize (rmax, cmax); + } + r = (r > i + 1 ? r : i + 1); j = 0; size_t pos1 = 0; @@ -307,23 +350,35 @@ { // Use resize_and_fill for the case of not-equal // length rows. + rmax = 2*r; + cmax = c; if (iscmplx) - cdata.resize (r, c, 0); + cdata.resize (rmax, cmax); else - rdata.resize (r, c, 0); - rmax = r; - cmax = c; + rdata.resize (rmax, cmax); } - std::istringstream tmp_stream (str); + tmp_stream.str (str); + tmp_stream.clear (); + double x = octave_read_double (tmp_stream); if (tmp_stream) { if (tmp_stream.eof ()) - if (iscmplx) - cdata(i,j++) = x; - else - rdata(i,j++) = x; + { + if (iscmplx) + cdata(i,j++) = x; + else + rdata(i,j++) = x; + } + else if (std::toupper (tmp_stream.peek ()) == 'I') + { + // This is to allow pure imaginary numbers. + if (iscmplx) + cdata(i,j++) = x; + else + rdata(i,j++) = x; + } else { double y = octave_read_double (tmp_stream); @@ -359,35 +414,16 @@ i++; } - if (nargin > 2) - { - if (nargin == 3) - { - if (r1 >= r) - r1 = r - 1; - if (c1 >= c) - c1 = c - 1; - } - else if (nargin == 4) - { - // If r1 and c1 are not given, use what was found to be - // the maximum. - r1 = r - 1; - c1 = c - 1; - } + if (r1 >= r) + r1 = r - 1; + if (c1 >= c) + c1 = c - 1; - // Now take the subset of the matrix. - if (iscmplx) - { - cdata = cdata.extract (0, c0, r1, c1); - cdata.resize (r1 + 1, c1 - c0 + 1); - } - else - { - rdata = rdata.extract (0, c0, r1, c1); - rdata.resize (r1 + 1, c1 - c0 + 1); - } - } + // Now take the subset of the matrix. + if (iscmplx) + cdata = cdata.extract (0, c0, r1, c1); + else + rdata = rdata.extract (0, c0, r1, c1); if (iscmplx) retval(0) = cdata;