Here is a simple unit test.
#include <iostream>
#include <vector>
#include "data.generator.h"
#include "reg.class.h"
#include <boost/smart_ptr/shared_ptr.hpp>
int main(int argc, char* argv[]) {
basicGenerator bg;
std::cout << "Sample size: " << bg.get_sampleSize() << std::endl;
bg.makeData();
std::vector<std::vector<double> > x;
std::vector<double> y;
bg.getDataForRegression(x,y);
unsigned int imax = y.size();
for (unsigned int i = 0 ; i < imax ; i++) {
std::cout << i << "\t" << y[i] << "\t" << x[i][0] << "\t" << x[i][1] << std::endl;
}
std::cout << "==================================================================" << std::endl;
regressionPCA rpca(x,y);
std::cout << "==================================================================" << std::endl;
boost::shared_ptr<regressionPCA> prpca;
for (unsigned int j = 0 ; j < 25 ; j++) {
std::cout << std::endl << std::endl << "Run #: " << (j + 1) << std::endl;
bg.makeData();
bg.getDataForRegression(x,y);
/* for (unsigned int i = 0 ; i < imax ; i++) {
std::cout << i << "\t" << y[i] << "\t" << x[i][0] << "\t" << x[i][1] << std::endl;
}*/
prpca.reset(new regressionPCA(x,y));
std::cout << "==================================================================" << std::endl;
}
return 0;
}
The mystery is why I get the following output:
86 -0.727354 -0.252937 -0.841919
87 2.46787 2.13614 1.48789
88 1.79373 1.49828 1.25923
89 0.416356 0.565407 -0.114605
90 -0.258883 -0.585597 -0.183372
91 0.833355 0.914344 0.81171
92 -0.0338814 -0.00264442 -0.118973
93 1.13764 0.41599 1.33175
94 -1.86323 -1.90867 -1.35118
95 0.907604 1.14917 0.621669
96 2.1166 1.06194 1.1703
97 0.159543 0.14446 -0.665135
98 -0.508617 -0.370597 -0.703225
99 2.69086 2.75267 1.40633
==================================================================
r: 0 c: 0 n: 0
r: 100 c: 0 n: 0
r: 100 c: 2 n: 0
r: 100 c: 2 n: 200
Sv = 18.3225 4.69155
V =
0.695362 -0.718659
0.718659 0.695362
Beta = 0.693195 0.627794
==================================================================
Run #: 1
r: 0 c: 0 n: 0
r: 100 c: 0 n: 0
r: 100 c: 2 n: 0
r: 100 c: 2 n: 200
Sv = 14.1699 10.7091
V =
0.49497 -0.86891
0.86891 0.49497
Beta = 0.476181 0.391545
Aborted (core dumped)
Ted@Ted-acer-i7w7 ~/New.System/tests
$
I do not know if it is something peculiar to GSL or to my use of either the boost::shared_ptr or the boost::shared_array, but for whatever reason, I get only one analysis out if a given instance of my PCA regression class. Here is how it is declared:
#ifndef REG_CLASS_H
#define REG_CLASS_H
#include <iosfwd>
#include <vector>
#include <gsl/gsl_linalg.h>
class regressionPCA {
private:
unsigned int nrows, ncols;
regressionPCA(void) {}; // makes default construction impossible
public:
regressionPCA(const std::vector<std::vector<double> >&, const std::vector<double>&);
void Reset(const std::vector<std::vector<double> >&, const std::vector<double>&);
inline void setNrows(unsigned int v) { nrows = v;};
inline void setNcols(unsigned int v) { ncols = v;};
inline unsigned int getNrows(void) const { return nrows; };
inline unsigned int getNcols(void) const { return ncols; };
};
#endif
And here is how it is implemented:
#include "reg.class.h"
#include <iostream>
#include <algorithm>
#include <gsl/gsl_linalg.h>
#include <boost/smart_ptr/shared_array.hpp>
regressionPCA::regressionPCA(const std::vector<std::vector<double> >&data,
const std::vector<double>& Ydata) {
Reset(data,Ydata);
}
void regressionPCA::Reset(const std::vector<std::vector<double> >&data,
const std::vector<double>& Ydata) {
unsigned int r(0),c(0),n(0);
std::cout << "r: " << r << "\tc: " << c << "\tn: " << n << std::endl;
r = data.size();
std::cout << "r: " << r << "\tc: " << c << "\tn: " << n << std::endl;
c = data[0].size();
std::cout << "r: " << r << "\tc: " << c << "\tn: " << n << std::endl;
setNrows(r);
setNcols(c);
n = r * c;
std::cout << "r: " << r << "\tc: " << c << "\tn: " << n << std::endl;
/* */
boost::shared_array<double> Btmp(new double[c]),
Ytmp(new double[r]),
Utmp(new double[n]),
Vtmp(new double[c*c]),
Stmp(new double[c]);
double* B = Btmp.get();
double* Y = Ytmp.get();
double* U = Utmp.get();
double* V = Vtmp.get();
double* S = Stmp.get();
/* */
/*
double* B = new double[c];
double* Y = new double[r];
double* U = new double[n];
double* V = new double[c*c];
double* S = new double[c];
*/
// double *bptr = U.get();
double *bptr = U;
std::vector<std::vector<double> >::const_iterator it = data.begin(), end = data.end();
while (it != end) {
bptr = std::copy(it->begin(), it->end(),bptr);
++it;
}
bptr = Y;
std::copy(Ydata.begin(),Ydata.end(),bptr);
gsl_matrix_view Um = gsl_matrix_view_array(U, getNrows(), getNcols());
gsl_vector_view Ym = gsl_vector_view_array(Y, getNrows());
gsl_vector_view Bm = gsl_vector_view_array(B, getNcols());
gsl_vector_view Sm = gsl_vector_view_array(S, getNcols());
gsl_matrix_view Vm = gsl_matrix_view_array(V, getNcols(), getNcols());
gsl_linalg_SV_decomp_jacobi(&Um.matrix,&Vm.matrix,&Sm.vector);
gsl_linalg_SV_solve(&Um.matrix,&Vm.matrix,&Sm.vector,&Ym.vector,&Bm.vector);
std::cout << std::endl << std::endl << "Sv = " << gsl_vector_get(&Sm.vector,0) << "\t" << gsl_vector_get(&Sm.vector,1) << std::endl;
std::cout << std::endl << std::endl << "V = " << std::endl;
std::cout << "\t" << gsl_matrix_get(&Vm.matrix,0,0) << "\t" << gsl_matrix_get(&Vm.matrix,0,1) << std::endl;
std::cout << "\t" << gsl_matrix_get(&Vm.matrix,1,0) << "\t" << gsl_matrix_get(&Vm.matrix,1,1) << std::endl;
std::cout << std::endl << std::endl << "Beta = " << gsl_vector_get(&Bm.vector,0) << "\t" << gsl_vector_get(&Bm.vector,1) << std::endl;
/*
delete[] B;
delete[] Y;
delete[] U;
delete[] V;
delete[] S;
*/
};
NB: It does not matter whether I use naked arrays (and explicitly use operator delete[], or boost::array. Nor does it matter whether I have the instance of it in the loop on the stack or heap (I did make all the work inReset so that the same instance could be used to do all of a series of regressions, but there is little point if I get a core dump).
Since I am working with a system that may be non-autonomous, I was going to modify it to work with a boost:: circular_buffer, and then use the std::copy on that container's begin and end iterators in exactly the same way that I use it with the std:;vector above, so I can doing a moving analysis with a fixed length sample temporal period. And I can't stop here, as I need not only the regression coefficients, but the residuals.
Since this core dump is happening after the last executable line of function reset, and before the last executable line in my for loop in function main (and I don't have any lines of code to eb executed between the two), I wonder if this is due to an unfortunate interaction between the boost::shared_array and the gsl code. Do the gsl matrix and vector views have to be cleaned up before the memory they're configured to use is freed? Or is there something odd with boost::shared_array (I have only used boost::shared_ptr before - so I am not entirely comfortable with my knowledge of boost::shared_array)? If so, how? Or is it the case I have abused the shared_array? If so, how do I fix it?
Any insights as to what may be awry would be appreciated.
Cheers
Ted