From 331345d06128a51c740a8d9c52babf72efd57a73 Mon Sep 17 00:00:00 2001 From: Gauthier Becker <gauthierbecker@gmail.com> Date: Fri, 21 Jan 2011 13:11:10 +0000 Subject: [PATCH] Rewrite for more efficiency --- Geo/SPoint3.h | 2 +- Numeric/fullMatrix.cpp | 103 ++++++++++++++++++++++++----------------- Numeric/fullMatrix.h | 55 +++++++++++++--------- 3 files changed, 94 insertions(+), 66 deletions(-) diff --git a/Geo/SPoint3.h b/Geo/SPoint3.h index c4cf69bc28..b48c4f417d 100644 --- a/Geo/SPoint3.h +++ b/Geo/SPoint3.h @@ -16,9 +16,9 @@ class SPoint3 { SPoint3(double x, double y, double z) { P[0] = x; P[1] = y; P[2] = z; } SPoint3(const double *p) { P[0] = p[0]; P[1] = p[1]; P[2] = p[2]; } SPoint3(const SPoint3 &pt) { P[0] = pt.P[0]; P[1] = pt.P[1]; P[2] = pt.P[2]; } - SPoint3(const SPoint3 &pt,const SPoint3 &dir,const double dist_) {P[0]=pt.P[0]; P[1]=pt.P[1]; P[2]=pt.P[2]; SPoint3 a(dir); a*=dist_; P[0]+=a[0]; P[1]+=a[1]; P[2]+=a[2];} virtual ~SPoint3() {} void setPosition(double xx, double yy, double zz); + void setPosition(const SPoint3 &pt,const SPoint3 &dir,const double dist_) {P[0]=pt.P[0]; P[1]=pt.P[1]; P[2]=pt.P[2]; SPoint3 a(dir); a*=dist_; P[0]+=a[0]; P[1]+=a[1]; P[2]+=a[2];} void getPosition(double *xx, double *yy, double *zz) const; void position(double *) const; inline double x(void) const; diff --git a/Numeric/fullMatrix.cpp b/Numeric/fullMatrix.cpp index 205417ca97..72e9ccf15f 100644 --- a/Numeric/fullMatrix.cpp +++ b/Numeric/fullMatrix.cpp @@ -23,34 +23,34 @@ extern "C" { void F77NAME(daxpy)(int *n, double *alpha, double *x, int *incx, double *y, int *incy); - void F77NAME(dgemm)(const char *transa, const char *transb, int *m, int *n, int *k, - double *alpha, double *a, int *lda, - double *b, int *ldb, double *beta, + void F77NAME(dgemm)(const char *transa, const char *transb, int *m, int *n, int *k, + double *alpha, double *a, int *lda, + double *b, int *ldb, double *beta, double *c, int *ldc); - void F77NAME(zgemm)(const char *transa, const char *transb, int *m, int *n, int *k, - std::complex<double> *alpha, std::complex<double> *a, int *lda, - std::complex<double> *b, int *ldb, std::complex<double> *beta, + void F77NAME(zgemm)(const char *transa, const char *transb, int *m, int *n, int *k, + std::complex<double> *alpha, std::complex<double> *a, int *lda, + std::complex<double> *b, int *ldb, std::complex<double> *beta, std::complex<double> *c, int *ldc); - void F77NAME(dgemv)(const char *trans, int *m, int *n, - double *alpha, double *a, int *lda, - double *x, int *incx, double *beta, + void F77NAME(dgemv)(const char *trans, int *m, int *n, + double *alpha, double *a, int *lda, + double *x, int *incx, double *beta, double *y, int *incy); - void F77NAME(zgemv)(const char *trans, int *m, int *n, - std::complex<double> *alpha, std::complex<double> *a, int *lda, - std::complex<double> *x, int *incx, std::complex<double> *beta, + void F77NAME(zgemv)(const char *trans, int *m, int *n, + std::complex<double> *alpha, std::complex<double> *a, int *lda, + std::complex<double> *x, int *incx, std::complex<double> *beta, std::complex<double> *y, int *incy); void F77NAME(dscal)(int *n, double *alpha,double *x, int *incx); void F77NAME(zscal)(int *n, std::complex<double> *alpha,std::complex<double> *x, int *incx); } -template<> +template<> void fullVector<double>::axpy(const fullVector<double> &x,double alpha) { int M = _r, INCX = 1, INCY = 1; F77NAME(daxpy)(&M, &alpha, x._data,&INCX, _data, &INCY); } -template<> +template<> void fullMatrix<double>::scale(const double s) { int N = _r * _c; @@ -59,7 +59,7 @@ void fullMatrix<double>::scale(const double s) F77NAME(dscal)(&N, &ss,_data, &stride); } -template<> +template<> void fullMatrix<std::complex<double> >::scale(const double s) { int N = _r * _c; @@ -68,57 +68,57 @@ void fullMatrix<std::complex<double> >::scale(const double s) F77NAME(zscal)(&N, &ss,_data, &stride); } -template<> +template<> void fullMatrix<double>::mult(const fullMatrix<double> &b, fullMatrix<double> &c) const { int M = c.size1(), N = c.size2(), K = _c; int LDA = _r, LDB = b.size1(), LDC = c.size1(); double alpha = 1., beta = 0.; - F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, + F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, &beta, c._data, &LDC); } -template<> -void fullMatrix<std::complex<double> >::mult(const fullMatrix<std::complex<double> > &b, +template<> +void fullMatrix<std::complex<double> >::mult(const fullMatrix<std::complex<double> > &b, fullMatrix<std::complex<double> > &c) const { int M = c.size1(), N = c.size2(), K = _c; int LDA = _r, LDB = b.size1(), LDC = c.size1(); std::complex<double> alpha = 1., beta = 0.; - F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, + F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, &beta, c._data, &LDC); } -template<> -void fullMatrix<double>::gemm(const fullMatrix<double> &a, const fullMatrix<double> &b, +template<> +void fullMatrix<double>::gemm(const fullMatrix<double> &a, const fullMatrix<double> &b, double alpha, double beta) { int M = size1(), N = size2(), K = a.size2(); int LDA = a.size1(), LDB = b.size1(), LDC = size1(); - F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB, + F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB, &beta, _data, &LDC); } -template<> -void fullMatrix<std::complex<double> >::gemm(const fullMatrix<std::complex<double> > &a, - const fullMatrix<std::complex<double> > &b, - std::complex<double> alpha, +template<> +void fullMatrix<std::complex<double> >::gemm(const fullMatrix<std::complex<double> > &a, + const fullMatrix<std::complex<double> > &b, + std::complex<double> alpha, std::complex<double> beta) { int M = size1(), N = size2(), K = a.size2(); int LDA = a.size1(), LDB = b.size1(), LDC = size1(); - F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB, + F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB, &beta, _data, &LDC); } -template<> +template<> void fullMatrix<double>::axpy(const fullMatrix<double> &x,double alpha) { int M = _r * _c, INCX = 1, INCY = 1; F77NAME(daxpy)(&M, &alpha, x._data,&INCX, _data, &INCY); } -template<> +template<> void fullMatrix<double>::mult(const fullVector<double> &x, fullVector<double> &y) const { int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1; @@ -127,8 +127,17 @@ void fullMatrix<double>::mult(const fullVector<double> &x, fullVector<double> &y &beta, y._data, &INCY); } -template<> -void fullMatrix<std::complex<double> >::mult(const fullVector<std::complex<double> > &x, +template<> +void fullMatrix<double>::multAddy(const fullVector<double> &x, fullVector<double> &y) const +{ + int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1; + double alpha = 1., beta = 1.; + F77NAME(dgemv)("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX, + &beta, y._data, &INCY); +} + +template<> +void fullMatrix<std::complex<double> >::mult(const fullVector<std::complex<double> > &x, fullVector<std::complex<double> > &y) const { int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1; @@ -137,6 +146,16 @@ void fullMatrix<std::complex<double> >::mult(const fullVector<std::complex<doubl &beta, y._data, &INCY); } +template<> +void fullMatrix<std::complex<double> >::multAddy(const fullVector<std::complex<double> > &x, + fullVector<std::complex<double> > &y) const +{ + int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1; + std::complex<double> alpha = 1., beta = 1.; + F77NAME(zgemv)("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX, + &beta, y._data, &INCY); +} + #endif @@ -147,13 +166,13 @@ extern "C" { void F77NAME(dgesv)(int *N, int *nrhs, double *A, int *lda, int *ipiv, double *b, int *ldb, int *info); void F77NAME(dgetrf)(int *M, int *N, double *A, int *lda, int *ipiv, int *info); - void F77NAME(dgetri)(int *M, double *A, int *lda, int *ipiv, double *work, + void F77NAME(dgetri)(int *M, double *A, int *lda, int *ipiv, double *work, int *lwork, int *info); void F77NAME(dgesvd)(const char* jobu, const char *jobvt, int *M, int *N, double *A, int *lda, double *S, double* U, int *ldu, double *VT, int *ldvt, double *work, int *lwork, int *info); void F77NAME(dgeev)(const char *jobvl, const char *jobvr, int *n, double *a, - int *lda, double *wr, double *wi, double *vl, int *ldvl, + int *lda, double *wr, double *wi, double *vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info); } @@ -192,7 +211,7 @@ static void eigSort(int n, double *wr, double *wi, double *VL, double *VR) } } -template<> +template<> bool fullMatrix<double>::eig(fullVector<double> &DR, fullVector<double> &DI, fullMatrix<double> &VL, fullMatrix<double> &VR, bool sortRealPart) @@ -208,13 +227,13 @@ bool fullMatrix<double>::eig(fullVector<double> &DR, fullVector<double> &DI, Msg::Error("QR Algorithm failed to compute all the eigenvalues", info, info); else if(info < 0) Msg::Error("Wrong %d-th argument in eig", -info); - else if(sortRealPart) + else if(sortRealPart) eigSort(N, DR._data, DI._data, VL._data, VR._data); - + return true; } -template<> +template<> bool fullMatrix<double>::luSolve(const fullVector<double> &rhs, fullVector<double> &result) { int N = size1(), nrhs = 1, lda = N, ldb = N, info; @@ -252,7 +271,7 @@ bool fullMatrix<double>::invert(fullMatrix<double> &result) const return false; } -template<> +template<> bool fullMatrix<double>::invertInPlace() { int N = size1(), nrhs = N, lda = N, ldb = N, info; @@ -276,7 +295,7 @@ bool fullMatrix<double>::invertInPlace() return false; } -template<> +template<> double fullMatrix<double>::determinant() const { fullMatrix<double> tmp(*this); @@ -294,11 +313,11 @@ double fullMatrix<double>::determinant() const det = 0.; else Msg::Error("Wrong %d-th argument in matrix factorization", -info); - delete [] ipiv; + delete [] ipiv; return det; } -template<> +template<> bool fullMatrix<double>::svd(fullMatrix<double> &V, fullVector<double> &S) { fullMatrix<double> VT(V.size2(), V.size1()); diff --git a/Numeric/fullMatrix.h b/Numeric/fullMatrix.h index 9ccfc9d925..01f9cf8455 100644 --- a/Numeric/fullMatrix.h +++ b/Numeric/fullMatrix.h @@ -53,7 +53,7 @@ class fullVector Msg::Fatal("invalid index to access fullVector : %i (size = %i)", r, _r); #endif - (*this)(r) = v; + (*this)(r) = v; } // operations @@ -63,8 +63,8 @@ class fullVector for(int i = 0; i < _r; ++i) n += _data[i] * _data[i]; return sqrt(n); } - bool resize(int r) - { + bool resize(int r) + { if (_r < r) { if (_data) delete[] _data; _r = r; @@ -81,11 +81,11 @@ class fullVector } inline void scale(const scalar s) { - if(s == 0.) + if(s == 0.) for(int i = 0; i < _r; ++i) _data[i] = 0.; else if (s == -1.) for(int i = 0; i < _r; ++i) _data[i] = -_data[i]; - else + else for(int i = 0; i < _r; ++i) _data[i] *= s; } inline void setAll(const scalar &m) @@ -115,7 +115,7 @@ class fullVector } // printing and file treatment - void print(const char *name="") const + void print(const char *name="") const { printf("Printing vector %s:\n", name); printf(" "); @@ -126,11 +126,11 @@ class fullVector } void binarySave (FILE *f) const { - fwrite (_data, sizeof(scalar), _r, f); + fwrite (_data, sizeof(scalar), _r, f); } void binaryLoad (FILE *f) { - if(fread (_data, sizeof(scalar), _r, f) != _r) return; + if(fread (_data, sizeof(scalar), _r, f) != _r) return; } }; @@ -189,10 +189,10 @@ class fullMatrix { #ifdef _DEBUG if (r >= _r || r < 0 || c >= _c || c < 0) - Msg::Fatal("invalid index to access fullMatrix : %i %i (size = %i %i)", + Msg::Fatal("invalid index to access fullMatrix : %i %i (size = %i %i)", r, c, _r, _c); #endif - return (*this)(r, c); + return (*this)(r, c); } // operations @@ -202,13 +202,13 @@ class fullMatrix Msg::Fatal("invalid index to access fullMatrix : %i %i (size = %i %i)", r, c, _r, _c); #endif - (*this)(r, c) = v; + (*this)(r, c) = v; } inline scalar norm() const { scalar n = 0.; - for(int i = 0; i < _r; ++i) - for(int j = 0; j < _c; ++j) + for(int i = 0; i < _r; ++i) + for(int j = 0; j < _c; ++j) n += (*this)(i, j) * (*this)(i, j); return sqrt(n); } @@ -266,7 +266,7 @@ class fullMatrix { if(this != &other){ if (_r != other._r || _c != other._c) { - _r = other._r; + _r = other._r; _c = other._c; if (_data && _own_data) delete[] _data; if ((_r == 0) || (_c == 0)) @@ -304,7 +304,7 @@ class fullMatrix #endif return _data[i + _r * j]; } - void copy(const fullMatrix<scalar> &a, int i0, int ni, int j0, int nj, + void copy(const fullMatrix<scalar> &a, int i0, int ni, int j0, int nj, int desti0, int destj0) { for(int i = i0, desti = desti0; i < i0 + ni; i++, desti++) @@ -350,7 +350,7 @@ class fullMatrix } #endif ; - void gemm_naive(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b, + void gemm_naive(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b, scalar alpha=1., scalar beta=1.) { fullMatrix<scalar> temp(a.size1(), b.size2()); @@ -359,7 +359,7 @@ class fullMatrix scale(beta); add(temp); } - void gemm(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b, + void gemm(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b, scalar alpha=1., scalar beta=1.) #if !defined(HAVE_BLAS) { @@ -367,11 +367,11 @@ class fullMatrix } #endif ; - inline void setAll(const scalar &m) + inline void setAll(const scalar &m) { for(int i = 0; i < _r * _c; i++) _data[i] = m; } - inline void setAll(const fullMatrix<scalar> &m) + inline void setAll(const fullMatrix<scalar> &m) { for(int i = 0; i < _r * _c; i++) _data[i] = m._data[i]; } @@ -385,17 +385,17 @@ class fullMatrix } #endif ; - inline void add(const double &a) + inline void add(const double &a) { for(int i = 0; i < _r * _c; ++i) _data[i] += a; } - inline void add(const fullMatrix<scalar> &m) + inline void add(const fullMatrix<scalar> &m) { for(int i = 0; i < size1(); i++) for(int j = 0; j < size2(); j++) (*this)(i, j) += m(i, j); } - inline void add(const fullMatrix<scalar> &m, const double &a) + inline void add(const fullMatrix<scalar> &m, const double &a) { for(int i = 0; i < size1(); i++) for(int j = 0; j < size2(); j++) @@ -409,6 +409,15 @@ class fullMatrix for(int j = 0; j < _c; j++) y._data[i] += (*this)(i, j) * x(j); } +#endif + ; + void multAddy(const fullVector<scalar> &x, fullVector<scalar> &y) const +#if !defined(HAVE_BLAS) + { + for(int i = 0; i < _r; i++) + for(int j = 0; j < _c; j++) + y._data[i] += (*this)(i, j) * x(j); + } #endif ; inline fullMatrix<scalar> transpose() @@ -466,7 +475,7 @@ class fullMatrix } #endif ; - fullMatrix<scalar> cofactor(int i, int j) const + fullMatrix<scalar> cofactor(int i, int j) const { int ni = size1(); int nj = size2(); -- GitLab