From 331345d06128a51c740a8d9c52babf72efd57a73 Mon Sep 17 00:00:00 2001
From: Gauthier Becker <gauthierbecker@gmail.com>
Date: Fri, 21 Jan 2011 13:11:10 +0000
Subject: [PATCH] Rewrite for more efficiency

---
 Geo/SPoint3.h          |   2 +-
 Numeric/fullMatrix.cpp | 103 ++++++++++++++++++++++++-----------------
 Numeric/fullMatrix.h   |  55 +++++++++++++---------
 3 files changed, 94 insertions(+), 66 deletions(-)

diff --git a/Geo/SPoint3.h b/Geo/SPoint3.h
index c4cf69bc28..b48c4f417d 100644
--- a/Geo/SPoint3.h
+++ b/Geo/SPoint3.h
@@ -16,9 +16,9 @@ class SPoint3 {
   SPoint3(double x, double y, double z) { P[0] = x; P[1] = y; P[2] = z; }
   SPoint3(const double *p) { P[0] = p[0]; P[1] = p[1]; P[2] = p[2]; }
   SPoint3(const SPoint3 &pt) { P[0] = pt.P[0]; P[1] = pt.P[1]; P[2] = pt.P[2]; }
-  SPoint3(const SPoint3 &pt,const SPoint3 &dir,const double dist_) {P[0]=pt.P[0]; P[1]=pt.P[1]; P[2]=pt.P[2]; SPoint3 a(dir); a*=dist_; P[0]+=a[0]; P[1]+=a[1]; P[2]+=a[2];}
   virtual ~SPoint3() {}
   void setPosition(double xx, double yy, double zz);
+  void setPosition(const SPoint3 &pt,const SPoint3 &dir,const double dist_) {P[0]=pt.P[0]; P[1]=pt.P[1]; P[2]=pt.P[2]; SPoint3 a(dir); a*=dist_; P[0]+=a[0]; P[1]+=a[1]; P[2]+=a[2];}
   void getPosition(double *xx, double *yy, double *zz) const;
   void position(double *) const;
   inline double x(void) const;
diff --git a/Numeric/fullMatrix.cpp b/Numeric/fullMatrix.cpp
index 205417ca97..72e9ccf15f 100644
--- a/Numeric/fullMatrix.cpp
+++ b/Numeric/fullMatrix.cpp
@@ -23,34 +23,34 @@
 
 extern "C" {
   void F77NAME(daxpy)(int *n, double *alpha, double *x, int *incx, double *y, int *incy);
-  void F77NAME(dgemm)(const char *transa, const char *transb, int *m, int *n, int *k, 
-                      double *alpha, double *a, int *lda, 
-                      double *b, int *ldb, double *beta, 
+  void F77NAME(dgemm)(const char *transa, const char *transb, int *m, int *n, int *k,
+                      double *alpha, double *a, int *lda,
+                      double *b, int *ldb, double *beta,
                       double *c, int *ldc);
-  void F77NAME(zgemm)(const char *transa, const char *transb, int *m, int *n, int *k, 
-                      std::complex<double> *alpha, std::complex<double> *a, int *lda, 
-                      std::complex<double> *b, int *ldb, std::complex<double> *beta, 
+  void F77NAME(zgemm)(const char *transa, const char *transb, int *m, int *n, int *k,
+                      std::complex<double> *alpha, std::complex<double> *a, int *lda,
+                      std::complex<double> *b, int *ldb, std::complex<double> *beta,
                       std::complex<double> *c, int *ldc);
-  void F77NAME(dgemv)(const char *trans, int *m, int *n, 
-                      double *alpha, double *a, int *lda, 
-                      double *x, int *incx, double *beta, 
+  void F77NAME(dgemv)(const char *trans, int *m, int *n,
+                      double *alpha, double *a, int *lda,
+                      double *x, int *incx, double *beta,
                       double *y, int *incy);
-  void F77NAME(zgemv)(const char *trans, int *m, int *n, 
-                      std::complex<double> *alpha, std::complex<double> *a, int *lda, 
-                      std::complex<double> *x, int *incx, std::complex<double> *beta, 
+  void F77NAME(zgemv)(const char *trans, int *m, int *n,
+                      std::complex<double> *alpha, std::complex<double> *a, int *lda,
+                      std::complex<double> *x, int *incx, std::complex<double> *beta,
                       std::complex<double> *y, int *incy);
   void F77NAME(dscal)(int *n, double *alpha,double *x,  int *incx);
   void F77NAME(zscal)(int *n, std::complex<double> *alpha,std::complex<double> *x,  int *incx);
 }
 
-template<> 
+template<>
 void fullVector<double>::axpy(const fullVector<double> &x,double alpha)
 {
   int M = _r, INCX = 1, INCY = 1;
   F77NAME(daxpy)(&M, &alpha, x._data,&INCX, _data, &INCY);
 }
 
-template<> 
+template<>
 void fullMatrix<double>::scale(const double s)
 {
   int N = _r * _c;
@@ -59,7 +59,7 @@ void fullMatrix<double>::scale(const double s)
   F77NAME(dscal)(&N, &ss,_data, &stride);
 }
 
-template<> 
+template<>
 void fullMatrix<std::complex<double> >::scale(const double s)
 {
   int N = _r * _c;
@@ -68,57 +68,57 @@ void fullMatrix<std::complex<double> >::scale(const double s)
   F77NAME(zscal)(&N, &ss,_data, &stride);
 }
 
-template<> 
+template<>
 void fullMatrix<double>::mult(const fullMatrix<double> &b, fullMatrix<double> &c) const
 {
   int M = c.size1(), N = c.size2(), K = _c;
   int LDA = _r, LDB = b.size1(), LDC = c.size1();
   double alpha = 1., beta = 0.;
-  F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, 
+  F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB,
                  &beta, c._data, &LDC);
 }
 
-template<> 
-void fullMatrix<std::complex<double> >::mult(const fullMatrix<std::complex<double> > &b, 
+template<>
+void fullMatrix<std::complex<double> >::mult(const fullMatrix<std::complex<double> > &b,
                                              fullMatrix<std::complex<double> > &c) const
 {
   int M = c.size1(), N = c.size2(), K = _c;
   int LDA = _r, LDB = b.size1(), LDC = c.size1();
   std::complex<double> alpha = 1., beta = 0.;
-  F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, 
+  F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB,
                  &beta, c._data, &LDC);
 }
 
-template<> 
-void fullMatrix<double>::gemm(const fullMatrix<double> &a, const fullMatrix<double> &b, 
+template<>
+void fullMatrix<double>::gemm(const fullMatrix<double> &a, const fullMatrix<double> &b,
                               double alpha, double beta)
 {
   int M = size1(), N = size2(), K = a.size2();
   int LDA = a.size1(), LDB = b.size1(), LDC = size1();
-  F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB, 
+  F77NAME(dgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB,
                  &beta, _data, &LDC);
 }
 
-template<> 
-void fullMatrix<std::complex<double> >::gemm(const fullMatrix<std::complex<double> > &a, 
-                                             const fullMatrix<std::complex<double> > &b, 
-                                             std::complex<double> alpha, 
+template<>
+void fullMatrix<std::complex<double> >::gemm(const fullMatrix<std::complex<double> > &a,
+                                             const fullMatrix<std::complex<double> > &b,
+                                             std::complex<double> alpha,
                                              std::complex<double> beta)
 {
   int M = size1(), N = size2(), K = a.size2();
   int LDA = a.size1(), LDB = b.size1(), LDC = size1();
-  F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB, 
+  F77NAME(zgemm)("N", "N", &M, &N, &K, &alpha, a._data, &LDA, b._data, &LDB,
                  &beta, _data, &LDC);
 }
 
-template<> 
+template<>
 void fullMatrix<double>::axpy(const fullMatrix<double> &x,double alpha)
 {
   int M = _r * _c, INCX = 1, INCY = 1;
   F77NAME(daxpy)(&M, &alpha, x._data,&INCX, _data, &INCY);
 }
 
-template<> 
+template<>
 void fullMatrix<double>::mult(const fullVector<double> &x, fullVector<double> &y) const
 {
   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
@@ -127,8 +127,17 @@ void fullMatrix<double>::mult(const fullVector<double> &x, fullVector<double> &y
                  &beta, y._data, &INCY);
 }
 
-template<> 
-void fullMatrix<std::complex<double> >::mult(const fullVector<std::complex<double> > &x, 
+template<>
+void fullMatrix<double>::multAddy(const fullVector<double> &x, fullVector<double> &y) const
+{
+  int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
+  double alpha = 1., beta = 1.;
+  F77NAME(dgemv)("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX,
+                 &beta, y._data, &INCY);
+}
+
+template<>
+void fullMatrix<std::complex<double> >::mult(const fullVector<std::complex<double> > &x,
                                              fullVector<std::complex<double> > &y) const
 {
   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
@@ -137,6 +146,16 @@ void fullMatrix<std::complex<double> >::mult(const fullVector<std::complex<doubl
                  &beta, y._data, &INCY);
 }
 
+template<>
+void fullMatrix<std::complex<double> >::multAddy(const fullVector<std::complex<double> > &x,
+                                             fullVector<std::complex<double> > &y) const
+{
+  int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
+  std::complex<double> alpha = 1., beta = 1.;
+  F77NAME(zgemv)("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX,
+                 &beta, y._data, &INCY);
+}
+
 #endif
 
 
@@ -147,13 +166,13 @@ extern "C" {
   void F77NAME(dgesv)(int *N, int *nrhs, double *A, int *lda, int *ipiv,
                       double *b, int *ldb, int *info);
   void F77NAME(dgetrf)(int *M, int *N, double *A, int *lda, int *ipiv, int *info);
-  void F77NAME(dgetri)(int *M, double *A, int *lda, int *ipiv, double *work, 
+  void F77NAME(dgetri)(int *M, double *A, int *lda, int *ipiv, double *work,
                        int *lwork, int *info);
   void F77NAME(dgesvd)(const char* jobu, const char *jobvt, int *M, int *N,
                        double *A, int *lda, double *S, double* U, int *ldu,
                        double *VT, int *ldvt, double *work, int *lwork, int *info);
   void F77NAME(dgeev)(const char *jobvl, const char *jobvr, int *n, double *a,
-                      int *lda, double *wr, double *wi, double *vl, int *ldvl, 
+                      int *lda, double *wr, double *wi, double *vl, int *ldvl,
                       double *vr, int *ldvr, double *work, int *lwork, int *info);
 }
 
@@ -192,7 +211,7 @@ static void eigSort(int n, double *wr, double *wi, double *VL, double *VR)
   }
 }
 
-template<> 
+template<>
 bool fullMatrix<double>::eig(fullVector<double> &DR, fullVector<double> &DI,
                              fullMatrix<double> &VL, fullMatrix<double> &VR,
                              bool sortRealPart)
@@ -208,13 +227,13 @@ bool fullMatrix<double>::eig(fullVector<double> &DR, fullVector<double> &DI,
     Msg::Error("QR Algorithm failed to compute all the eigenvalues", info, info);
   else if(info < 0)
     Msg::Error("Wrong %d-th argument in eig", -info);
-  else if(sortRealPart) 
+  else if(sortRealPart)
     eigSort(N, DR._data, DI._data, VL._data, VR._data);
-  
+
   return true;
 }
 
-template<> 
+template<>
 bool fullMatrix<double>::luSolve(const fullVector<double> &rhs, fullVector<double> &result)
 {
   int N = size1(), nrhs = 1, lda = N, ldb = N, info;
@@ -252,7 +271,7 @@ bool fullMatrix<double>::invert(fullMatrix<double> &result) const
   return false;
 }
 
-template<> 
+template<>
 bool fullMatrix<double>::invertInPlace()
 {
   int N = size1(), nrhs = N, lda = N, ldb = N, info;
@@ -276,7 +295,7 @@ bool fullMatrix<double>::invertInPlace()
   return false;
 }
 
-template<> 
+template<>
 double fullMatrix<double>::determinant() const
 {
   fullMatrix<double> tmp(*this);
@@ -294,11 +313,11 @@ double fullMatrix<double>::determinant() const
     det = 0.;
   else
     Msg::Error("Wrong %d-th argument in matrix factorization", -info);
-  delete [] ipiv;  
+  delete [] ipiv;
   return det;
 }
 
-template<> 
+template<>
 bool fullMatrix<double>::svd(fullMatrix<double> &V, fullVector<double> &S)
 {
   fullMatrix<double> VT(V.size2(), V.size1());
diff --git a/Numeric/fullMatrix.h b/Numeric/fullMatrix.h
index 9ccfc9d925..01f9cf8455 100644
--- a/Numeric/fullMatrix.h
+++ b/Numeric/fullMatrix.h
@@ -53,7 +53,7 @@ class fullVector
       Msg::Fatal("invalid index to access fullVector : %i (size = %i)",
                  r, _r);
     #endif
-    (*this)(r) = v; 
+    (*this)(r) = v;
   }
 
   // operations
@@ -63,8 +63,8 @@ class fullVector
     for(int i = 0; i < _r; ++i) n += _data[i] * _data[i];
     return sqrt(n);
   }
-  bool resize(int r)  
-  { 
+  bool resize(int r)
+  {
     if (_r < r) {
       if (_data) delete[] _data;
       _r = r;
@@ -81,11 +81,11 @@ class fullVector
   }
   inline void scale(const scalar s)
   {
-    if(s == 0.) 
+    if(s == 0.)
       for(int i = 0; i < _r; ++i) _data[i] = 0.;
     else if (s == -1.)
       for(int i = 0; i < _r; ++i) _data[i] = -_data[i];
-    else 
+    else
       for(int i = 0; i < _r; ++i) _data[i] *= s;
   }
   inline void setAll(const scalar &m)
@@ -115,7 +115,7 @@ class fullVector
   }
 
   // printing and file treatment
-  void print(const char *name="") const 
+  void print(const char *name="") const
   {
     printf("Printing vector %s:\n", name);
     printf("  ");
@@ -126,11 +126,11 @@ class fullVector
   }
   void binarySave (FILE *f) const
   {
-    fwrite (_data, sizeof(scalar), _r, f); 
+    fwrite (_data, sizeof(scalar), _r, f);
   }
   void binaryLoad (FILE *f)
   {
-    if(fread (_data, sizeof(scalar), _r, f) != _r) return; 
+    if(fread (_data, sizeof(scalar), _r, f) != _r) return;
   }
 };
 
@@ -189,10 +189,10 @@ class fullMatrix
   {
     #ifdef _DEBUG
     if (r >= _r || r < 0 || c >= _c || c < 0)
-      Msg::Fatal("invalid index to access fullMatrix : %i %i (size = %i %i)", 
+      Msg::Fatal("invalid index to access fullMatrix : %i %i (size = %i %i)",
                  r, c, _r, _c);
     #endif
-    return (*this)(r, c); 
+    return (*this)(r, c);
   }
 
   // operations
@@ -202,13 +202,13 @@ class fullMatrix
       Msg::Fatal("invalid index to access fullMatrix : %i %i (size = %i %i)",
                  r, c, _r, _c);
     #endif
-    (*this)(r, c) = v; 
+    (*this)(r, c) = v;
   }
   inline scalar norm() const
   {
     scalar n = 0.;
-    for(int i = 0; i < _r; ++i) 
-      for(int j = 0; j < _c; ++j) 
+    for(int i = 0; i < _r; ++i)
+      for(int j = 0; j < _c; ++j)
 	n += (*this)(i, j) * (*this)(i, j);
     return sqrt(n);
   }
@@ -266,7 +266,7 @@ class fullMatrix
   {
     if(this != &other){
       if (_r != other._r || _c != other._c) {
-        _r = other._r; 
+        _r = other._r;
         _c = other._c;
         if (_data && _own_data) delete[] _data;
         if ((_r == 0) || (_c == 0))
@@ -304,7 +304,7 @@ class fullMatrix
     #endif
     return _data[i + _r * j];
   }
-  void copy(const fullMatrix<scalar> &a, int i0, int ni, int j0, int nj, 
+  void copy(const fullMatrix<scalar> &a, int i0, int ni, int j0, int nj,
             int desti0, int destj0)
   {
     for(int i = i0, desti = desti0; i < i0 + ni; i++, desti++)
@@ -350,7 +350,7 @@ class fullMatrix
   }
 #endif
   ;
-  void gemm_naive(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b, 
+  void gemm_naive(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b,
                   scalar alpha=1., scalar beta=1.)
   {
     fullMatrix<scalar> temp(a.size1(), b.size2());
@@ -359,7 +359,7 @@ class fullMatrix
     scale(beta);
     add(temp);
   }
-  void gemm(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b, 
+  void gemm(const fullMatrix<scalar> &a, const fullMatrix<scalar> &b,
             scalar alpha=1., scalar beta=1.)
 #if !defined(HAVE_BLAS)
   {
@@ -367,11 +367,11 @@ class fullMatrix
   }
 #endif
   ;
-  inline void setAll(const scalar &m) 
+  inline void setAll(const scalar &m)
   {
     for(int i = 0; i < _r * _c; i++) _data[i] = m;
   }
-  inline void setAll(const fullMatrix<scalar> &m) 
+  inline void setAll(const fullMatrix<scalar> &m)
   {
     for(int i = 0; i < _r * _c; i++) _data[i] = m._data[i];
   }
@@ -385,17 +385,17 @@ class fullMatrix
   }
 #endif
   ;
-  inline void add(const double &a) 
+  inline void add(const double &a)
   {
     for(int i = 0; i < _r * _c; ++i) _data[i] += a;
   }
-  inline void add(const fullMatrix<scalar> &m) 
+  inline void add(const fullMatrix<scalar> &m)
   {
     for(int i = 0; i < size1(); i++)
       for(int j = 0; j < size2(); j++)
         (*this)(i, j) += m(i, j);
   }
-  inline void add(const fullMatrix<scalar> &m, const double &a) 
+  inline void add(const fullMatrix<scalar> &m, const double &a)
   {
     for(int i = 0; i < size1(); i++)
       for(int j = 0; j < size2(); j++)
@@ -409,6 +409,15 @@ class fullMatrix
       for(int j = 0; j < _c; j++)
         y._data[i] += (*this)(i, j) * x(j);
   }
+#endif
+  ;
+  void multAddy(const fullVector<scalar> &x, fullVector<scalar> &y) const
+#if !defined(HAVE_BLAS)
+  {
+    for(int i = 0; i < _r; i++)
+      for(int j = 0; j < _c; j++)
+        y._data[i] += (*this)(i, j) * x(j);
+  }
 #endif
   ;
   inline fullMatrix<scalar> transpose()
@@ -466,7 +475,7 @@ class fullMatrix
   }
 #endif
   ;
-  fullMatrix<scalar> cofactor(int i, int j) const 
+  fullMatrix<scalar> cofactor(int i, int j) const
   {
     int ni = size1();
     int nj = size2();
-- 
GitLab