From 7da370f5ecc7f4baac1ccc4a5f9c97021bafdd51 Mon Sep 17 00:00:00 2001 From: Amaury Johnan <amjohnen@gmail.com> Date: Fri, 15 Apr 2016 13:57:17 +0000 Subject: [PATCH] =?UTF-8?q?speedup=20for=20computing=20the=20B=C3=A9zier?= =?UTF-8?q?=20coefficients=20of=20f*g=20or=20f*g*h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Numeric/bezierBasis.cpp | 214 ++++++++++++++++++++++++++++------------ Numeric/bezierBasis.h | 11 +-- 2 files changed, 156 insertions(+), 69 deletions(-) diff --git a/Numeric/bezierBasis.cpp b/Numeric/bezierBasis.cpp index fcdfef58f5..4fc93b8574 100644 --- a/Numeric/bezierBasis.cpp +++ b/Numeric/bezierBasis.cpp @@ -3,8 +3,8 @@ // See the LICENSE.txt file for license information. Please report all // bugs and problems to the public mailing list <gmsh@onelab.info>. +#include <map> #include <algorithm> -#include <vector> #include "bezierBasis.h" #include "GmshDefines.h" #include "GmshMessage.h" @@ -455,6 +455,19 @@ void double2int(const fullMatrix<double> &matDouble, fullMatrix<int> &matInt) } +bezierBasis::bezierBasis(FuncSpaceData data) : _data(data), _raiser(NULL) +{ + if (_data.elementType() == TYPE_PYR) + _constructPyr(); + else + _construct(); +} + +bezierBasis::~bezierBasis() +{ + delete _raiser; +} + void bezierBasis::generateBezierPoints(fullMatrix<double> &points) const { gmshGenerateMonomials(_data, points); @@ -706,6 +719,10 @@ void bezierBasisRaiser::_fillRaiserData() int dim = _bfs->getDim(); int dimSimplex = _bfs->getDimSimplex(); + // Speedup: Since the coefficients (num/den) are invariant from a permutation + // of the indices (i, j) or (i, j, k), we fill only the raiser data for + // i <= j <= k (and adapt the value to take into account the multiplicity). + // Construction of raiser 2 fullMatrix<int> exp2; { @@ -713,7 +730,7 @@ void bezierBasisRaiser::_fillRaiserData() FuncSpaceData dataRaiser2(_bfs->_data, 2*order); gmshGenerateMonomials(dataRaiser2, expD2); double2int(expD2, exp2); - _Raiser2.resize(exp2.size1()); + _raiser2.resize(exp2.size1()); } std::map<int, int> hashToInd2; @@ -726,32 +743,35 @@ void bezierBasisRaiser::_fillRaiserData() } for (int i = 0; i < ncoeff; i++) { - for (int j = 0; j < ncoeff; j++) { + for (int j = i; j < ncoeff; j++) { double num = 1, den = 1; { int compl1 = order; int compl2 = order; int compltot = 2*order; for (int l = 0; l < dimSimplex; l++) { - num *= nChoosek(compl1, exp(i, l)) - * nChoosek(compl2, exp(j, l)); + num *= nChoosek(compl1, exp(i, l)) * + nChoosek(compl2, exp(j, l)); den *= nChoosek(compltot, exp(i, l) + exp(j, l)); compl1 -= exp(i, l); compl2 -= exp(j, l); compltot -= exp(i, l) + exp(j, l); } for (int l = dimSimplex; l < dim; l++) { - num *= nChoosek(order, exp(i, l)) - * nChoosek(order, exp(j, l)); + num *= nChoosek(order, exp(i, l)) * + nChoosek(order, exp(j, l)); den *= nChoosek(2*order, exp(i, l) + exp(j, l)); } } + // taking into account the multiplicity (reminder: i <= j) + if (i < j) num *= 2; + int hash = 0; for (int l = 0; l < dim; l++) { hash += (exp(i, l)+exp(j, l)) * pow_int(2*order+1, l); } - _Raiser2[hashToInd2[hash]].push_back(_Data(num/den, i, j)); + _raiser2[hashToInd2[hash]].push_back(_Data(num/den, i, j)); } } @@ -762,7 +782,7 @@ void bezierBasisRaiser::_fillRaiserData() FuncSpaceData dataRaiser3(_bfs->_data, 3*order); gmshGenerateMonomials(dataRaiser3, expD3); double2int(expD3, exp3); - _Raiser3.resize(exp3.size1()); + _raiser3.resize(exp3.size1()); } std::map<int, int> hashToInd3; @@ -775,8 +795,8 @@ void bezierBasisRaiser::_fillRaiserData() } for (int i = 0; i < ncoeff; i++) { - for (int j = 0; j < ncoeff; j++) { - for (int k = 0; k < ncoeff; ++k) { + for (int j = i; j < ncoeff; j++) { + for (int k = j; k < ncoeff; ++k) { double num = 1, den = 1; { int compl1 = order; @@ -784,9 +804,9 @@ void bezierBasisRaiser::_fillRaiserData() int compl3 = order; int compltot = 3*order; for (int l = 0; l < dimSimplex; l++) { - num *= nChoosek(compl1, exp(i, l)) - * nChoosek(compl2, exp(j, l)) - * nChoosek(compl3, exp(k, l)); + num *= nChoosek(compl1, exp(i, l)) * + nChoosek(compl2, exp(j, l)) * + nChoosek(compl3, exp(k, l)); den *= nChoosek(compltot, exp(i, l) + exp(j, l) + exp(k, l)); compl1 -= exp(i, l); compl2 -= exp(j, l); @@ -794,18 +814,22 @@ void bezierBasisRaiser::_fillRaiserData() compltot -= exp(i, l) + exp(j, l) + exp(k, l); } for (int l = dimSimplex; l < dim; l++) { - num *= nChoosek(order, exp(i, l)) - * nChoosek(order, exp(j, l)) - * nChoosek(order, exp(k, l)); + num *= nChoosek(order, exp(i, l)) * + nChoosek(order, exp(j, l)) * + nChoosek(order, exp(k, l)); den *= nChoosek(3*order, exp(i, l) + exp(j, l) + exp(k, l)); } } + // taking into account the multiplicity (Reminder: i <= j <= k) + if (i < j && j < k) num *= 6; + else if (i < j || j < k) num *= 3; + int hash = 0; for (int l = 0; l < dim; l++) { hash += (exp(i, l)+exp(j, l)+exp(k, l)) * pow_int(3*order+1, l); } - _Raiser3[hashToInd3[hash]].push_back(_Data(num/den, i, j, k)); + _raiser3[hashToInd3[hash]].push_back(_Data(num/den, i, j, k)); } } } @@ -831,6 +855,10 @@ void bezierBasisRaiser::_fillRaiserDataPyr() int ncoeff = exp.size1(); int order[3] = {fsdata.nij(), fsdata.nij(), fsdata.nk()}; + // Speedup: Since the coefficients (num/den) are invariant from a permutation + // of the indices (i, j) or (i, j, k), we fill only the raiser data for + // i <= j <= k (and adapt the value to take into account the multiplicity). + // Construction of raiser 2 fullMatrix<int> exp2; { @@ -838,7 +866,7 @@ void bezierBasisRaiser::_fillRaiserDataPyr() FuncSpaceData dataRaiser2(_bfs->_data, 2*order[0], 2*order[2]); gmshGenerateMonomials(dataRaiser2, expD2); double2int(expD2, exp2); - _Raiser2.resize(exp2.size1()); + _raiser2.resize(exp2.size1()); } std::map<int, int> hashToInd2; @@ -851,7 +879,7 @@ void bezierBasisRaiser::_fillRaiserDataPyr() } for (int i = 0; i < ncoeff; i++) { - for (int j = 0; j < ncoeff; j++) { + for (int j = i; j < ncoeff; j++) { double num = 1, den = 1; for (int l = 0; l < 3; l++) { num *= nChoosek(order[l], exp(i, l)) @@ -859,12 +887,14 @@ void bezierBasisRaiser::_fillRaiserDataPyr() den *= nChoosek(2*order[l], exp(i, l) + exp(j, l)); } + // taking into account the multiplicity (reminder: i <= j) + if (i < j) num *= 2; + int hash = 0; for (int l = 0; l < 3; l++) { hash += (exp(i, l)+exp(j, l)) * pow_int(2*order[l]+1, l); } - //_raiser2[hash].push_back(_Data(num/den, i, j)); - _Raiser2[hashToInd2[hash]].push_back(_Data(num/den, i, j)); + _raiser2[hashToInd2[hash]].push_back(_Data(num/den, i, j)); } } @@ -875,7 +905,7 @@ void bezierBasisRaiser::_fillRaiserDataPyr() FuncSpaceData dataRaiser3(_bfs->_data, 3*order[0], 3*order[2]); gmshGenerateMonomials(dataRaiser3, expD3); double2int(expD3, exp3); - _Raiser3.resize(exp3.size1()); + _raiser3.resize(exp3.size1()); } std::map<int, int> hashToInd3; @@ -888,8 +918,8 @@ void bezierBasisRaiser::_fillRaiserDataPyr() } for (int i = 0; i < ncoeff; i++) { - for (int j = 0; j < ncoeff; j++) { - for (int k = 0; k < ncoeff; ++k) { + for (int j = i; j < ncoeff; j++) { + for (int k = j; k < ncoeff; ++k) { double num = 1, den = 1; for (int l = 0; l < 3; l++) { num *= nChoosek(order[l], exp(i, l)) @@ -898,12 +928,15 @@ void bezierBasisRaiser::_fillRaiserDataPyr() den *= nChoosek(3*order[l], exp(i, l) + exp(j, l) + exp(k, l)); } + // taking into account the multiplicity (Reminder: i <= j <= k) + if (i < j && j < k) num *= 6; + else if (i < j || j < k) num *= 3; + int hash = 0; for (int l = 0; l < 3; l++) { hash += (exp(i, l)+exp(j, l)+exp(k, l)) * pow_int(3*order[l]+1, l); } - //_raiser3[hash].push_back(_Data(num/den, i, j, k)); - _Raiser3[hashToInd3[hash]].push_back(_Data(num/den, i, j, k)); + _raiser3[hashToInd3[hash]].push_back(_Data(num/den, i, j, k)); } } } @@ -913,13 +946,23 @@ void bezierBasisRaiser::computeCoeff(const fullVector<double> &coeffA, const fullVector<double> &coeffB, fullVector<double> &coeffSquare) { - coeffSquare.resize(_Raiser2.size(), true); + coeffSquare.resize(_raiser2.size(), true); - for (unsigned int ind = 0; ind < _Raiser2.size(); ++ind) { - for (unsigned int l = 0; l < _Raiser2[ind].size(); ++l) { - const int i = _Raiser2[ind][l].i; - const int j = _Raiser2[ind][l].j; - coeffSquare(ind) += _Raiser2[ind][l].val * coeffA(i) * coeffB(j); + if (&coeffA == &coeffB) { + for (unsigned int ind = 0; ind < _raiser2.size(); ++ind) { + for (unsigned int l = 0; l < _raiser2[ind].size(); ++l) { + _Data &d = _raiser2[ind][l]; + coeffSquare(ind) += d.val * coeffA(d.i) * coeffB(d.j); + } + } + } + else { + for (unsigned int ind = 0; ind < _raiser2.size(); ++ind) { + for (unsigned int l = 0; l < _raiser2[ind].size(); ++l) { + _Data &d = _raiser2[ind][l]; + coeffSquare(ind) += d.val/2 * (coeffA(d.i) * coeffB(d.j) + + coeffA(d.j) * coeffB(d.i)); + } } } } @@ -929,31 +972,61 @@ void bezierBasisRaiser::computeCoeff(const fullVector<double> &coeffA, const fullVector<double> &coeffC, fullVector<double> &coeffCubic) { - coeffCubic.resize(_Raiser3.size(), true); - - for (unsigned int ind = 0; ind < _Raiser3.size(); ++ind) { - for (unsigned int l = 0; l < _Raiser3[ind].size(); ++l) { - const int i = _Raiser3[ind][l].i; - const int j = _Raiser3[ind][l].j; - const int k = _Raiser3[ind][l].k; - coeffCubic(ind) += _Raiser3[ind][l].val * coeffA(i) * coeffB(j) * coeffC(k); + coeffCubic.resize(_raiser3.size(), true); + + if (&coeffA == &coeffB && &coeffB == &coeffC) { + for (unsigned int ind = 0; ind < _raiser3.size(); ++ind) { + for (unsigned int l = 0; l < _raiser3[ind].size(); ++l) { + _Data &d = _raiser3[ind][l]; + coeffCubic(ind) += d.val * coeffA(d.i) * coeffB(d.j) * coeffC(d.k); + } + } + } + else if (&coeffA != &coeffB && &coeffB != &coeffC) { + for (unsigned int ind = 0; ind < _raiser3.size(); ++ind) { + for (unsigned int l = 0; l < _raiser3[ind].size(); ++l) { + _Data &d = _raiser3[ind][l]; + coeffCubic(ind) += d.val/6 * (coeffA(d.i) * coeffB(d.j) * coeffC(d.k) + + coeffA(d.i) * coeffB(d.k) * coeffC(d.j) + + coeffA(d.j) * coeffB(d.i) * coeffC(d.k) + + coeffA(d.j) * coeffB(d.k) * coeffC(d.i) + + coeffA(d.k) * coeffB(d.i) * coeffC(d.j) + + coeffA(d.k) * coeffB(d.j) * coeffC(d.i)); + } } } + else + Msg::Error("bezierBasisRaiser::computeCoeff not implemented for A == B != C " + "or A != B == C"); } void bezierBasisRaiser::computeCoeff(const fullMatrix<double> &coeffA, const fullMatrix<double> &coeffB, fullMatrix<double> &coeffSquare) { - coeffSquare.resize(_Raiser2.size(), coeffA.size2(), true); - - for (unsigned int ind = 0; ind < _Raiser2.size(); ++ind) { - for (unsigned int l = 0; l < _Raiser2[ind].size(); ++l) { - const int i = _Raiser2[ind][l].i; - const int j = _Raiser2[ind][l].j; - for (int ind2 = 0; ind2 < coeffA.size2(); ++ind2) { - coeffSquare(ind, ind2) += _Raiser2[ind][l].val * coeffA(i, ind2) - * coeffB(j, ind2); + coeffSquare.resize(_raiser2.size(), coeffA.size2(), true); + + if (&coeffA == &coeffB) { + for (unsigned int ind = 0; ind < _raiser2.size(); ++ind) { + for (unsigned int l = 0; l < _raiser2[ind].size(); ++l) { + _Data &d = _raiser2[ind][l]; + for (int ind2 = 0; ind2 < coeffA.size2(); ++ind2) { + coeffSquare(ind, ind2) += d.val * coeffA(d.i, ind2) + * coeffB(d.j, ind2); + } + } + } + } + else { + for (unsigned int ind = 0; ind < _raiser2.size(); ++ind) { + for (unsigned int l = 0; l < _raiser2[ind].size(); ++l) { + _Data &d = _raiser2[ind][l]; + double val = d.val/2; + for (int ind2 = 0; ind2 < coeffA.size2(); ++ind2) { + coeffSquare(ind, ind2) += val * + (coeffA(d.i, ind2) * coeffB(d.j, ind2) + + coeffA(d.j, ind2) * coeffB(d.i, ind2)); + } } } } @@ -964,17 +1037,36 @@ void bezierBasisRaiser::computeCoeff(const fullVector<double> &coeffA, const fullMatrix<double> &coeffC, fullMatrix<double> &coeffCubic) { - coeffCubic.resize(_Raiser3.size(), coeffB.size2(), true); - - for (unsigned int ind = 0; ind < _Raiser3.size(); ++ind) { - for (unsigned int l = 0; l < _Raiser3[ind].size(); ++l) { - const int i = _Raiser3[ind][l].i; - const int j = _Raiser3[ind][l].j; - const int k = _Raiser3[ind][l].k; - for (int ind2 = 0; ind2 < coeffB.size2(); ++ind2) { - coeffCubic(ind, ind2) += _Raiser3[ind][l].val * coeffA(i) - * coeffB(j, ind2) - * coeffC(k, ind2); + coeffCubic.resize(_raiser3.size(), coeffB.size2(), true); + + if (&coeffB == &coeffC) { + for (unsigned int ind = 0; ind < _raiser3.size(); ++ind) { + for (unsigned int l = 0; l < _raiser3[ind].size(); ++l) { + _Data &d = _raiser3[ind][l]; + double val = d.val/3; + for (int ind2 = 0; ind2 < coeffB.size2(); ++ind2) { + coeffCubic(ind, ind2) += val * + (coeffA(d.i) * coeffB(d.j, ind2) * coeffC(d.k, ind2) + + coeffA(d.j) * coeffB(d.i, ind2) * coeffC(d.k, ind2) + + coeffA(d.k) * coeffB(d.i, ind2) * coeffC(d.j, ind2)); + } + } + } + } + else { + for (unsigned int ind = 0; ind < _raiser3.size(); ++ind) { + for (unsigned int l = 0; l < _raiser3[ind].size(); ++l) { + _Data &d = _raiser3[ind][l]; + double val = d.val/6; + for (int ind2 = 0; ind2 < coeffB.size2(); ++ind2) { + coeffCubic(ind, ind2) += val * + (coeffA(d.i) * coeffB(d.j, ind2) * coeffC(d.k, ind2) + + coeffA(d.i) * coeffB(d.k, ind2) * coeffC(d.j, ind2) + + coeffA(d.j) * coeffB(d.i, ind2) * coeffC(d.k, ind2) + + coeffA(d.j) * coeffB(d.k, ind2) * coeffC(d.i, ind2) + + coeffA(d.k) * coeffB(d.i, ind2) * coeffC(d.j, ind2) + + coeffA(d.k) * coeffB(d.j, ind2) * coeffC(d.i, ind2)); + } } } } diff --git a/Numeric/bezierBasis.h b/Numeric/bezierBasis.h index 9e331135ed..64cf0b6813 100644 --- a/Numeric/bezierBasis.h +++ b/Numeric/bezierBasis.h @@ -6,7 +6,6 @@ #ifndef _BEZIER_BASIS_H_ #define _BEZIER_BASIS_H_ -#include <map> #include <vector> #include "fullMatrix.h" #include "FuncSpaceData.h" @@ -32,12 +31,8 @@ class bezierBasis { fullMatrix<double> subDivisor; // Constructors - inline bezierBasis(FuncSpaceData data) : _data(data), _raiser(NULL) { - if (_data.elementType() == TYPE_PYR) - _constructPyr(); - else - _construct(); - } + bezierBasis(FuncSpaceData data); + ~bezierBasis(); // get methods inline int getDim() const {return _exponents.size2();} @@ -99,7 +94,7 @@ private : _Data(double vv, int ii, int jj = -1, int kk = -1) : i(ii), j(jj), k(kk), val(vv) {} }; - std::vector<std::vector<_Data> > _Raiser2, _Raiser3; + std::vector<std::vector<_Data> > _raiser2, _raiser3; const bezierBasis *_bfs; public: -- GitLab