From 8ae178e66d150d12d718f2d055102ef911bfedd3 Mon Sep 17 00:00:00 2001 From: Tristan Carrier Baudouin <tristan.carrier@uclouvain.be> Date: Fri, 4 Mar 2011 11:11:30 +0000 Subject: [PATCH] lbfgs library --- contrib/lbfgs/CMakeLists.txt | 10 + contrib/lbfgs/alglibinternal.cpp | 10712 ++++++++++ contrib/lbfgs/alglibinternal.h | 707 + contrib/lbfgs/alglibmisc.cpp | 3083 +++ contrib/lbfgs/alglibmisc.h | 685 + contrib/lbfgs/ap.cpp | 8952 +++++++++ contrib/lbfgs/ap.h | 1203 ++ contrib/lbfgs/linalg.cpp | 30199 +++++++++++++++++++++++++++++ contrib/lbfgs/linalg.h | 4101 ++++ contrib/lbfgs/optimization.cpp | 11827 +++++++++++ contrib/lbfgs/optimization.h | 2649 +++ contrib/lbfgs/stdafx.h | 2 + 12 files changed, 74130 insertions(+) create mode 100644 contrib/lbfgs/CMakeLists.txt create mode 100755 contrib/lbfgs/alglibinternal.cpp create mode 100755 contrib/lbfgs/alglibinternal.h create mode 100755 contrib/lbfgs/alglibmisc.cpp create mode 100755 contrib/lbfgs/alglibmisc.h create mode 100755 contrib/lbfgs/ap.cpp create mode 100755 contrib/lbfgs/ap.h create mode 100755 contrib/lbfgs/linalg.cpp create mode 100755 contrib/lbfgs/linalg.h create mode 100755 contrib/lbfgs/optimization.cpp create mode 100755 contrib/lbfgs/optimization.h create mode 100755 contrib/lbfgs/stdafx.h diff --git a/contrib/lbfgs/CMakeLists.txt b/contrib/lbfgs/CMakeLists.txt new file mode 100644 index 0000000000..5c8988732e --- /dev/null +++ b/contrib/lbfgs/CMakeLists.txt @@ -0,0 +1,10 @@ +set(SRC + ap.cpp + alglibinternal.cpp + alglibmisc.cpp + linalg.cpp + optimization.cpp +) + +file(GLOB_RECURSE HDR RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.h) +append_gmsh_src(contrib/lbfgs "${SRC};${HDR}") diff --git a/contrib/lbfgs/alglibinternal.cpp b/contrib/lbfgs/alglibinternal.cpp new file mode 100755 index 0000000000..4d0ddef2a5 --- /dev/null +++ b/contrib/lbfgs/alglibinternal.cpp @@ -0,0 +1,10712 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#include "stdafx.h" +#include "alglibinternal.h" + +// disable some irrelevant warnings +#if (AE_COMPILER==AE_MSVC) +#pragma warning(disable:4100) +#pragma warning(disable:4127) +#pragma warning(disable:4702) +#pragma warning(disable:4996) +#endif +using namespace std; + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +static void tsort_tagsortfastirec(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Integer */ ae_vector* bufb, + ae_int_t i1, + ae_int_t i2, + ae_state *_state); +static void tsort_tagsortfastrrec(/* Real */ ae_vector* a, + /* Real */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Real */ ae_vector* bufb, + ae_int_t i1, + ae_int_t i2, + ae_state *_state); +static void tsort_tagsortfastrec(/* Real */ ae_vector* a, + /* Real */ ae_vector* bufa, + ae_int_t i1, + ae_int_t i2, + ae_state *_state); + + + + + + + + + + + + + + + + + + + + +static void hsschur_internalauxschur(ae_bool wantt, + ae_bool wantz, + ae_int_t n, + ae_int_t ilo, + ae_int_t ihi, + /* Real */ ae_matrix* h, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + ae_int_t iloz, + ae_int_t ihiz, + /* Real */ ae_matrix* z, + /* Real */ ae_vector* work, + /* Real */ ae_vector* workv3, + /* Real */ ae_vector* workc1, + /* Real */ ae_vector* works1, + ae_int_t* info, + ae_state *_state); +static void hsschur_aux2x2schur(double* a, + double* b, + double* c, + double* d, + double* rt1r, + double* rt1i, + double* rt2r, + double* rt2i, + double* cs, + double* sn, + ae_state *_state); +static double hsschur_extschursign(double a, double b, ae_state *_state); +static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state); + + + + +static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha, + ae_complex beta, + double lnmax, + double bnorm, + double maxgrowth, + double* xnorm, + ae_complex* x, + ae_state *_state); + + +static void xblas_xsum(/* Real */ ae_vector* w, + double mx, + ae_int_t n, + double* r, + double* rerr, + ae_state *_state); +static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state); + + +static double linmin_ftol = 0.001; +static double linmin_xtol = 100*ae_machineepsilon; +static double linmin_gtol = 0.3; +static ae_int_t linmin_maxfev = 20; +static double linmin_stpmin = 1.0E-50; +static double linmin_defstpmax = 1.0E+50; +static double linmin_armijofactor = 1.3; +static void linmin_mcstep(double* stx, + double* fx, + double* dx, + double* sty, + double* fy, + double* dy, + double* stp, + double fp, + double dp, + ae_bool* brackt, + double stmin, + double stmax, + ae_int_t* info, + ae_state *_state); + + +static ae_int_t ftbase_ftbaseplanentrysize = 8; +static ae_int_t ftbase_ftbasecffttask = 0; +static ae_int_t ftbase_ftbaserfhttask = 1; +static ae_int_t ftbase_ftbaserffttask = 2; +static ae_int_t ftbase_fftcooleytukeyplan = 0; +static ae_int_t ftbase_fftbluesteinplan = 1; +static ae_int_t ftbase_fftcodeletplan = 2; +static ae_int_t ftbase_fhtcooleytukeyplan = 3; +static ae_int_t ftbase_fhtcodeletplan = 4; +static ae_int_t ftbase_fftrealcooleytukeyplan = 5; +static ae_int_t ftbase_fftemptyplan = 6; +static ae_int_t ftbase_fhtn2plan = 999; +static ae_int_t ftbase_ftbaseupdatetw = 4; +static ae_int_t ftbase_ftbasecodeletrecommended = 5; +static double ftbase_ftbaseinefficiencyfactor = 1.3; +static ae_int_t ftbase_ftbasemaxsmoothfactor = 5; +static void ftbase_ftbasegenerateplanrec(ae_int_t n, + ae_int_t tasktype, + ftplan* plan, + ae_int_t* plansize, + ae_int_t* precomputedsize, + ae_int_t* planarraysize, + ae_int_t* tmpmemsize, + ae_int_t* stackmemsize, + ae_int_t stackptr, + ae_state *_state); +static void ftbase_ftbaseprecomputeplanrec(ftplan* plan, + ae_int_t entryoffset, + ae_int_t stackptr, + ae_state *_state); +static void ftbase_ffttwcalc(/* Real */ ae_vector* a, + ae_int_t aoffset, + ae_int_t n1, + ae_int_t n2, + ae_state *_state); +static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a, + ae_int_t m, + ae_int_t n, + ae_int_t astart, + /* Real */ ae_vector* buf, + ae_state *_state); +static void ftbase_internalreallintranspose(/* Real */ ae_vector* a, + ae_int_t m, + ae_int_t n, + ae_int_t astart, + /* Real */ ae_vector* buf, + ae_state *_state); +static void ftbase_ffticltrec(/* Real */ ae_vector* a, + ae_int_t astart, + ae_int_t astride, + /* Real */ ae_vector* b, + ae_int_t bstart, + ae_int_t bstride, + ae_int_t m, + ae_int_t n, + ae_state *_state); +static void ftbase_fftirltrec(/* Real */ ae_vector* a, + ae_int_t astart, + ae_int_t astride, + /* Real */ ae_vector* b, + ae_int_t bstart, + ae_int_t bstride, + ae_int_t m, + ae_int_t n, + ae_state *_state); +static void ftbase_ftbasefindsmoothrec(ae_int_t n, + ae_int_t seed, + ae_int_t leastfactor, + ae_int_t* best, + ae_state *_state); +static void ftbase_fftarrayresize(/* Integer */ ae_vector* a, + ae_int_t* asize, + ae_int_t newasize, + ae_state *_state); +static void ftbase_reffht(/* Real */ ae_vector* a, + ae_int_t n, + ae_int_t offs, + ae_state *_state); + + + + + + + + + +/************************************************************************* +This function sorts array of real keys by ascending. + +Its results are: +* sorted array A +* permutation tables P1, P2 + +Algorithm outputs permutation tables using two formats: +* as usual permutation of [0..N-1]. If P1[i]=j, then sorted A[i] contains + value which was moved there from J-th position. +* as a sequence of pairwise permutations. Sorted A[] may be obtained by + swaping A[i] and A[P2[i]] for all i from 0 to N-1. + +INPUT PARAMETERS: + A - unsorted array + N - array size + +OUPUT PARAMETERS: + A - sorted array + P1, P2 - permutation tables, array[N] + +NOTES: + this function assumes that A[] is finite; it doesn't checks that + condition. All other conditions (size of input arrays, etc.) are not + checked too. + + -- ALGLIB -- + Copyright 14.05.2008 by Bochkanov Sergey +*************************************************************************/ +void tagsort(/* Real */ ae_vector* a, + ae_int_t n, + /* Integer */ ae_vector* p1, + /* Integer */ ae_vector* p2, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_vector pv; + ae_vector vp; + ae_vector bufa; + ae_vector bufb; + ae_int_t lv; + ae_int_t lp; + ae_int_t rv; + ae_int_t rp; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(p1); + ae_vector_clear(p2); + ae_vector_init(&pv, 0, DT_INT, _state, ae_true); + ae_vector_init(&vp, 0, DT_INT, _state, ae_true); + ae_vector_init(&bufa, 0, DT_REAL, _state, ae_true); + ae_vector_init(&bufb, 0, DT_INT, _state, ae_true); + + + /* + * Special cases + */ + if( n<=0 ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + ae_vector_set_length(p1, 0+1, _state); + ae_vector_set_length(p2, 0+1, _state); + p1->ptr.p_int[0] = 0; + p2->ptr.p_int[0] = 0; + ae_frame_leave(_state); + return; + } + + /* + * General case, N>1: prepare permutations table P1 + */ + ae_vector_set_length(p1, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + p1->ptr.p_int[i] = i; + } + + /* + * General case, N>1: sort, update P1 + */ + ae_vector_set_length(&bufa, n, _state); + ae_vector_set_length(&bufb, n, _state); + tagsortfasti(a, p1, &bufa, &bufb, n, _state); + + /* + * General case, N>1: fill permutations table P2 + * + * To fill P2 we maintain two arrays: + * * PV, Position(Value). PV[i] contains position of I-th key at the moment + * * VP, Value(Position). VP[i] contains key which has position I at the moment + * + * At each step we making permutation of two items: + * Left, which is given by position/value pair LP/LV + * and Right, which is given by RP/RV + * and updating PV[] and VP[] correspondingly. + */ + ae_vector_set_length(&pv, n-1+1, _state); + ae_vector_set_length(&vp, n-1+1, _state); + ae_vector_set_length(p2, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + pv.ptr.p_int[i] = i; + vp.ptr.p_int[i] = i; + } + for(i=0; i<=n-1; i++) + { + + /* + * calculate LP, LV, RP, RV + */ + lp = i; + lv = vp.ptr.p_int[lp]; + rv = p1->ptr.p_int[i]; + rp = pv.ptr.p_int[rv]; + + /* + * Fill P2 + */ + p2->ptr.p_int[i] = rp; + + /* + * update PV and VP + */ + vp.ptr.p_int[lp] = rv; + vp.ptr.p_int[rp] = lv; + pv.ptr.p_int[lv] = rp; + pv.ptr.p_int[rv] = lp; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Same as TagSort, but optimized for real keys and integer labels. + +A is sorted, and same permutations are applied to B. + +NOTES: +1. this function assumes that A[] is finite; it doesn't checks that + condition. All other conditions (size of input arrays, etc.) are not + checked too. +2. this function uses two buffers, BufA and BufB, each is N elements large. + They may be preallocated (which will save some time) or not, in which + case function will automatically allocate memory. + + -- ALGLIB -- + Copyright 11.12.2008 by Bochkanov Sergey +*************************************************************************/ +void tagsortfasti(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Integer */ ae_vector* bufb, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_bool isascending; + ae_bool isdescending; + double tmpr; + ae_int_t tmpi; + + + + /* + * Special case + */ + if( n<=1 ) + { + return; + } + + /* + * Test for already sorted set + */ + isascending = ae_true; + isdescending = ae_true; + for(i=1; i<=n-1; i++) + { + isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1]; + isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1]; + } + if( isascending ) + { + return; + } + if( isdescending ) + { + for(i=0; i<=n-1; i++) + { + j = n-1-i; + if( j<=i ) + { + break; + } + tmpr = a->ptr.p_double[i]; + a->ptr.p_double[i] = a->ptr.p_double[j]; + a->ptr.p_double[j] = tmpr; + tmpi = b->ptr.p_int[i]; + b->ptr.p_int[i] = b->ptr.p_int[j]; + b->ptr.p_int[j] = tmpi; + } + return; + } + + /* + * General case + */ + if( bufa->cnt<n ) + { + ae_vector_set_length(bufa, n, _state); + } + if( bufb->cnt<n ) + { + ae_vector_set_length(bufb, n, _state); + } + tsort_tagsortfastirec(a, b, bufa, bufb, 0, n-1, _state); +} + + +/************************************************************************* +Same as TagSort, but optimized for real keys and real labels. + +A is sorted, and same permutations are applied to B. + +NOTES: +1. this function assumes that A[] is finite; it doesn't checks that + condition. All other conditions (size of input arrays, etc.) are not + checked too. +2. this function uses two buffers, BufA and BufB, each is N elements large. + They may be preallocated (which will save some time) or not, in which + case function will automatically allocate memory. + + -- ALGLIB -- + Copyright 11.12.2008 by Bochkanov Sergey +*************************************************************************/ +void tagsortfastr(/* Real */ ae_vector* a, + /* Real */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Real */ ae_vector* bufb, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_bool isascending; + ae_bool isdescending; + double tmpr; + + + + /* + * Special case + */ + if( n<=1 ) + { + return; + } + + /* + * Test for already sorted set + */ + isascending = ae_true; + isdescending = ae_true; + for(i=1; i<=n-1; i++) + { + isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1]; + isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1]; + } + if( isascending ) + { + return; + } + if( isdescending ) + { + for(i=0; i<=n-1; i++) + { + j = n-1-i; + if( j<=i ) + { + break; + } + tmpr = a->ptr.p_double[i]; + a->ptr.p_double[i] = a->ptr.p_double[j]; + a->ptr.p_double[j] = tmpr; + tmpr = b->ptr.p_double[i]; + b->ptr.p_double[i] = b->ptr.p_double[j]; + b->ptr.p_double[j] = tmpr; + } + return; + } + + /* + * General case + */ + if( bufa->cnt<n ) + { + ae_vector_set_length(bufa, n, _state); + } + if( bufb->cnt<n ) + { + ae_vector_set_length(bufb, n, _state); + } + tsort_tagsortfastrrec(a, b, bufa, bufb, 0, n-1, _state); +} + + +/************************************************************************* +Same as TagSort, but optimized for real keys without labels. + +A is sorted, and that's all. + +NOTES: +1. this function assumes that A[] is finite; it doesn't checks that + condition. All other conditions (size of input arrays, etc.) are not + checked too. +2. this function uses buffer, BufA, which is N elements large. It may be + preallocated (which will save some time) or not, in which case + function will automatically allocate memory. + + -- ALGLIB -- + Copyright 11.12.2008 by Bochkanov Sergey +*************************************************************************/ +void tagsortfast(/* Real */ ae_vector* a, + /* Real */ ae_vector* bufa, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_bool isascending; + ae_bool isdescending; + double tmpr; + + + + /* + * Special case + */ + if( n<=1 ) + { + return; + } + + /* + * Test for already sorted set + */ + isascending = ae_true; + isdescending = ae_true; + for(i=1; i<=n-1; i++) + { + isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1]; + isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1]; + } + if( isascending ) + { + return; + } + if( isdescending ) + { + for(i=0; i<=n-1; i++) + { + j = n-1-i; + if( j<=i ) + { + break; + } + tmpr = a->ptr.p_double[i]; + a->ptr.p_double[i] = a->ptr.p_double[j]; + a->ptr.p_double[j] = tmpr; + } + return; + } + + /* + * General case + */ + if( bufa->cnt<n ) + { + ae_vector_set_length(bufa, n, _state); + } + tsort_tagsortfastrec(a, bufa, 0, n-1, _state); +} + + +/************************************************************************* +Heap operations: adds element to the heap + +PARAMETERS: + A - heap itself, must be at least array[0..N] + B - array of integer tags, which are updated according to + permutations in the heap + N - size of the heap (without new element). + updated on output + VA - value of the element being added + VB - value of the tag + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void tagheappushi(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + ae_int_t* n, + double va, + ae_int_t vb, + ae_state *_state) +{ + ae_int_t j; + ae_int_t k; + double v; + + + if( *n<0 ) + { + return; + } + + /* + * N=0 is a special case + */ + if( *n==0 ) + { + a->ptr.p_double[0] = va; + b->ptr.p_int[0] = vb; + *n = *n+1; + return; + } + + /* + * add current point to the heap + * (add to the bottom, then move up) + * + * we don't write point to the heap + * until its final position is determined + * (it allow us to reduce number of array access operations) + */ + j = *n; + *n = *n+1; + while(j>0) + { + k = (j-1)/2; + v = a->ptr.p_double[k]; + if( ae_fp_less(v,va) ) + { + + /* + * swap with higher element + */ + a->ptr.p_double[j] = v; + b->ptr.p_int[j] = b->ptr.p_int[k]; + j = k; + } + else + { + + /* + * element in its place. terminate. + */ + break; + } + } + a->ptr.p_double[j] = va; + b->ptr.p_int[j] = vb; +} + + +/************************************************************************* +Heap operations: replaces top element with new element +(which is moved down) + +PARAMETERS: + A - heap itself, must be at least array[0..N-1] + B - array of integer tags, which are updated according to + permutations in the heap + N - size of the heap + VA - value of the element which replaces top element + VB - value of the tag + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void tagheapreplacetopi(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + ae_int_t n, + double va, + ae_int_t vb, + ae_state *_state) +{ + ae_int_t j; + ae_int_t k1; + ae_int_t k2; + double v; + double v1; + double v2; + + + if( n<1 ) + { + return; + } + + /* + * N=1 is a special case + */ + if( n==1 ) + { + a->ptr.p_double[0] = va; + b->ptr.p_int[0] = vb; + return; + } + + /* + * move down through heap: + * * J - current element + * * K1 - first child (always exists) + * * K2 - second child (may not exists) + * + * we don't write point to the heap + * until its final position is determined + * (it allow us to reduce number of array access operations) + */ + j = 0; + k1 = 1; + k2 = 2; + while(k1<n) + { + if( k2>=n ) + { + + /* + * only one child. + * + * swap and terminate (because this child + * have no siblings due to heap structure) + */ + v = a->ptr.p_double[k1]; + if( ae_fp_greater(v,va) ) + { + a->ptr.p_double[j] = v; + b->ptr.p_int[j] = b->ptr.p_int[k1]; + j = k1; + } + break; + } + else + { + + /* + * two childs + */ + v1 = a->ptr.p_double[k1]; + v2 = a->ptr.p_double[k2]; + if( ae_fp_greater(v1,v2) ) + { + if( ae_fp_less(va,v1) ) + { + a->ptr.p_double[j] = v1; + b->ptr.p_int[j] = b->ptr.p_int[k1]; + j = k1; + } + else + { + break; + } + } + else + { + if( ae_fp_less(va,v2) ) + { + a->ptr.p_double[j] = v2; + b->ptr.p_int[j] = b->ptr.p_int[k2]; + j = k2; + } + else + { + break; + } + } + k1 = 2*j+1; + k2 = 2*j+2; + } + } + a->ptr.p_double[j] = va; + b->ptr.p_int[j] = vb; +} + + +/************************************************************************* +Heap operations: pops top element from the heap + +PARAMETERS: + A - heap itself, must be at least array[0..N-1] + B - array of integer tags, which are updated according to + permutations in the heap + N - size of the heap, N>=1 + +On output top element is moved to A[N-1], B[N-1], heap is reordered, N is +decreased by 1. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void tagheappopi(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + ae_int_t* n, + ae_state *_state) +{ + double va; + ae_int_t vb; + + + if( *n<1 ) + { + return; + } + + /* + * N=1 is a special case + */ + if( *n==1 ) + { + *n = 0; + return; + } + + /* + * swap top element and last element, + * then reorder heap + */ + va = a->ptr.p_double[*n-1]; + vb = b->ptr.p_int[*n-1]; + a->ptr.p_double[*n-1] = a->ptr.p_double[0]; + b->ptr.p_int[*n-1] = b->ptr.p_int[0]; + *n = *n-1; + tagheapreplacetopi(a, b, *n, va, vb, _state); +} + + +/************************************************************************* +Internal TagSortFastI: sorts A[I1...I2] (both bounds are included), +applies same permutations to B. + + -- ALGLIB -- + Copyright 06.09.2010 by Bochkanov Sergey +*************************************************************************/ +static void tsort_tagsortfastirec(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Integer */ ae_vector* bufb, + ae_int_t i1, + ae_int_t i2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_int_t cntless; + ae_int_t cnteq; + ae_int_t cntgreater; + double tmpr; + ae_int_t tmpi; + double v0; + double v1; + double v2; + double vp; + + + + /* + * Fast exit + */ + if( i2<=i1 ) + { + return; + } + + /* + * Non-recursive sort for small arrays + */ + if( i2-i1<=16 ) + { + for(j=i1+1; j<=i2; j++) + { + + /* + * Search elements [I1..J-1] for place to insert Jth element. + * + * This code stops immediately if we can leave A[J] at J-th position + * (all elements have same value of A[J] larger than any of them) + */ + tmpr = a->ptr.p_double[j]; + tmpi = j; + for(k=j-1; k>=i1; k--) + { + if( a->ptr.p_double[k]<=tmpr ) + { + break; + } + tmpi = k; + } + k = tmpi; + + /* + * Insert Jth element into Kth position + */ + if( k!=j ) + { + tmpr = a->ptr.p_double[j]; + tmpi = b->ptr.p_int[j]; + for(i=j-1; i>=k; i--) + { + a->ptr.p_double[i+1] = a->ptr.p_double[i]; + b->ptr.p_int[i+1] = b->ptr.p_int[i]; + } + a->ptr.p_double[k] = tmpr; + b->ptr.p_int[k] = tmpi; + } + } + return; + } + + /* + * Quicksort: choose pivot + * Here we assume that I2-I1>=2 + */ + v0 = a->ptr.p_double[i1]; + v1 = a->ptr.p_double[i1+(i2-i1)/2]; + v2 = a->ptr.p_double[i2]; + if( v0>v1 ) + { + tmpr = v1; + v1 = v0; + v0 = tmpr; + } + if( v1>v2 ) + { + tmpr = v2; + v2 = v1; + v1 = tmpr; + } + if( v0>v1 ) + { + tmpr = v1; + v1 = v0; + v0 = tmpr; + } + vp = v1; + + /* + * now pass through A/B and: + * * move elements that are LESS than VP to the left of A/B + * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order) + * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order + * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order) + * * move elements from the left of BufA/BufB to the end of A/B + */ + cntless = 0; + cnteq = 0; + cntgreater = 0; + for(i=i1; i<=i2; i++) + { + v0 = a->ptr.p_double[i]; + if( v0<vp ) + { + + /* + * LESS + */ + k = i1+cntless; + if( i!=k ) + { + a->ptr.p_double[k] = v0; + b->ptr.p_int[k] = b->ptr.p_int[i]; + } + cntless = cntless+1; + continue; + } + if( v0==vp ) + { + + /* + * EQUAL + */ + k = i2-cnteq; + bufa->ptr.p_double[k] = v0; + bufb->ptr.p_int[k] = b->ptr.p_int[i]; + cnteq = cnteq+1; + continue; + } + + /* + * GREATER + */ + k = i1+cntgreater; + bufa->ptr.p_double[k] = v0; + bufb->ptr.p_int[k] = b->ptr.p_int[i]; + cntgreater = cntgreater+1; + } + for(i=0; i<=cnteq-1; i++) + { + j = i1+cntless+cnteq-1-i; + k = i2+i-(cnteq-1); + a->ptr.p_double[j] = bufa->ptr.p_double[k]; + b->ptr.p_int[j] = bufb->ptr.p_int[k]; + } + for(i=0; i<=cntgreater-1; i++) + { + j = i1+cntless+cnteq+i; + k = i1+i; + a->ptr.p_double[j] = bufa->ptr.p_double[k]; + b->ptr.p_int[j] = bufb->ptr.p_int[k]; + } + + /* + * Sort left and right parts of the array (ignoring middle part) + */ + tsort_tagsortfastirec(a, b, bufa, bufb, i1, i1+cntless-1, _state); + tsort_tagsortfastirec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state); +} + + +/************************************************************************* +Internal TagSortFastR: sorts A[I1...I2] (both bounds are included), +applies same permutations to B. + + -- ALGLIB -- + Copyright 06.09.2010 by Bochkanov Sergey +*************************************************************************/ +static void tsort_tagsortfastrrec(/* Real */ ae_vector* a, + /* Real */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Real */ ae_vector* bufb, + ae_int_t i1, + ae_int_t i2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t k; + double tmpr; + double tmpr2; + ae_int_t tmpi; + ae_int_t cntless; + ae_int_t cnteq; + ae_int_t cntgreater; + double v0; + double v1; + double v2; + double vp; + + + + /* + * Fast exit + */ + if( i2<=i1 ) + { + return; + } + + /* + * Non-recursive sort for small arrays + */ + if( i2-i1<=16 ) + { + for(j=i1+1; j<=i2; j++) + { + + /* + * Search elements [I1..J-1] for place to insert Jth element. + * + * This code stops immediatly if we can leave A[J] at J-th position + * (all elements have same value of A[J] larger than any of them) + */ + tmpr = a->ptr.p_double[j]; + tmpi = j; + for(k=j-1; k>=i1; k--) + { + if( a->ptr.p_double[k]<=tmpr ) + { + break; + } + tmpi = k; + } + k = tmpi; + + /* + * Insert Jth element into Kth position + */ + if( k!=j ) + { + tmpr = a->ptr.p_double[j]; + tmpr2 = b->ptr.p_double[j]; + for(i=j-1; i>=k; i--) + { + a->ptr.p_double[i+1] = a->ptr.p_double[i]; + b->ptr.p_double[i+1] = b->ptr.p_double[i]; + } + a->ptr.p_double[k] = tmpr; + b->ptr.p_double[k] = tmpr2; + } + } + return; + } + + /* + * Quicksort: choose pivot + * Here we assume that I2-I1>=16 + */ + v0 = a->ptr.p_double[i1]; + v1 = a->ptr.p_double[i1+(i2-i1)/2]; + v2 = a->ptr.p_double[i2]; + if( v0>v1 ) + { + tmpr = v1; + v1 = v0; + v0 = tmpr; + } + if( v1>v2 ) + { + tmpr = v2; + v2 = v1; + v1 = tmpr; + } + if( v0>v1 ) + { + tmpr = v1; + v1 = v0; + v0 = tmpr; + } + vp = v1; + + /* + * now pass through A/B and: + * * move elements that are LESS than VP to the left of A/B + * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order) + * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order + * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order) + * * move elements from the left of BufA/BufB to the end of A/B + */ + cntless = 0; + cnteq = 0; + cntgreater = 0; + for(i=i1; i<=i2; i++) + { + v0 = a->ptr.p_double[i]; + if( v0<vp ) + { + + /* + * LESS + */ + k = i1+cntless; + if( i!=k ) + { + a->ptr.p_double[k] = v0; + b->ptr.p_double[k] = b->ptr.p_double[i]; + } + cntless = cntless+1; + continue; + } + if( v0==vp ) + { + + /* + * EQUAL + */ + k = i2-cnteq; + bufa->ptr.p_double[k] = v0; + bufb->ptr.p_double[k] = b->ptr.p_double[i]; + cnteq = cnteq+1; + continue; + } + + /* + * GREATER + */ + k = i1+cntgreater; + bufa->ptr.p_double[k] = v0; + bufb->ptr.p_double[k] = b->ptr.p_double[i]; + cntgreater = cntgreater+1; + } + for(i=0; i<=cnteq-1; i++) + { + j = i1+cntless+cnteq-1-i; + k = i2+i-(cnteq-1); + a->ptr.p_double[j] = bufa->ptr.p_double[k]; + b->ptr.p_double[j] = bufb->ptr.p_double[k]; + } + for(i=0; i<=cntgreater-1; i++) + { + j = i1+cntless+cnteq+i; + k = i1+i; + a->ptr.p_double[j] = bufa->ptr.p_double[k]; + b->ptr.p_double[j] = bufb->ptr.p_double[k]; + } + + /* + * Sort left and right parts of the array (ignoring middle part) + */ + tsort_tagsortfastrrec(a, b, bufa, bufb, i1, i1+cntless-1, _state); + tsort_tagsortfastrrec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state); +} + + +/************************************************************************* +Internal TagSortFastI: sorts A[I1...I2] (both bounds are included), +applies same permutations to B. + + -- ALGLIB -- + Copyright 06.09.2010 by Bochkanov Sergey +*************************************************************************/ +static void tsort_tagsortfastrec(/* Real */ ae_vector* a, + /* Real */ ae_vector* bufa, + ae_int_t i1, + ae_int_t i2, + ae_state *_state) +{ + ae_int_t cntless; + ae_int_t cnteq; + ae_int_t cntgreater; + ae_int_t i; + ae_int_t j; + ae_int_t k; + double tmpr; + ae_int_t tmpi; + double v0; + double v1; + double v2; + double vp; + + + + /* + * Fast exit + */ + if( i2<=i1 ) + { + return; + } + + /* + * Non-recursive sort for small arrays + */ + if( i2-i1<=16 ) + { + for(j=i1+1; j<=i2; j++) + { + + /* + * Search elements [I1..J-1] for place to insert Jth element. + * + * This code stops immediatly if we can leave A[J] at J-th position + * (all elements have same value of A[J] larger than any of them) + */ + tmpr = a->ptr.p_double[j]; + tmpi = j; + for(k=j-1; k>=i1; k--) + { + if( a->ptr.p_double[k]<=tmpr ) + { + break; + } + tmpi = k; + } + k = tmpi; + + /* + * Insert Jth element into Kth position + */ + if( k!=j ) + { + tmpr = a->ptr.p_double[j]; + for(i=j-1; i>=k; i--) + { + a->ptr.p_double[i+1] = a->ptr.p_double[i]; + } + a->ptr.p_double[k] = tmpr; + } + } + return; + } + + /* + * Quicksort: choose pivot + * Here we assume that I2-I1>=16 + */ + v0 = a->ptr.p_double[i1]; + v1 = a->ptr.p_double[i1+(i2-i1)/2]; + v2 = a->ptr.p_double[i2]; + if( v0>v1 ) + { + tmpr = v1; + v1 = v0; + v0 = tmpr; + } + if( v1>v2 ) + { + tmpr = v2; + v2 = v1; + v1 = tmpr; + } + if( v0>v1 ) + { + tmpr = v1; + v1 = v0; + v0 = tmpr; + } + vp = v1; + + /* + * now pass through A/B and: + * * move elements that are LESS than VP to the left of A/B + * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order) + * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order + * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order) + * * move elements from the left of BufA/BufB to the end of A/B + */ + cntless = 0; + cnteq = 0; + cntgreater = 0; + for(i=i1; i<=i2; i++) + { + v0 = a->ptr.p_double[i]; + if( v0<vp ) + { + + /* + * LESS + */ + k = i1+cntless; + if( i!=k ) + { + a->ptr.p_double[k] = v0; + } + cntless = cntless+1; + continue; + } + if( v0==vp ) + { + + /* + * EQUAL + */ + k = i2-cnteq; + bufa->ptr.p_double[k] = v0; + cnteq = cnteq+1; + continue; + } + + /* + * GREATER + */ + k = i1+cntgreater; + bufa->ptr.p_double[k] = v0; + cntgreater = cntgreater+1; + } + for(i=0; i<=cnteq-1; i++) + { + j = i1+cntless+cnteq-1-i; + k = i2+i-(cnteq-1); + a->ptr.p_double[j] = bufa->ptr.p_double[k]; + } + for(i=0; i<=cntgreater-1; i++) + { + j = i1+cntless+cnteq+i; + k = i1+i; + a->ptr.p_double[j] = bufa->ptr.p_double[k]; + } + + /* + * Sort left and right parts of the array (ignoring middle part) + */ + tsort_tagsortfastrec(a, bufa, i1, i1+cntless-1, _state); + tsort_tagsortfastrec(a, bufa, i1+cntless+cnteq, i2, _state); +} + + + + +/************************************************************************* +This function generates 1-dimensional general interpolation task with +moderate Lipshitz constant (close to 1.0) + +If N=1 then suborutine generates only one point at the middle of [A,B] + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void taskgenint1d(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state) +{ + ae_int_t i; + double h; + + ae_vector_clear(x); + ae_vector_clear(y); + + ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state); + ae_vector_set_length(x, n, _state); + ae_vector_set_length(y, n, _state); + if( n>1 ) + { + x->ptr.p_double[0] = a; + y->ptr.p_double[0] = 2*ae_randomreal(_state)-1; + h = (b-a)/(n-1); + for(i=1; i<=n-1; i++) + { + if( i!=n-1 ) + { + x->ptr.p_double[i] = a+(i+0.2*(2*ae_randomreal(_state)-1))*h; + } + else + { + x->ptr.p_double[i] = b; + } + y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]); + } + } + else + { + x->ptr.p_double[0] = 0.5*(a+b); + y->ptr.p_double[0] = 2*ae_randomreal(_state)-1; + } +} + + +/************************************************************************* +This function generates 1-dimensional equidistant interpolation task with +moderate Lipshitz constant (close to 1.0) + +If N=1 then suborutine generates only one point at the middle of [A,B] + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void taskgenint1dequidist(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state) +{ + ae_int_t i; + double h; + + ae_vector_clear(x); + ae_vector_clear(y); + + ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state); + ae_vector_set_length(x, n, _state); + ae_vector_set_length(y, n, _state); + if( n>1 ) + { + x->ptr.p_double[0] = a; + y->ptr.p_double[0] = 2*ae_randomreal(_state)-1; + h = (b-a)/(n-1); + for(i=1; i<=n-1; i++) + { + x->ptr.p_double[i] = a+i*h; + y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*h; + } + } + else + { + x->ptr.p_double[0] = 0.5*(a+b); + y->ptr.p_double[0] = 2*ae_randomreal(_state)-1; + } +} + + +/************************************************************************* +This function generates 1-dimensional Chebyshev-1 interpolation task with +moderate Lipshitz constant (close to 1.0) + +If N=1 then suborutine generates only one point at the middle of [A,B] + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void taskgenint1dcheb1(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state) +{ + ae_int_t i; + + ae_vector_clear(x); + ae_vector_clear(y); + + ae_assert(n>=1, "TaskGenInterpolation1DCheb1: N<1!", _state); + ae_vector_set_length(x, n, _state); + ae_vector_set_length(y, n, _state); + if( n>1 ) + { + for(i=0; i<=n-1; i++) + { + x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*(2*i+1)/(2*n), _state); + if( i==0 ) + { + y->ptr.p_double[i] = 2*ae_randomreal(_state)-1; + } + else + { + y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]); + } + } + } + else + { + x->ptr.p_double[0] = 0.5*(a+b); + y->ptr.p_double[0] = 2*ae_randomreal(_state)-1; + } +} + + +/************************************************************************* +This function generates 1-dimensional Chebyshev-2 interpolation task with +moderate Lipshitz constant (close to 1.0) + +If N=1 then suborutine generates only one point at the middle of [A,B] + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void taskgenint1dcheb2(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state) +{ + ae_int_t i; + + ae_vector_clear(x); + ae_vector_clear(y); + + ae_assert(n>=1, "TaskGenInterpolation1DCheb2: N<1!", _state); + ae_vector_set_length(x, n, _state); + ae_vector_set_length(y, n, _state); + if( n>1 ) + { + for(i=0; i<=n-1; i++) + { + x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*i/(n-1), _state); + if( i==0 ) + { + y->ptr.p_double[i] = 2*ae_randomreal(_state)-1; + } + else + { + y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]); + } + } + } + else + { + x->ptr.p_double[0] = 0.5*(a+b); + y->ptr.p_double[0] = 2*ae_randomreal(_state)-1; + } +} + + +/************************************************************************* +This function checks that all values from X[] are distinct. It does more +than just usual floating point comparison: +* first, it calculates max(X) and min(X) +* second, it maps X[] from [min,max] to [1,2] +* only at this stage actual comparison is done + +The meaning of such check is to ensure that all values are "distinct enough" +and will not cause interpolation subroutine to fail. + +NOTE: + X[] must be sorted by ascending (subroutine ASSERT's it) + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool aredistinct(/* Real */ ae_vector* x, + ae_int_t n, + ae_state *_state) +{ + double a; + double b; + ae_int_t i; + ae_bool nonsorted; + ae_bool result; + + + ae_assert(n>=1, "APSERVAreDistinct: internal error (N<1)", _state); + if( n==1 ) + { + + /* + * everything is alright, it is up to caller to decide whether it + * can interpolate something with just one point + */ + result = ae_true; + return result; + } + a = x->ptr.p_double[0]; + b = x->ptr.p_double[0]; + nonsorted = ae_false; + for(i=1; i<=n-1; i++) + { + a = ae_minreal(a, x->ptr.p_double[i], _state); + b = ae_maxreal(b, x->ptr.p_double[i], _state); + nonsorted = nonsorted||ae_fp_greater_eq(x->ptr.p_double[i-1],x->ptr.p_double[i]); + } + ae_assert(!nonsorted, "APSERVAreDistinct: internal error (not sorted)", _state); + for(i=1; i<=n-1; i++) + { + if( ae_fp_eq((x->ptr.p_double[i]-a)/(b-a)+1,(x->ptr.p_double[i-1]-a)/(b-a)+1) ) + { + result = ae_false; + return result; + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +If Length(X)<N, resizes X + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void bvectorsetlengthatleast(/* Boolean */ ae_vector* x, + ae_int_t n, + ae_state *_state) +{ + + + if( x->cnt<n ) + { + ae_vector_set_length(x, n, _state); + } +} + + +/************************************************************************* +If Length(X)<N, resizes X + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void rvectorsetlengthatleast(/* Real */ ae_vector* x, + ae_int_t n, + ae_state *_state) +{ + + + if( x->cnt<n ) + { + ae_vector_set_length(x, n, _state); + } +} + + +/************************************************************************* +If Cols(X)<N or Rows(X)<M, resizes X + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void rmatrixsetlengthatleast(/* Real */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + + + if( x->rows<m||x->cols<n ) + { + ae_matrix_set_length(x, m, n, _state); + } +} + + +/************************************************************************* +This function checks that all values from X[] are finite + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool isfinitevector(/* Real */ ae_vector* x, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteVector: internal error (N<0)", _state); + for(i=0; i<=n-1; i++) + { + if( !ae_isfinite(x->ptr.p_double[i], _state) ) + { + result = ae_false; + return result; + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +This function checks that all values from X[] are finite + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool isfinitecvector(/* Complex */ ae_vector* z, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteCVector: internal error (N<0)", _state); + for(i=0; i<=n-1; i++) + { + if( !ae_isfinite(z->ptr.p_complex[i].x, _state)||!ae_isfinite(z->ptr.p_complex[i].y, _state) ) + { + result = ae_false; + return result; + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +This function checks that all values from X[0..M-1,0..N-1] are finite + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool apservisfinitematrix(/* Real */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteMatrix: internal error (N<0)", _state); + ae_assert(m>=0, "APSERVIsFiniteMatrix: internal error (M<0)", _state); + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( !ae_isfinite(x->ptr.pp_double[i][j], _state) ) + { + result = ae_false; + return result; + } + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +This function checks that all values from X[0..M-1,0..N-1] are finite + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteCMatrix: internal error (N<0)", _state); + ae_assert(m>=0, "APSERVIsFiniteCMatrix: internal error (M<0)", _state); + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) ) + { + result = ae_false; + return result; + } + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +This function checks that all values from upper/lower triangle of +X[0..N-1,0..N-1] are finite + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool isfinitertrmatrix(/* Real */ ae_matrix* x, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j1; + ae_int_t j2; + ae_int_t j; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteRTRMatrix: internal error (N<0)", _state); + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + for(j=j1; j<=j2; j++) + { + if( !ae_isfinite(x->ptr.pp_double[i][j], _state) ) + { + result = ae_false; + return result; + } + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +This function checks that all values from upper/lower triangle of +X[0..N-1,0..N-1] are finite + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j1; + ae_int_t j2; + ae_int_t j; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteCTRMatrix: internal error (N<0)", _state); + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + for(j=j1; j<=j2; j++) + { + if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) ) + { + result = ae_false; + return result; + } + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +This function checks that all values from X[0..M-1,0..N-1] are finite or +NaN's. + + -- ALGLIB -- + Copyright 18.06.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool apservisfiniteornanmatrix(/* Real */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_bool result; + + + ae_assert(n>=0, "APSERVIsFiniteOrNaNMatrix: internal error (N<0)", _state); + ae_assert(m>=0, "APSERVIsFiniteOrNaNMatrix: internal error (M<0)", _state); + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( !(ae_isfinite(x->ptr.pp_double[i][j], _state)||ae_isnan(x->ptr.pp_double[i][j], _state)) ) + { + result = ae_false; + return result; + } + } + } + result = ae_true; + return result; +} + + +/************************************************************************* +Safe sqrt(x^2+y^2) + + -- ALGLIB -- + Copyright by Bochkanov Sergey +*************************************************************************/ +double safepythag2(double x, double y, ae_state *_state) +{ + double w; + double xabs; + double yabs; + double z; + double result; + + + xabs = ae_fabs(x, _state); + yabs = ae_fabs(y, _state); + w = ae_maxreal(xabs, yabs, _state); + z = ae_minreal(xabs, yabs, _state); + if( ae_fp_eq(z,0) ) + { + result = w; + } + else + { + result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state); + } + return result; +} + + +/************************************************************************* +Safe sqrt(x^2+y^2) + + -- ALGLIB -- + Copyright by Bochkanov Sergey +*************************************************************************/ +double safepythag3(double x, double y, double z, ae_state *_state) +{ + double w; + double result; + + + w = ae_maxreal(ae_fabs(x, _state), ae_maxreal(ae_fabs(y, _state), ae_fabs(z, _state), _state), _state); + if( ae_fp_eq(w,0) ) + { + result = 0; + return result; + } + x = x/w; + y = y/w; + z = z/w; + result = w*ae_sqrt(ae_sqr(x, _state)+ae_sqr(y, _state)+ae_sqr(z, _state), _state); + return result; +} + + +/************************************************************************* +Safe division. + +This function attempts to calculate R=X/Y without overflow. + +It returns: +* +1, if abs(X/Y)>=MaxRealNumber or undefined - overflow-like situation + (no overlfow is generated, R is either NAN, PosINF, NegINF) +* 0, if MinRealNumber<abs(X/Y)<MaxRealNumber or X=0, Y<>0 + (R contains result, may be zero) +* -1, if 0<abs(X/Y)<MinRealNumber - underflow-like situation + (R contains zero; it corresponds to underflow) + +No overflow is generated in any case. + + -- ALGLIB -- + Copyright by Bochkanov Sergey +*************************************************************************/ +ae_int_t saferdiv(double x, double y, double* r, ae_state *_state) +{ + ae_int_t result; + + *r = 0; + + + /* + * Two special cases: + * * Y=0 + * * X=0 and Y<>0 + */ + if( ae_fp_eq(y,0) ) + { + result = 1; + if( ae_fp_eq(x,0) ) + { + *r = _state->v_nan; + } + if( ae_fp_greater(x,0) ) + { + *r = _state->v_posinf; + } + if( ae_fp_less(x,0) ) + { + *r = _state->v_neginf; + } + return result; + } + if( ae_fp_eq(x,0) ) + { + *r = 0; + result = 0; + return result; + } + + /* + * make Y>0 + */ + if( ae_fp_less(y,0) ) + { + x = -x; + y = -y; + } + + /* + * + */ + if( ae_fp_greater_eq(y,1) ) + { + *r = x/y; + if( ae_fp_less_eq(ae_fabs(*r, _state),ae_minrealnumber) ) + { + result = -1; + *r = 0; + } + else + { + result = 0; + } + } + else + { + if( ae_fp_greater_eq(ae_fabs(x, _state),ae_maxrealnumber*y) ) + { + if( ae_fp_greater(x,0) ) + { + *r = _state->v_posinf; + } + else + { + *r = _state->v_neginf; + } + result = 1; + } + else + { + *r = x/y; + result = 0; + } + } + return result; +} + + +/************************************************************************* +This function calculates "safe" min(X/Y,V) for positive finite X, Y, V. +No overflow is generated in any case. + + -- ALGLIB -- + Copyright by Bochkanov Sergey +*************************************************************************/ +double safeminposrv(double x, double y, double v, ae_state *_state) +{ + double r; + double result; + + + if( ae_fp_greater_eq(y,1) ) + { + + /* + * Y>=1, we can safely divide by Y + */ + r = x/y; + result = v; + if( ae_fp_greater(v,r) ) + { + result = r; + } + else + { + result = v; + } + } + else + { + + /* + * Y<1, we can safely multiply by Y + */ + if( ae_fp_less(x,v*y) ) + { + result = x/y; + } + else + { + result = v; + } + } + return result; +} + + +/************************************************************************* +This function makes periodic mapping of X to [A,B]. + +It accepts X, A, B (A>B). It returns T which lies in [A,B] and integer K, +such that X = T + K*(B-A). + +NOTES: +* K is represented as real value, although actually it is integer +* T is guaranteed to be in [A,B] +* T replaces X + + -- ALGLIB -- + Copyright by Bochkanov Sergey +*************************************************************************/ +void apperiodicmap(double* x, + double a, + double b, + double* k, + ae_state *_state) +{ + + *k = 0; + + ae_assert(ae_fp_less(a,b), "APPeriodicMap: internal error!", _state); + *k = ae_ifloor((*x-a)/(b-a), _state); + *x = *x-*k*(b-a); + while(ae_fp_less(*x,a)) + { + *x = *x+(b-a); + *k = *k-1; + } + while(ae_fp_greater(*x,b)) + { + *x = *x-(b-a); + *k = *k+1; + } + *x = ae_maxreal(*x, a, _state); + *x = ae_minreal(*x, b, _state); +} + + +/************************************************************************* +'bounds' value: maps X to [B1,B2] + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +double boundval(double x, double b1, double b2, ae_state *_state) +{ + double result; + + + if( ae_fp_less_eq(x,b1) ) + { + result = b1; + return result; + } + if( ae_fp_greater_eq(x,b2) ) + { + result = b2; + return result; + } + result = x; + return result; +} + + +ae_bool _apbuffers_init(apbuffers* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->ia1, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ia2, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ra1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ra2, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _apbuffers_init_copy(apbuffers* dst, apbuffers* src, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init_copy(&dst->ia1, &src->ia1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ia2, &src->ia2, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ra1, &src->ra1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ra2, &src->ra2, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _apbuffers_clear(apbuffers* p) +{ + ae_vector_clear(&p->ia1); + ae_vector_clear(&p->ia2); + ae_vector_clear(&p->ra1); + ae_vector_clear(&p->ra2); +} + + + + +/************************************************************************* +Internal ranking subroutine +*************************************************************************/ +void rankx(/* Real */ ae_vector* x, + ae_int_t n, + apbuffers* buf, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_int_t t; + double tmp; + ae_int_t tmpi; + + + + /* + * Prepare + */ + if( n<1 ) + { + return; + } + if( n==1 ) + { + x->ptr.p_double[0] = 1; + return; + } + if( buf->ra1.cnt<n ) + { + ae_vector_set_length(&buf->ra1, n, _state); + } + if( buf->ia1.cnt<n ) + { + ae_vector_set_length(&buf->ia1, n, _state); + } + for(i=0; i<=n-1; i++) + { + buf->ra1.ptr.p_double[i] = x->ptr.p_double[i]; + buf->ia1.ptr.p_int[i] = i; + } + + /* + * sort {R, C} + */ + if( n!=1 ) + { + i = 2; + do + { + t = i; + while(t!=1) + { + k = t/2; + if( ae_fp_greater_eq(buf->ra1.ptr.p_double[k-1],buf->ra1.ptr.p_double[t-1]) ) + { + t = 1; + } + else + { + tmp = buf->ra1.ptr.p_double[k-1]; + buf->ra1.ptr.p_double[k-1] = buf->ra1.ptr.p_double[t-1]; + buf->ra1.ptr.p_double[t-1] = tmp; + tmpi = buf->ia1.ptr.p_int[k-1]; + buf->ia1.ptr.p_int[k-1] = buf->ia1.ptr.p_int[t-1]; + buf->ia1.ptr.p_int[t-1] = tmpi; + t = k; + } + } + i = i+1; + } + while(i<=n); + i = n-1; + do + { + tmp = buf->ra1.ptr.p_double[i]; + buf->ra1.ptr.p_double[i] = buf->ra1.ptr.p_double[0]; + buf->ra1.ptr.p_double[0] = tmp; + tmpi = buf->ia1.ptr.p_int[i]; + buf->ia1.ptr.p_int[i] = buf->ia1.ptr.p_int[0]; + buf->ia1.ptr.p_int[0] = tmpi; + t = 1; + while(t!=0) + { + k = 2*t; + if( k>i ) + { + t = 0; + } + else + { + if( k<i ) + { + if( ae_fp_greater(buf->ra1.ptr.p_double[k],buf->ra1.ptr.p_double[k-1]) ) + { + k = k+1; + } + } + if( ae_fp_greater_eq(buf->ra1.ptr.p_double[t-1],buf->ra1.ptr.p_double[k-1]) ) + { + t = 0; + } + else + { + tmp = buf->ra1.ptr.p_double[k-1]; + buf->ra1.ptr.p_double[k-1] = buf->ra1.ptr.p_double[t-1]; + buf->ra1.ptr.p_double[t-1] = tmp; + tmpi = buf->ia1.ptr.p_int[k-1]; + buf->ia1.ptr.p_int[k-1] = buf->ia1.ptr.p_int[t-1]; + buf->ia1.ptr.p_int[t-1] = tmpi; + t = k; + } + } + } + i = i-1; + } + while(i>=1); + } + + /* + * compute tied ranks + */ + i = 0; + while(i<=n-1) + { + j = i+1; + while(j<=n-1) + { + if( ae_fp_neq(buf->ra1.ptr.p_double[j],buf->ra1.ptr.p_double[i]) ) + { + break; + } + j = j+1; + } + for(k=i; k<=j-1; k++) + { + buf->ra1.ptr.p_double[k] = 1+(double)(i+j-1)/(double)2; + } + i = j; + } + + /* + * back to x + */ + for(i=0; i<=n-1; i++) + { + x->ptr.p_double[buf->ia1.ptr.p_int[i]] = buf->ra1.ptr.p_double[i]; + } +} + + + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixrank1f(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_vector* u, + ae_int_t iu, + /* Complex */ ae_vector* v, + ae_int_t iv, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixrank1f(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_vector* u, + ae_int_t iu, + /* Real */ ae_vector* v, + ae_int_t iv, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixmvf(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Complex */ ae_vector* x, + ae_int_t ix, + /* Complex */ ae_vector* y, + ae_int_t iy, + ae_state *_state) +{ + ae_bool result; + + + result = ae_false; + return result; +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixmvf(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Real */ ae_vector* x, + ae_int_t ix, + /* Real */ ae_vector* y, + ae_int_t iy, + ae_state *_state) +{ + ae_bool result; + + + result = ae_false; + return result; +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_cmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc); +#endif +} + + +/************************************************************************* +Fast kernel + + -- ALGLIB routine -- + 19.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state) +{ +#ifndef ALGLIB_INTERCEPTS_ABLAS + ae_bool result; + + + result = ae_false; + return result; +#else + return _ialglib_i_cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc); +#endif +} + + + + +double vectornorm2(/* Real */ ae_vector* x, + ae_int_t i1, + ae_int_t i2, + ae_state *_state) +{ + ae_int_t n; + ae_int_t ix; + double absxi; + double scl; + double ssq; + double result; + + + n = i2-i1+1; + if( n<1 ) + { + result = 0; + return result; + } + if( n==1 ) + { + result = ae_fabs(x->ptr.p_double[i1], _state); + return result; + } + scl = 0; + ssq = 1; + for(ix=i1; ix<=i2; ix++) + { + if( ae_fp_neq(x->ptr.p_double[ix],0) ) + { + absxi = ae_fabs(x->ptr.p_double[ix], _state); + if( ae_fp_less(scl,absxi) ) + { + ssq = 1+ssq*ae_sqr(scl/absxi, _state); + scl = absxi; + } + else + { + ssq = ssq+ae_sqr(absxi/scl, _state); + } + } + } + result = scl*ae_sqrt(ssq, _state); + return result; +} + + +ae_int_t vectoridxabsmax(/* Real */ ae_vector* x, + ae_int_t i1, + ae_int_t i2, + ae_state *_state) +{ + ae_int_t i; + double a; + ae_int_t result; + + + result = i1; + a = ae_fabs(x->ptr.p_double[result], _state); + for(i=i1+1; i<=i2; i++) + { + if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[result], _state)) ) + { + result = i; + } + } + return result; +} + + +ae_int_t columnidxabsmax(/* Real */ ae_matrix* x, + ae_int_t i1, + ae_int_t i2, + ae_int_t j, + ae_state *_state) +{ + ae_int_t i; + double a; + ae_int_t result; + + + result = i1; + a = ae_fabs(x->ptr.pp_double[result][j], _state); + for(i=i1+1; i<=i2; i++) + { + if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[result][j], _state)) ) + { + result = i; + } + } + return result; +} + + +ae_int_t rowidxabsmax(/* Real */ ae_matrix* x, + ae_int_t j1, + ae_int_t j2, + ae_int_t i, + ae_state *_state) +{ + ae_int_t j; + double a; + ae_int_t result; + + + result = j1; + a = ae_fabs(x->ptr.pp_double[i][result], _state); + for(j=j1+1; j<=j2; j++) + { + if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[i][result], _state)) ) + { + result = j; + } + } + return result; +} + + +double upperhessenberg1norm(/* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t i2, + ae_int_t j1, + ae_int_t j2, + /* Real */ ae_vector* work, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double result; + + + ae_assert(i2-i1==j2-j1, "UpperHessenberg1Norm: I2-I1<>J2-J1!", _state); + for(j=j1; j<=j2; j++) + { + work->ptr.p_double[j] = 0; + } + for(i=i1; i<=i2; i++) + { + for(j=ae_maxint(j1, j1+i-i1-1, _state); j<=j2; j++) + { + work->ptr.p_double[j] = work->ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state); + } + } + result = 0; + for(j=j1; j<=j2; j++) + { + result = ae_maxreal(result, work->ptr.p_double[j], _state); + } + return result; +} + + +void copymatrix(/* Real */ ae_matrix* a, + ae_int_t is1, + ae_int_t is2, + ae_int_t js1, + ae_int_t js2, + /* Real */ ae_matrix* b, + ae_int_t id1, + ae_int_t id2, + ae_int_t jd1, + ae_int_t jd2, + ae_state *_state) +{ + ae_int_t isrc; + ae_int_t idst; + + + if( is1>is2||js1>js2 ) + { + return; + } + ae_assert(is2-is1==id2-id1, "CopyMatrix: different sizes!", _state); + ae_assert(js2-js1==jd2-jd1, "CopyMatrix: different sizes!", _state); + for(isrc=is1; isrc<=is2; isrc++) + { + idst = isrc-is1+id1; + ae_v_move(&b->ptr.pp_double[idst][jd1], 1, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(jd1,jd2)); + } +} + + +void inplacetranspose(/* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t i2, + ae_int_t j1, + ae_int_t j2, + /* Real */ ae_vector* work, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t ips; + ae_int_t jps; + ae_int_t l; + + + if( i1>i2||j1>j2 ) + { + return; + } + ae_assert(i1-i2==j1-j2, "InplaceTranspose error: incorrect array size!", _state); + for(i=i1; i<=i2-1; i++) + { + j = j1+i-i1; + ips = i+1; + jps = j1+ips-i1; + l = i2-i; + ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ips][j], a->stride, ae_v_len(1,l)); + ae_v_move(&a->ptr.pp_double[ips][j], a->stride, &a->ptr.pp_double[i][jps], 1, ae_v_len(ips,i2)); + ae_v_move(&a->ptr.pp_double[i][jps], 1, &work->ptr.p_double[1], 1, ae_v_len(jps,j2)); + } +} + + +void copyandtranspose(/* Real */ ae_matrix* a, + ae_int_t is1, + ae_int_t is2, + ae_int_t js1, + ae_int_t js2, + /* Real */ ae_matrix* b, + ae_int_t id1, + ae_int_t id2, + ae_int_t jd1, + ae_int_t jd2, + ae_state *_state) +{ + ae_int_t isrc; + ae_int_t jdst; + + + if( is1>is2||js1>js2 ) + { + return; + } + ae_assert(is2-is1==jd2-jd1, "CopyAndTranspose: different sizes!", _state); + ae_assert(js2-js1==id2-id1, "CopyAndTranspose: different sizes!", _state); + for(isrc=is1; isrc<=is2; isrc++) + { + jdst = isrc-is1+jd1; + ae_v_move(&b->ptr.pp_double[id1][jdst], b->stride, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(id1,id2)); + } +} + + +void matrixvectormultiply(/* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t i2, + ae_int_t j1, + ae_int_t j2, + ae_bool trans, + /* Real */ ae_vector* x, + ae_int_t ix1, + ae_int_t ix2, + double alpha, + /* Real */ ae_vector* y, + ae_int_t iy1, + ae_int_t iy2, + double beta, + ae_state *_state) +{ + ae_int_t i; + double v; + + + if( !trans ) + { + + /* + * y := alpha*A*x + beta*y; + */ + if( i1>i2||j1>j2 ) + { + return; + } + ae_assert(j2-j1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state); + ae_assert(i2-i1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state); + + /* + * beta*y + */ + if( ae_fp_eq(beta,0) ) + { + for(i=iy1; i<=iy2; i++) + { + y->ptr.p_double[i] = 0; + } + } + else + { + ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta); + } + + /* + * alpha*A*x + */ + for(i=i1; i<=i2; i++) + { + v = ae_v_dotproduct(&a->ptr.pp_double[i][j1], 1, &x->ptr.p_double[ix1], 1, ae_v_len(j1,j2)); + y->ptr.p_double[iy1+i-i1] = y->ptr.p_double[iy1+i-i1]+alpha*v; + } + } + else + { + + /* + * y := alpha*A'*x + beta*y; + */ + if( i1>i2||j1>j2 ) + { + return; + } + ae_assert(i2-i1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state); + ae_assert(j2-j1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state); + + /* + * beta*y + */ + if( ae_fp_eq(beta,0) ) + { + for(i=iy1; i<=iy2; i++) + { + y->ptr.p_double[i] = 0; + } + } + else + { + ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta); + } + + /* + * alpha*A'*x + */ + for(i=i1; i<=i2; i++) + { + v = alpha*x->ptr.p_double[ix1+i-i1]; + ae_v_addd(&y->ptr.p_double[iy1], 1, &a->ptr.pp_double[i][j1], 1, ae_v_len(iy1,iy2), v); + } + } +} + + +double pythag2(double x, double y, ae_state *_state) +{ + double w; + double xabs; + double yabs; + double z; + double result; + + + xabs = ae_fabs(x, _state); + yabs = ae_fabs(y, _state); + w = ae_maxreal(xabs, yabs, _state); + z = ae_minreal(xabs, yabs, _state); + if( ae_fp_eq(z,0) ) + { + result = w; + } + else + { + result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state); + } + return result; +} + + +void matrixmatrixmultiply(/* Real */ ae_matrix* a, + ae_int_t ai1, + ae_int_t ai2, + ae_int_t aj1, + ae_int_t aj2, + ae_bool transa, + /* Real */ ae_matrix* b, + ae_int_t bi1, + ae_int_t bi2, + ae_int_t bj1, + ae_int_t bj2, + ae_bool transb, + double alpha, + /* Real */ ae_matrix* c, + ae_int_t ci1, + ae_int_t ci2, + ae_int_t cj1, + ae_int_t cj2, + double beta, + /* Real */ ae_vector* work, + ae_state *_state) +{ + ae_int_t arows; + ae_int_t acols; + ae_int_t brows; + ae_int_t bcols; + ae_int_t crows; + ae_int_t ccols; + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_int_t l; + ae_int_t r; + double v; + + + + /* + * Setup + */ + if( !transa ) + { + arows = ai2-ai1+1; + acols = aj2-aj1+1; + } + else + { + arows = aj2-aj1+1; + acols = ai2-ai1+1; + } + if( !transb ) + { + brows = bi2-bi1+1; + bcols = bj2-bj1+1; + } + else + { + brows = bj2-bj1+1; + bcols = bi2-bi1+1; + } + ae_assert(acols==brows, "MatrixMatrixMultiply: incorrect matrix sizes!", _state); + if( ((arows<=0||acols<=0)||brows<=0)||bcols<=0 ) + { + return; + } + crows = arows; + ccols = bcols; + + /* + * Test WORK + */ + i = ae_maxint(arows, acols, _state); + i = ae_maxint(brows, i, _state); + i = ae_maxint(i, bcols, _state); + work->ptr.p_double[1] = 0; + work->ptr.p_double[i] = 0; + + /* + * Prepare C + */ + if( ae_fp_eq(beta,0) ) + { + for(i=ci1; i<=ci2; i++) + { + for(j=cj1; j<=cj2; j++) + { + c->ptr.pp_double[i][j] = 0; + } + } + } + else + { + for(i=ci1; i<=ci2; i++) + { + ae_v_muld(&c->ptr.pp_double[i][cj1], 1, ae_v_len(cj1,cj2), beta); + } + } + + /* + * A*B + */ + if( !transa&&!transb ) + { + for(l=ai1; l<=ai2; l++) + { + for(r=bi1; r<=bi2; r++) + { + v = alpha*a->ptr.pp_double[l][aj1+r-bi1]; + k = ci1+l-ai1; + ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v); + } + } + return; + } + + /* + * A*B' + */ + if( !transa&&transb ) + { + if( arows*acols<brows*bcols ) + { + for(r=bi1; r<=bi2; r++) + { + for(l=ai1; l<=ai2; l++) + { + v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2)); + c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v; + } + } + return; + } + else + { + for(l=ai1; l<=ai2; l++) + { + for(r=bi1; r<=bi2; r++) + { + v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2)); + c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v; + } + } + return; + } + } + + /* + * A'*B + */ + if( transa&&!transb ) + { + for(l=aj1; l<=aj2; l++) + { + for(r=bi1; r<=bi2; r++) + { + v = alpha*a->ptr.pp_double[ai1+r-bi1][l]; + k = ci1+l-aj1; + ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v); + } + } + return; + } + + /* + * A'*B' + */ + if( transa&&transb ) + { + if( arows*acols<brows*bcols ) + { + for(r=bi1; r<=bi2; r++) + { + k = cj1+r-bi1; + for(i=1; i<=crows; i++) + { + work->ptr.p_double[i] = 0.0; + } + for(l=ai1; l<=ai2; l++) + { + v = alpha*b->ptr.pp_double[r][bj1+l-ai1]; + ae_v_addd(&work->ptr.p_double[1], 1, &a->ptr.pp_double[l][aj1], 1, ae_v_len(1,crows), v); + } + ae_v_add(&c->ptr.pp_double[ci1][k], c->stride, &work->ptr.p_double[1], 1, ae_v_len(ci1,ci2)); + } + return; + } + else + { + for(l=aj1; l<=aj2; l++) + { + k = ai2-ai1+1; + ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ai1][l], a->stride, ae_v_len(1,k)); + for(r=bi1; r<=bi2; r++) + { + v = ae_v_dotproduct(&work->ptr.p_double[1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(1,k)); + c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1]+alpha*v; + } + } + return; + } + } +} + + + + +void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Complex */ ae_vector* x, + ae_complex alpha, + /* Complex */ ae_vector* y, + ae_state *_state) +{ + ae_int_t i; + ae_int_t ba1; + ae_int_t ba2; + ae_int_t by1; + ae_int_t by2; + ae_int_t bx1; + ae_int_t bx2; + ae_int_t n; + ae_complex v; + + + n = i2-i1+1; + if( n<=0 ) + { + return; + } + + /* + * Let A = L + D + U, where + * L is strictly lower triangular (main diagonal is zero) + * D is diagonal + * U is strictly upper triangular (main diagonal is zero) + * + * A*x = L*x + D*x + U*x + * + * Calculate D*x first + */ + for(i=i1; i<=i2; i++) + { + y->ptr.p_complex[i-i1+1] = ae_c_mul(a->ptr.pp_complex[i][i],x->ptr.p_complex[i-i1+1]); + } + + /* + * Add L*x + U*x + */ + if( isupper ) + { + for(i=i1; i<=i2-1; i++) + { + + /* + * Add L*x to the result + */ + v = x->ptr.p_complex[i-i1+1]; + by1 = i-i1+2; + by2 = n; + ba1 = i+1; + ba2 = i2; + ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v); + + /* + * Add U*x to the result + */ + bx1 = i-i1+2; + bx2 = n; + ba1 = i+1; + ba2 = i2; + v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2)); + y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v); + } + } + else + { + for(i=i1+1; i<=i2; i++) + { + + /* + * Add L*x to the result + */ + bx1 = 1; + bx2 = i-i1; + ba1 = i1; + ba2 = i-1; + v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2)); + y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v); + + /* + * Add U*x to the result + */ + v = x->ptr.p_complex[i-i1+1]; + by1 = 1; + by2 = i-i1; + ba1 = i1; + ba2 = i-1; + ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v); + } + } + ae_v_cmulc(&y->ptr.p_complex[1], 1, ae_v_len(1,n), alpha); +} + + +void hermitianrank2update(/* Complex */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Complex */ ae_vector* x, + /* Complex */ ae_vector* y, + /* Complex */ ae_vector* t, + ae_complex alpha, + ae_state *_state) +{ + ae_int_t i; + ae_int_t tp1; + ae_int_t tp2; + ae_complex v; + + + if( isupper ) + { + for(i=i1; i<=i2; i++) + { + tp1 = i+1-i1; + tp2 = i2-i1+1; + v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]); + ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v); + v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]); + ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v); + ae_v_cadd(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i,i2)); + } + } + else + { + for(i=i1; i<=i2; i++) + { + tp1 = 1; + tp2 = i+1-i1; + v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]); + ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v); + v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]); + ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v); + ae_v_cadd(&a->ptr.pp_complex[i][i1], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i1,i)); + } + } +} + + + + +/************************************************************************* +Generation of an elementary reflection transformation + +The subroutine generates elementary reflection H of order N, so that, for +a given X, the following equality holds true: + + ( X(1) ) ( Beta ) +H * ( .. ) = ( 0 ) + ( X(n) ) ( 0 ) + +where + ( V(1) ) +H = 1 - Tau * ( .. ) * ( V(1), ..., V(n) ) + ( V(n) ) + +where the first component of vector V equals 1. + +Input parameters: + X - vector. Array whose index ranges within [1..N]. + N - reflection order. + +Output parameters: + X - components from 2 to N are replaced with vector V. + The first component is replaced with parameter Beta. + Tau - scalar value Tau. If X is a null vector, Tau equals 0, + otherwise 1 <= Tau <= 2. + +This subroutine is the modification of the DLARFG subroutines from +the LAPACK library. + +MODIFICATIONS: + 24.12.2005 sign(Alpha) was replaced with an analogous to the Fortran SIGN code. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void generatereflection(/* Real */ ae_vector* x, + ae_int_t n, + double* tau, + ae_state *_state) +{ + ae_int_t j; + double alpha; + double xnorm; + double v; + double beta; + double mx; + double s; + + *tau = 0; + + if( n<=1 ) + { + *tau = 0; + return; + } + + /* + * Scale if needed (to avoid overflow/underflow during intermediate + * calculations). + */ + mx = 0; + for(j=1; j<=n; j++) + { + mx = ae_maxreal(ae_fabs(x->ptr.p_double[j], _state), mx, _state); + } + s = 1; + if( ae_fp_neq(mx,0) ) + { + if( ae_fp_less_eq(mx,ae_minrealnumber/ae_machineepsilon) ) + { + s = ae_minrealnumber/ae_machineepsilon; + v = 1/s; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v); + mx = mx*v; + } + else + { + if( ae_fp_greater_eq(mx,ae_maxrealnumber*ae_machineepsilon) ) + { + s = ae_maxrealnumber*ae_machineepsilon; + v = 1/s; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v); + mx = mx*v; + } + } + } + + /* + * XNORM = DNRM2( N-1, X, INCX ) + */ + alpha = x->ptr.p_double[1]; + xnorm = 0; + if( ae_fp_neq(mx,0) ) + { + for(j=2; j<=n; j++) + { + xnorm = xnorm+ae_sqr(x->ptr.p_double[j]/mx, _state); + } + xnorm = ae_sqrt(xnorm, _state)*mx; + } + if( ae_fp_eq(xnorm,0) ) + { + + /* + * H = I + */ + *tau = 0; + x->ptr.p_double[1] = x->ptr.p_double[1]*s; + return; + } + + /* + * general case + */ + mx = ae_maxreal(ae_fabs(alpha, _state), ae_fabs(xnorm, _state), _state); + beta = -mx*ae_sqrt(ae_sqr(alpha/mx, _state)+ae_sqr(xnorm/mx, _state), _state); + if( ae_fp_less(alpha,0) ) + { + beta = -beta; + } + *tau = (beta-alpha)/beta; + v = 1/(alpha-beta); + ae_v_muld(&x->ptr.p_double[2], 1, ae_v_len(2,n), v); + x->ptr.p_double[1] = beta; + + /* + * Scale back outputs + */ + x->ptr.p_double[1] = x->ptr.p_double[1]*s; +} + + +/************************************************************************* +Application of an elementary reflection to a rectangular matrix of size MxN + +The algorithm pre-multiplies the matrix by an elementary reflection transformation +which is given by column V and scalar Tau (see the description of the +GenerateReflection procedure). Not the whole matrix but only a part of it +is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements +of this submatrix are changed. + +Input parameters: + C - matrix to be transformed. + Tau - scalar defining the transformation. + V - column defining the transformation. + Array whose index ranges within [1..M2-M1+1]. + M1, M2 - range of rows to be transformed. + N1, N2 - range of columns to be transformed. + WORK - working array whose indexes goes from N1 to N2. + +Output parameters: + C - the result of multiplying the input matrix C by the + transformation matrix which is given by Tau and V. + If N1>N2 or M1>M2, C is not modified. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void applyreflectionfromtheleft(/* Real */ ae_matrix* c, + double tau, + /* Real */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* work, + ae_state *_state) +{ + double t; + ae_int_t i; + ae_int_t vm; + + + if( (ae_fp_eq(tau,0)||n1>n2)||m1>m2 ) + { + return; + } + + /* + * w := C' * v + */ + vm = m2-m1+1; + for(i=n1; i<=n2; i++) + { + work->ptr.p_double[i] = 0; + } + for(i=m1; i<=m2; i++) + { + t = v->ptr.p_double[i+1-m1]; + ae_v_addd(&work->ptr.p_double[n1], 1, &c->ptr.pp_double[i][n1], 1, ae_v_len(n1,n2), t); + } + + /* + * C := C - tau * v * w' + */ + for(i=m1; i<=m2; i++) + { + t = v->ptr.p_double[i-m1+1]*tau; + ae_v_subd(&c->ptr.pp_double[i][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2), t); + } +} + + +/************************************************************************* +Application of an elementary reflection to a rectangular matrix of size MxN + +The algorithm post-multiplies the matrix by an elementary reflection transformation +which is given by column V and scalar Tau (see the description of the +GenerateReflection procedure). Not the whole matrix but only a part of it +is transformed (rows from M1 to M2, columns from N1 to N2). Only the +elements of this submatrix are changed. + +Input parameters: + C - matrix to be transformed. + Tau - scalar defining the transformation. + V - column defining the transformation. + Array whose index ranges within [1..N2-N1+1]. + M1, M2 - range of rows to be transformed. + N1, N2 - range of columns to be transformed. + WORK - working array whose indexes goes from M1 to M2. + +Output parameters: + C - the result of multiplying the input matrix C by the + transformation matrix which is given by Tau and V. + If N1>N2 or M1>M2, C is not modified. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void applyreflectionfromtheright(/* Real */ ae_matrix* c, + double tau, + /* Real */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* work, + ae_state *_state) +{ + double t; + ae_int_t i; + ae_int_t vm; + + + if( (ae_fp_eq(tau,0)||n1>n2)||m1>m2 ) + { + return; + } + vm = n2-n1+1; + for(i=m1; i<=m2; i++) + { + t = ae_v_dotproduct(&c->ptr.pp_double[i][n1], 1, &v->ptr.p_double[1], 1, ae_v_len(n1,n2)); + t = t*tau; + ae_v_subd(&c->ptr.pp_double[i][n1], 1, &v->ptr.p_double[1], 1, ae_v_len(n1,n2), t); + } +} + + + + +/************************************************************************* +Generation of an elementary complex reflection transformation + +The subroutine generates elementary complex reflection H of order N, so +that, for a given X, the following equality holds true: + + ( X(1) ) ( Beta ) +H' * ( .. ) = ( 0 ), H'*H = I, Beta is a real number + ( X(n) ) ( 0 ) + +where + + ( V(1) ) +H = 1 - Tau * ( .. ) * ( conj(V(1)), ..., conj(V(n)) ) + ( V(n) ) + +where the first component of vector V equals 1. + +Input parameters: + X - vector. Array with elements [1..N]. + N - reflection order. + +Output parameters: + X - components from 2 to N are replaced by vector V. + The first component is replaced with parameter Beta. + Tau - scalar value Tau. + +This subroutine is the modification of CLARFG subroutines from the LAPACK +library. It has similar functionality except for the fact that it doesn’t +handle errors when intermediate results cause an overflow. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void complexgeneratereflection(/* Complex */ ae_vector* x, + ae_int_t n, + ae_complex* tau, + ae_state *_state) +{ + ae_int_t j; + ae_complex alpha; + double alphi; + double alphr; + double beta; + double xnorm; + double mx; + ae_complex t; + double s; + ae_complex v; + + tau->x = 0; + tau->y = 0; + + if( n<=0 ) + { + *tau = ae_complex_from_d(0); + return; + } + + /* + * Scale if needed (to avoid overflow/underflow during intermediate + * calculations). + */ + mx = 0; + for(j=1; j<=n; j++) + { + mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state); + } + s = 1; + if( ae_fp_neq(mx,0) ) + { + if( ae_fp_less(mx,1) ) + { + s = ae_sqrt(ae_minrealnumber, _state); + v = ae_complex_from_d(1/s); + ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v); + } + else + { + s = ae_sqrt(ae_maxrealnumber, _state); + v = ae_complex_from_d(1/s); + ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v); + } + } + + /* + * calculate + */ + alpha = x->ptr.p_complex[1]; + mx = 0; + for(j=2; j<=n; j++) + { + mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state); + } + xnorm = 0; + if( ae_fp_neq(mx,0) ) + { + for(j=2; j<=n; j++) + { + t = ae_c_div_d(x->ptr.p_complex[j],mx); + xnorm = xnorm+ae_c_mul(t,ae_c_conj(t, _state)).x; + } + xnorm = ae_sqrt(xnorm, _state)*mx; + } + alphr = alpha.x; + alphi = alpha.y; + if( ae_fp_eq(xnorm,0)&&ae_fp_eq(alphi,0) ) + { + *tau = ae_complex_from_d(0); + x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s); + return; + } + mx = ae_maxreal(ae_fabs(alphr, _state), ae_fabs(alphi, _state), _state); + mx = ae_maxreal(mx, ae_fabs(xnorm, _state), _state); + beta = -mx*ae_sqrt(ae_sqr(alphr/mx, _state)+ae_sqr(alphi/mx, _state)+ae_sqr(xnorm/mx, _state), _state); + if( ae_fp_less(alphr,0) ) + { + beta = -beta; + } + tau->x = (beta-alphr)/beta; + tau->y = -alphi/beta; + alpha = ae_c_d_div(1,ae_c_sub_d(alpha,beta)); + if( n>1 ) + { + ae_v_cmulc(&x->ptr.p_complex[2], 1, ae_v_len(2,n), alpha); + } + alpha = ae_complex_from_d(beta); + x->ptr.p_complex[1] = alpha; + + /* + * Scale back + */ + x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s); +} + + +/************************************************************************* +Application of an elementary reflection to a rectangular matrix of size MxN + +The algorithm pre-multiplies the matrix by an elementary reflection +transformation which is given by column V and scalar Tau (see the +description of the GenerateReflection). Not the whole matrix but only a +part of it is transformed (rows from M1 to M2, columns from N1 to N2). Only +the elements of this submatrix are changed. + +Note: the matrix is multiplied by H, not by H'. If it is required to +multiply the matrix by H', it is necessary to pass Conj(Tau) instead of Tau. + +Input parameters: + C - matrix to be transformed. + Tau - scalar defining transformation. + V - column defining transformation. + Array whose index ranges within [1..M2-M1+1] + M1, M2 - range of rows to be transformed. + N1, N2 - range of columns to be transformed. + WORK - working array whose index goes from N1 to N2. + +Output parameters: + C - the result of multiplying the input matrix C by the + transformation matrix which is given by Tau and V. + If N1>N2 or M1>M2, C is not modified. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c, + ae_complex tau, + /* Complex */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Complex */ ae_vector* work, + ae_state *_state) +{ + ae_complex t; + ae_int_t i; + ae_int_t vm; + + + if( (ae_c_eq_d(tau,0)||n1>n2)||m1>m2 ) + { + return; + } + + /* + * w := C^T * conj(v) + */ + vm = m2-m1+1; + for(i=n1; i<=n2; i++) + { + work->ptr.p_complex[i] = ae_complex_from_d(0); + } + for(i=m1; i<=m2; i++) + { + t = ae_c_conj(v->ptr.p_complex[i+1-m1], _state); + ae_v_caddc(&work->ptr.p_complex[n1], 1, &c->ptr.pp_complex[i][n1], 1, "N", ae_v_len(n1,n2), t); + } + + /* + * C := C - tau * v * w^T + */ + for(i=m1; i<=m2; i++) + { + t = ae_c_mul(v->ptr.p_complex[i-m1+1],tau); + ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &work->ptr.p_complex[n1], 1, "N", ae_v_len(n1,n2), t); + } +} + + +/************************************************************************* +Application of an elementary reflection to a rectangular matrix of size MxN + +The algorithm post-multiplies the matrix by an elementary reflection +transformation which is given by column V and scalar Tau (see the +description of the GenerateReflection). Not the whole matrix but only a +part of it is transformed (rows from M1 to M2, columns from N1 to N2). +Only the elements of this submatrix are changed. + +Input parameters: + C - matrix to be transformed. + Tau - scalar defining transformation. + V - column defining transformation. + Array whose index ranges within [1..N2-N1+1] + M1, M2 - range of rows to be transformed. + N1, N2 - range of columns to be transformed. + WORK - working array whose index goes from M1 to M2. + +Output parameters: + C - the result of multiplying the input matrix C by the + transformation matrix which is given by Tau and V. + If N1>N2 or M1>M2, C is not modified. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c, + ae_complex tau, + /* Complex */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Complex */ ae_vector* work, + ae_state *_state) +{ + ae_complex t; + ae_int_t i; + ae_int_t vm; + + + if( (ae_c_eq_d(tau,0)||n1>n2)||m1>m2 ) + { + return; + } + + /* + * w := C * v + */ + vm = n2-n1+1; + for(i=m1; i<=m2; i++) + { + t = ae_v_cdotproduct(&c->ptr.pp_complex[i][n1], 1, "N", &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2)); + work->ptr.p_complex[i] = t; + } + + /* + * C := C - w * conj(v^T) + */ + ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm)); + for(i=m1; i<=m2; i++) + { + t = ae_c_mul(work->ptr.p_complex[i],tau); + ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2), t); + } + ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm)); +} + + + + +void symmetricmatrixvectormultiply(/* Real */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* x, + double alpha, + /* Real */ ae_vector* y, + ae_state *_state) +{ + ae_int_t i; + ae_int_t ba1; + ae_int_t ba2; + ae_int_t by1; + ae_int_t by2; + ae_int_t bx1; + ae_int_t bx2; + ae_int_t n; + double v; + + + n = i2-i1+1; + if( n<=0 ) + { + return; + } + + /* + * Let A = L + D + U, where + * L is strictly lower triangular (main diagonal is zero) + * D is diagonal + * U is strictly upper triangular (main diagonal is zero) + * + * A*x = L*x + D*x + U*x + * + * Calculate D*x first + */ + for(i=i1; i<=i2; i++) + { + y->ptr.p_double[i-i1+1] = a->ptr.pp_double[i][i]*x->ptr.p_double[i-i1+1]; + } + + /* + * Add L*x + U*x + */ + if( isupper ) + { + for(i=i1; i<=i2-1; i++) + { + + /* + * Add L*x to the result + */ + v = x->ptr.p_double[i-i1+1]; + by1 = i-i1+2; + by2 = n; + ba1 = i+1; + ba2 = i2; + ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v); + + /* + * Add U*x to the result + */ + bx1 = i-i1+2; + bx2 = n; + ba1 = i+1; + ba2 = i2; + v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2)); + y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v; + } + } + else + { + for(i=i1+1; i<=i2; i++) + { + + /* + * Add L*x to the result + */ + bx1 = 1; + bx2 = i-i1; + ba1 = i1; + ba2 = i-1; + v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2)); + y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v; + + /* + * Add U*x to the result + */ + v = x->ptr.p_double[i-i1+1]; + by1 = 1; + by2 = i-i1; + ba1 = i1; + ba2 = i-1; + ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v); + } + } + ae_v_muld(&y->ptr.p_double[1], 1, ae_v_len(1,n), alpha); +} + + +void symmetricrank2update(/* Real */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + /* Real */ ae_vector* t, + double alpha, + ae_state *_state) +{ + ae_int_t i; + ae_int_t tp1; + ae_int_t tp2; + double v; + + + if( isupper ) + { + for(i=i1; i<=i2; i++) + { + tp1 = i+1-i1; + tp2 = i2-i1+1; + v = x->ptr.p_double[i+1-i1]; + ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v); + v = y->ptr.p_double[i+1-i1]; + ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v); + ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha); + ae_v_add(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i,i2)); + } + } + else + { + for(i=i1; i<=i2; i++) + { + tp1 = 1; + tp2 = i+1-i1; + v = x->ptr.p_double[i+1-i1]; + ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v); + v = y->ptr.p_double[i+1-i1]; + ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v); + ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha); + ae_v_add(&a->ptr.pp_double[i][i1], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i1,i)); + } + } +} + + + + +/************************************************************************* +Application of a sequence of elementary rotations to a matrix + +The algorithm pre-multiplies the matrix by a sequence of rotation +transformations which is given by arrays C and S. Depending on the value +of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true) +rows are rotated, or the rows N and N-1, N-2 and N-3 and so on, are rotated. + +Not the whole matrix but only a part of it is transformed (rows from M1 to +M2, columns from N1 to N2). Only the elements of this submatrix are changed. + +Input parameters: + IsForward - the sequence of the rotation application. + M1,M2 - the range of rows to be transformed. + N1, N2 - the range of columns to be transformed. + C,S - transformation coefficients. + Array whose index ranges within [1..M2-M1]. + A - processed matrix. + WORK - working array whose index ranges within [N1..N2]. + +Output parameters: + A - transformed matrix. + +Utility subroutine. +*************************************************************************/ +void applyrotationsfromtheleft(ae_bool isforward, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* c, + /* Real */ ae_vector* s, + /* Real */ ae_matrix* a, + /* Real */ ae_vector* work, + ae_state *_state) +{ + ae_int_t j; + ae_int_t jp1; + double ctemp; + double stemp; + double temp; + + + if( m1>m2||n1>n2 ) + { + return; + } + + /* + * Form P * A + */ + if( isforward ) + { + if( n1!=n2 ) + { + + /* + * Common case: N1<>N2 + */ + for(j=m1; j<=m2-1; j++) + { + ctemp = c->ptr.p_double[j-m1+1]; + stemp = s->ptr.p_double[j-m1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + jp1 = j+1; + ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp); + ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp); + ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp); + ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp); + ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2)); + } + } + } + else + { + + /* + * Special case: N1=N2 + */ + for(j=m1; j<=m2-1; j++) + { + ctemp = c->ptr.p_double[j-m1+1]; + stemp = s->ptr.p_double[j-m1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + temp = a->ptr.pp_double[j+1][n1]; + a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1]; + a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1]; + } + } + } + } + else + { + if( n1!=n2 ) + { + + /* + * Common case: N1<>N2 + */ + for(j=m2-1; j>=m1; j--) + { + ctemp = c->ptr.p_double[j-m1+1]; + stemp = s->ptr.p_double[j-m1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + jp1 = j+1; + ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp); + ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp); + ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp); + ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp); + ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2)); + } + } + } + else + { + + /* + * Special case: N1=N2 + */ + for(j=m2-1; j>=m1; j--) + { + ctemp = c->ptr.p_double[j-m1+1]; + stemp = s->ptr.p_double[j-m1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + temp = a->ptr.pp_double[j+1][n1]; + a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1]; + a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1]; + } + } + } + } +} + + +/************************************************************************* +Application of a sequence of elementary rotations to a matrix + +The algorithm post-multiplies the matrix by a sequence of rotation +transformations which is given by arrays C and S. Depending on the value +of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true) +rows are rotated, or the rows N and N-1, N-2 and N-3 and so on are rotated. + +Not the whole matrix but only a part of it is transformed (rows from M1 +to M2, columns from N1 to N2). Only the elements of this submatrix are changed. + +Input parameters: + IsForward - the sequence of the rotation application. + M1,M2 - the range of rows to be transformed. + N1, N2 - the range of columns to be transformed. + C,S - transformation coefficients. + Array whose index ranges within [1..N2-N1]. + A - processed matrix. + WORK - working array whose index ranges within [M1..M2]. + +Output parameters: + A - transformed matrix. + +Utility subroutine. +*************************************************************************/ +void applyrotationsfromtheright(ae_bool isforward, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* c, + /* Real */ ae_vector* s, + /* Real */ ae_matrix* a, + /* Real */ ae_vector* work, + ae_state *_state) +{ + ae_int_t j; + ae_int_t jp1; + double ctemp; + double stemp; + double temp; + + + + /* + * Form A * P' + */ + if( isforward ) + { + if( m1!=m2 ) + { + + /* + * Common case: M1<>M2 + */ + for(j=n1; j<=n2-1; j++) + { + ctemp = c->ptr.p_double[j-n1+1]; + stemp = s->ptr.p_double[j-n1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + jp1 = j+1; + ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp); + ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp); + ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp); + ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp); + ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2)); + } + } + } + else + { + + /* + * Special case: M1=M2 + */ + for(j=n1; j<=n2-1; j++) + { + ctemp = c->ptr.p_double[j-n1+1]; + stemp = s->ptr.p_double[j-n1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + temp = a->ptr.pp_double[m1][j+1]; + a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j]; + a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j]; + } + } + } + } + else + { + if( m1!=m2 ) + { + + /* + * Common case: M1<>M2 + */ + for(j=n2-1; j>=n1; j--) + { + ctemp = c->ptr.p_double[j-n1+1]; + stemp = s->ptr.p_double[j-n1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + jp1 = j+1; + ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp); + ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp); + ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp); + ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp); + ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2)); + } + } + } + else + { + + /* + * Special case: M1=M2 + */ + for(j=n2-1; j>=n1; j--) + { + ctemp = c->ptr.p_double[j-n1+1]; + stemp = s->ptr.p_double[j-n1+1]; + if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) ) + { + temp = a->ptr.pp_double[m1][j+1]; + a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j]; + a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j]; + } + } + } + } +} + + +/************************************************************************* +The subroutine generates the elementary rotation, so that: + +[ CS SN ] . [ F ] = [ R ] +[ -SN CS ] [ G ] [ 0 ] + +CS**2 + SN**2 = 1 +*************************************************************************/ +void generaterotation(double f, + double g, + double* cs, + double* sn, + double* r, + ae_state *_state) +{ + double f1; + double g1; + + *cs = 0; + *sn = 0; + *r = 0; + + if( ae_fp_eq(g,0) ) + { + *cs = 1; + *sn = 0; + *r = f; + } + else + { + if( ae_fp_eq(f,0) ) + { + *cs = 0; + *sn = 1; + *r = g; + } + else + { + f1 = f; + g1 = g; + if( ae_fp_greater(ae_fabs(f1, _state),ae_fabs(g1, _state)) ) + { + *r = ae_fabs(f1, _state)*ae_sqrt(1+ae_sqr(g1/f1, _state), _state); + } + else + { + *r = ae_fabs(g1, _state)*ae_sqrt(1+ae_sqr(f1/g1, _state), _state); + } + *cs = f1/(*r); + *sn = g1/(*r); + if( ae_fp_greater(ae_fabs(f, _state),ae_fabs(g, _state))&&ae_fp_less(*cs,0) ) + { + *cs = -*cs; + *sn = -*sn; + *r = -*r; + } + } + } +} + + + + +/************************************************************************* +Subroutine performing the Schur decomposition of a matrix in upper +Hessenberg form using the QR algorithm with multiple shifts. + +The source matrix H is represented as S'*H*S = T, where H - matrix in +upper Hessenberg form, S - orthogonal matrix (Schur vectors), T - upper +quasi-triangular matrix (with blocks of sizes 1x1 and 2x2 on the main +diagonal). + +Input parameters: + H - matrix to be decomposed. + Array whose indexes range within [1..N, 1..N]. + N - size of H, N>=0. + + +Output parameters: + H – contains the matrix T. + Array whose indexes range within [1..N, 1..N]. + All elements below the blocks on the main diagonal are equal + to 0. + S - contains Schur vectors. + Array whose indexes range within [1..N, 1..N]. + +Note 1: + The block structure of matrix T could be easily recognized: since all + the elements below the blocks are zeros, the elements a[i+1,i] which + are equal to 0 show the block border. + +Note 2: + the algorithm performance depends on the value of the internal + parameter NS of InternalSchurDecomposition subroutine which defines + the number of shifts in the QR algorithm (analog of the block width + in block matrix algorithms in linear algebra). If you require maximum + performance on your machine, it is recommended to adjust this + parameter manually. + +Result: + True, if the algorithm has converged and the parameters H and S contain + the result. + False, if the algorithm has not converged. + +Algorithm implemented on the basis of subroutine DHSEQR (LAPACK 3.0 library). +*************************************************************************/ +ae_bool upperhessenbergschurdecomposition(/* Real */ ae_matrix* h, + ae_int_t n, + /* Real */ ae_matrix* s, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector wi; + ae_vector wr; + ae_int_t info; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(s); + ae_vector_init(&wi, 0, DT_REAL, _state, ae_true); + ae_vector_init(&wr, 0, DT_REAL, _state, ae_true); + + internalschurdecomposition(h, n, 1, 2, &wr, &wi, s, &info, _state); + result = info==0; + ae_frame_leave(_state); + return result; +} + + +void internalschurdecomposition(/* Real */ ae_matrix* h, + ae_int_t n, + ae_int_t tneeded, + ae_int_t zneeded, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + /* Real */ ae_matrix* z, + ae_int_t* info, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_int_t i; + ae_int_t i1; + ae_int_t i2; + ae_int_t ierr; + ae_int_t ii; + ae_int_t itemp; + ae_int_t itn; + ae_int_t its; + ae_int_t j; + ae_int_t k; + ae_int_t l; + ae_int_t maxb; + ae_int_t nr; + ae_int_t ns; + ae_int_t nv; + double absw; + double ovfl; + double smlnum; + double tau; + double temp; + double tst1; + double ulp; + double unfl; + ae_matrix s; + ae_vector v; + ae_vector vv; + ae_vector workc1; + ae_vector works1; + ae_vector workv3; + ae_vector tmpwr; + ae_vector tmpwi; + ae_bool initz; + ae_bool wantt; + ae_bool wantz; + double cnst; + ae_bool failflag; + ae_int_t p1; + ae_int_t p2; + double vt; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(wr); + ae_vector_clear(wi); + *info = 0; + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&vv, 0, DT_REAL, _state, ae_true); + ae_vector_init(&workc1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&works1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&workv3, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tmpwr, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tmpwi, 0, DT_REAL, _state, ae_true); + + + /* + * Set the order of the multi-shift QR algorithm to be used. + * If you want to tune algorithm, change this values + */ + ns = 12; + maxb = 50; + + /* + * Now 2 < NS <= MAXB < NH. + */ + maxb = ae_maxint(3, maxb, _state); + ns = ae_minint(maxb, ns, _state); + + /* + * Initialize + */ + cnst = 1.5; + ae_vector_set_length(&work, ae_maxint(n, 1, _state)+1, _state); + ae_matrix_set_length(&s, ns+1, ns+1, _state); + ae_vector_set_length(&v, ns+1+1, _state); + ae_vector_set_length(&vv, ns+1+1, _state); + ae_vector_set_length(wr, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(wi, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(&workc1, 1+1, _state); + ae_vector_set_length(&works1, 1+1, _state); + ae_vector_set_length(&workv3, 3+1, _state); + ae_vector_set_length(&tmpwr, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(&tmpwi, ae_maxint(n, 1, _state)+1, _state); + ae_assert(n>=0, "InternalSchurDecomposition: incorrect N!", _state); + ae_assert(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!", _state); + ae_assert((zneeded==0||zneeded==1)||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!", _state); + wantt = tneeded==1; + initz = zneeded==2; + wantz = zneeded!=0; + *info = 0; + + /* + * Initialize Z, if necessary + */ + if( initz ) + { + ae_matrix_set_length(z, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + for(j=1; j<=n; j++) + { + if( i==j ) + { + z->ptr.pp_double[i][j] = 1; + } + else + { + z->ptr.pp_double[i][j] = 0; + } + } + } + } + + /* + * Quick return if possible + */ + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + wr->ptr.p_double[1] = h->ptr.pp_double[1][1]; + wi->ptr.p_double[1] = 0; + ae_frame_leave(_state); + return; + } + + /* + * Set rows and columns 1 to N to zero below the first + * subdiagonal. + */ + for(j=1; j<=n-2; j++) + { + for(i=j+2; i<=n; i++) + { + h->ptr.pp_double[i][j] = 0; + } + } + + /* + * Test if N is sufficiently small + */ + if( (ns<=2||ns>n)||maxb>=n ) + { + + /* + * Use the standard double-shift algorithm + */ + hsschur_internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state); + + /* + * fill entries under diagonal blocks of T with zeros + */ + if( wantt ) + { + j = 1; + while(j<=n) + { + if( ae_fp_eq(wi->ptr.p_double[j],0) ) + { + for(i=j+1; i<=n; i++) + { + h->ptr.pp_double[i][j] = 0; + } + j = j+1; + } + else + { + for(i=j+2; i<=n; i++) + { + h->ptr.pp_double[i][j] = 0; + h->ptr.pp_double[i][j+1] = 0; + } + j = j+2; + } + } + } + ae_frame_leave(_state); + return; + } + unfl = ae_minrealnumber; + ovfl = 1/unfl; + ulp = 2*ae_machineepsilon; + smlnum = unfl*(n/ulp); + + /* + * I1 and I2 are the indices of the first row and last column of H + * to which transformations must be applied. If eigenvalues only are + * being computed, I1 and I2 are set inside the main loop. + */ + i1 = 1; + i2 = n; + + /* + * ITN is the total number of multiple-shift QR iterations allowed. + */ + itn = 30*n; + + /* + * The main loop begins here. I is the loop index and decreases from + * IHI to ILO in steps of at most MAXB. Each iteration of the loop + * works with the active submatrix in rows and columns L to I. + * Eigenvalues I+1 to IHI have already converged. Either L = ILO or + * H(L,L-1) is negligible so that the matrix splits. + */ + i = n; + for(;;) + { + l = 1; + if( i<1 ) + { + + /* + * fill entries under diagonal blocks of T with zeros + */ + if( wantt ) + { + j = 1; + while(j<=n) + { + if( ae_fp_eq(wi->ptr.p_double[j],0) ) + { + for(i=j+1; i<=n; i++) + { + h->ptr.pp_double[i][j] = 0; + } + j = j+1; + } + else + { + for(i=j+2; i<=n; i++) + { + h->ptr.pp_double[i][j] = 0; + h->ptr.pp_double[i][j+1] = 0; + } + j = j+2; + } + } + } + + /* + * Exit + */ + ae_frame_leave(_state); + return; + } + + /* + * Perform multiple-shift QR iterations on rows and columns ILO to I + * until a submatrix of order at most MAXB splits off at the bottom + * because a subdiagonal element has become negligible. + */ + failflag = ae_true; + for(its=0; its<=itn; its++) + { + + /* + * Look for a single small subdiagonal element. + */ + for(k=i; k>=l+1; k--) + { + tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state); + if( ae_fp_eq(tst1,0) ) + { + tst1 = upperhessenberg1norm(h, l, i, l, i, &work, _state); + } + if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) ) + { + break; + } + } + l = k; + if( l>1 ) + { + + /* + * H(L,L-1) is negligible. + */ + h->ptr.pp_double[l][l-1] = 0; + } + + /* + * Exit from loop if a submatrix of order <= MAXB has split off. + */ + if( l>=i-maxb+1 ) + { + failflag = ae_false; + break; + } + + /* + * Now the active submatrix is in rows and columns L to I. If + * eigenvalues only are being computed, only the active submatrix + * need be transformed. + */ + if( its==20||its==30 ) + { + + /* + * Exceptional shifts. + */ + for(ii=i-ns+1; ii<=i; ii++) + { + wr->ptr.p_double[ii] = cnst*(ae_fabs(h->ptr.pp_double[ii][ii-1], _state)+ae_fabs(h->ptr.pp_double[ii][ii], _state)); + wi->ptr.p_double[ii] = 0; + } + } + else + { + + /* + * Use eigenvalues of trailing submatrix of order NS as shifts. + */ + copymatrix(h, i-ns+1, i, i-ns+1, i, &s, 1, ns, 1, ns, _state); + hsschur_internalauxschur(ae_false, ae_false, ns, 1, ns, &s, &tmpwr, &tmpwi, 1, ns, z, &work, &workv3, &workc1, &works1, &ierr, _state); + for(p1=1; p1<=ns; p1++) + { + wr->ptr.p_double[i-ns+p1] = tmpwr.ptr.p_double[p1]; + wi->ptr.p_double[i-ns+p1] = tmpwi.ptr.p_double[p1]; + } + if( ierr>0 ) + { + + /* + * If DLAHQR failed to compute all NS eigenvalues, use the + * unconverged diagonal elements as the remaining shifts. + */ + for(ii=1; ii<=ierr; ii++) + { + wr->ptr.p_double[i-ns+ii] = s.ptr.pp_double[ii][ii]; + wi->ptr.p_double[i-ns+ii] = 0; + } + } + } + + /* + * Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + * where G is the Hessenberg submatrix H(L:I,L:I) and w is + * the vector of shifts (stored in WR and WI). The result is + * stored in the local array V. + */ + v.ptr.p_double[1] = 1; + for(ii=2; ii<=ns+1; ii++) + { + v.ptr.p_double[ii] = 0; + } + nv = 1; + for(j=i-ns+1; j<=i; j++) + { + if( ae_fp_greater_eq(wi->ptr.p_double[j],0) ) + { + if( ae_fp_eq(wi->ptr.p_double[j],0) ) + { + + /* + * real shift + */ + p1 = nv+1; + ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1)); + matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &vv, 1, nv, 1.0, &v, 1, nv+1, -wr->ptr.p_double[j], _state); + nv = nv+1; + } + else + { + if( ae_fp_greater(wi->ptr.p_double[j],0) ) + { + + /* + * complex conjugate pair of shifts + */ + p1 = nv+1; + ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1)); + matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &v, 1, nv, 1.0, &vv, 1, nv+1, -2*wr->ptr.p_double[j], _state); + itemp = vectoridxabsmax(&vv, 1, nv+1, _state); + temp = 1/ae_maxreal(ae_fabs(vv.ptr.p_double[itemp], _state), smlnum, _state); + p1 = nv+1; + ae_v_muld(&vv.ptr.p_double[1], 1, ae_v_len(1,p1), temp); + absw = pythag2(wr->ptr.p_double[j], wi->ptr.p_double[j], _state); + temp = temp*absw*absw; + matrixvectormultiply(h, l, l+nv+1, l, l+nv, ae_false, &vv, 1, nv+1, 1.0, &v, 1, nv+2, temp, _state); + nv = nv+2; + } + } + + /* + * Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + * reset it to the unit vector. + */ + itemp = vectoridxabsmax(&v, 1, nv, _state); + temp = ae_fabs(v.ptr.p_double[itemp], _state); + if( ae_fp_eq(temp,0) ) + { + v.ptr.p_double[1] = 1; + for(ii=2; ii<=nv; ii++) + { + v.ptr.p_double[ii] = 0; + } + } + else + { + temp = ae_maxreal(temp, smlnum, _state); + vt = 1/temp; + ae_v_muld(&v.ptr.p_double[1], 1, ae_v_len(1,nv), vt); + } + } + } + + /* + * Multiple-shift QR step + */ + for(k=l; k<=i-1; k++) + { + + /* + * The first iteration of this loop determines a reflection G + * from the vector V and applies it from left and right to H, + * thus creating a nonzero bulge below the subdiagonal. + * + * Each subsequent iteration determines a reflection G to + * restore the Hessenberg form in the (K-1)th column, and thus + * chases the bulge one step toward the bottom of the active + * submatrix. NR is the order of G. + */ + nr = ae_minint(ns+1, i-k+1, _state); + if( k>l ) + { + p1 = k-1; + p2 = k+nr-1; + ae_v_move(&v.ptr.p_double[1], 1, &h->ptr.pp_double[k][p1], h->stride, ae_v_len(1,nr)); + } + generatereflection(&v, nr, &tau, _state); + if( k>l ) + { + h->ptr.pp_double[k][k-1] = v.ptr.p_double[1]; + for(ii=k+1; ii<=i; ii++) + { + h->ptr.pp_double[ii][k-1] = 0; + } + } + v.ptr.p_double[1] = 1; + + /* + * Apply G from the left to transform the rows of the matrix in + * columns K to I2. + */ + applyreflectionfromtheleft(h, tau, &v, k, k+nr-1, k, i2, &work, _state); + + /* + * Apply G from the right to transform the columns of the + * matrix in rows I1 to min(K+NR,I). + */ + applyreflectionfromtheright(h, tau, &v, i1, ae_minint(k+nr, i, _state), k, k+nr-1, &work, _state); + if( wantz ) + { + + /* + * Accumulate transformations in the matrix Z + */ + applyreflectionfromtheright(z, tau, &v, 1, n, k, k+nr-1, &work, _state); + } + } + } + + /* + * Failure to converge in remaining number of iterations + */ + if( failflag ) + { + *info = i; + ae_frame_leave(_state); + return; + } + + /* + * A submatrix of order <= MAXB in rows and columns L to I has split + * off. Use the double-shift QR algorithm to handle it. + */ + hsschur_internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state); + if( *info>0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * Decrement number of remaining iterations, and return to start of + * the main loop with a new value of I. + */ + itn = itn-its; + i = l-1; + } + ae_frame_leave(_state); +} + + +static void hsschur_internalauxschur(ae_bool wantt, + ae_bool wantz, + ae_int_t n, + ae_int_t ilo, + ae_int_t ihi, + /* Real */ ae_matrix* h, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + ae_int_t iloz, + ae_int_t ihiz, + /* Real */ ae_matrix* z, + /* Real */ ae_vector* work, + /* Real */ ae_vector* workv3, + /* Real */ ae_vector* workc1, + /* Real */ ae_vector* works1, + ae_int_t* info, + ae_state *_state) +{ + ae_int_t i; + ae_int_t i1; + ae_int_t i2; + ae_int_t itn; + ae_int_t its; + ae_int_t j; + ae_int_t k; + ae_int_t l; + ae_int_t m; + ae_int_t nh; + ae_int_t nr; + ae_int_t nz; + double ave; + double cs; + double disc; + double h00; + double h10; + double h11; + double h12; + double h21; + double h22; + double h33; + double h33s; + double h43h34; + double h44; + double h44s; + double ovfl; + double s; + double smlnum; + double sn; + double sum; + double t1; + double t2; + double t3; + double tst1; + double unfl; + double v1; + double v2; + double v3; + ae_bool failflag; + double dat1; + double dat2; + ae_int_t p1; + double him1im1; + double him1i; + double hiim1; + double hii; + double wrim1; + double wri; + double wiim1; + double wii; + double ulp; + + *info = 0; + + *info = 0; + dat1 = 0.75; + dat2 = -0.4375; + ulp = ae_machineepsilon; + + /* + * Quick return if possible + */ + if( n==0 ) + { + return; + } + if( ilo==ihi ) + { + wr->ptr.p_double[ilo] = h->ptr.pp_double[ilo][ilo]; + wi->ptr.p_double[ilo] = 0; + return; + } + nh = ihi-ilo+1; + nz = ihiz-iloz+1; + + /* + * Set machine-dependent constants for the stopping criterion. + * If norm(H) <= sqrt(OVFL), overflow should not occur. + */ + unfl = ae_minrealnumber; + ovfl = 1/unfl; + smlnum = unfl*(nh/ulp); + + /* + * I1 and I2 are the indices of the first row and last column of H + * to which transformations must be applied. If eigenvalues only are + * being computed, I1 and I2 are set inside the main loop. + */ + i1 = 1; + i2 = n; + + /* + * ITN is the total number of QR iterations allowed. + */ + itn = 30*nh; + + /* + * The main loop begins here. I is the loop index and decreases from + * IHI to ILO in steps of 1 or 2. Each iteration of the loop works + * with the active submatrix in rows and columns L to I. + * Eigenvalues I+1 to IHI have already converged. Either L = ILO or + * H(L,L-1) is negligible so that the matrix splits. + */ + i = ihi; + for(;;) + { + l = ilo; + if( i<ilo ) + { + return; + } + + /* + * Perform QR iterations on rows and columns ILO to I until a + * submatrix of order 1 or 2 splits off at the bottom because a + * subdiagonal element has become negligible. + */ + failflag = ae_true; + for(its=0; its<=itn; its++) + { + + /* + * Look for a single small subdiagonal element. + */ + for(k=i; k>=l+1; k--) + { + tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state); + if( ae_fp_eq(tst1,0) ) + { + tst1 = upperhessenberg1norm(h, l, i, l, i, work, _state); + } + if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) ) + { + break; + } + } + l = k; + if( l>ilo ) + { + + /* + * H(L,L-1) is negligible + */ + h->ptr.pp_double[l][l-1] = 0; + } + + /* + * Exit from loop if a submatrix of order 1 or 2 has split off. + */ + if( l>=i-1 ) + { + failflag = ae_false; + break; + } + + /* + * Now the active submatrix is in rows and columns L to I. If + * eigenvalues only are being computed, only the active submatrix + * need be transformed. + */ + if( its==10||its==20 ) + { + + /* + * Exceptional shift. + */ + s = ae_fabs(h->ptr.pp_double[i][i-1], _state)+ae_fabs(h->ptr.pp_double[i-1][i-2], _state); + h44 = dat1*s+h->ptr.pp_double[i][i]; + h33 = h44; + h43h34 = dat2*s*s; + } + else + { + + /* + * Prepare to use Francis' double shift + * (i.e. 2nd degree generalized Rayleigh quotient) + */ + h44 = h->ptr.pp_double[i][i]; + h33 = h->ptr.pp_double[i-1][i-1]; + h43h34 = h->ptr.pp_double[i][i-1]*h->ptr.pp_double[i-1][i]; + s = h->ptr.pp_double[i-1][i-2]*h->ptr.pp_double[i-1][i-2]; + disc = (h33-h44)*0.5; + disc = disc*disc+h43h34; + if( ae_fp_greater(disc,0) ) + { + + /* + * Real roots: use Wilkinson's shift twice + */ + disc = ae_sqrt(disc, _state); + ave = 0.5*(h33+h44); + if( ae_fp_greater(ae_fabs(h33, _state)-ae_fabs(h44, _state),0) ) + { + h33 = h33*h44-h43h34; + h44 = h33/(hsschur_extschursign(disc, ave, _state)+ave); + } + else + { + h44 = hsschur_extschursign(disc, ave, _state)+ave; + } + h33 = h44; + h43h34 = 0; + } + } + + /* + * Look for two consecutive small subdiagonal elements. + */ + for(m=i-2; m>=l; m--) + { + + /* + * Determine the effect of starting the double-shift QR + * iteration at row M, and see if this would make H(M,M-1) + * negligible. + */ + h11 = h->ptr.pp_double[m][m]; + h22 = h->ptr.pp_double[m+1][m+1]; + h21 = h->ptr.pp_double[m+1][m]; + h12 = h->ptr.pp_double[m][m+1]; + h44s = h44-h11; + h33s = h33-h11; + v1 = (h33s*h44s-h43h34)/h21+h12; + v2 = h22-h11-h33s-h44s; + v3 = h->ptr.pp_double[m+2][m+1]; + s = ae_fabs(v1, _state)+ae_fabs(v2, _state)+ae_fabs(v3, _state); + v1 = v1/s; + v2 = v2/s; + v3 = v3/s; + workv3->ptr.p_double[1] = v1; + workv3->ptr.p_double[2] = v2; + workv3->ptr.p_double[3] = v3; + if( m==l ) + { + break; + } + h00 = h->ptr.pp_double[m-1][m-1]; + h10 = h->ptr.pp_double[m][m-1]; + tst1 = ae_fabs(v1, _state)*(ae_fabs(h00, _state)+ae_fabs(h11, _state)+ae_fabs(h22, _state)); + if( ae_fp_less_eq(ae_fabs(h10, _state)*(ae_fabs(v2, _state)+ae_fabs(v3, _state)),ulp*tst1) ) + { + break; + } + } + + /* + * Double-shift QR step + */ + for(k=m; k<=i-1; k++) + { + + /* + * The first iteration of this loop determines a reflection G + * from the vector V and applies it from left and right to H, + * thus creating a nonzero bulge below the subdiagonal. + * + * Each subsequent iteration determines a reflection G to + * restore the Hessenberg form in the (K-1)th column, and thus + * chases the bulge one step toward the bottom of the active + * submatrix. NR is the order of G. + */ + nr = ae_minint(3, i-k+1, _state); + if( k>m ) + { + for(p1=1; p1<=nr; p1++) + { + workv3->ptr.p_double[p1] = h->ptr.pp_double[k+p1-1][k-1]; + } + } + generatereflection(workv3, nr, &t1, _state); + if( k>m ) + { + h->ptr.pp_double[k][k-1] = workv3->ptr.p_double[1]; + h->ptr.pp_double[k+1][k-1] = 0; + if( k<i-1 ) + { + h->ptr.pp_double[k+2][k-1] = 0; + } + } + else + { + if( m>l ) + { + h->ptr.pp_double[k][k-1] = -h->ptr.pp_double[k][k-1]; + } + } + v2 = workv3->ptr.p_double[2]; + t2 = t1*v2; + if( nr==3 ) + { + v3 = workv3->ptr.p_double[3]; + t3 = t1*v3; + + /* + * Apply G from the left to transform the rows of the matrix + * in columns K to I2. + */ + for(j=k; j<=i2; j++) + { + sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j]+v3*h->ptr.pp_double[k+2][j]; + h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1; + h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2; + h->ptr.pp_double[k+2][j] = h->ptr.pp_double[k+2][j]-sum*t3; + } + + /* + * Apply G from the right to transform the columns of the + * matrix in rows I1 to min(K+3,I). + */ + for(j=i1; j<=ae_minint(k+3, i, _state); j++) + { + sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1]+v3*h->ptr.pp_double[j][k+2]; + h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1; + h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2; + h->ptr.pp_double[j][k+2] = h->ptr.pp_double[j][k+2]-sum*t3; + } + if( wantz ) + { + + /* + * Accumulate transformations in the matrix Z + */ + for(j=iloz; j<=ihiz; j++) + { + sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1]+v3*z->ptr.pp_double[j][k+2]; + z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1; + z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2; + z->ptr.pp_double[j][k+2] = z->ptr.pp_double[j][k+2]-sum*t3; + } + } + } + else + { + if( nr==2 ) + { + + /* + * Apply G from the left to transform the rows of the matrix + * in columns K to I2. + */ + for(j=k; j<=i2; j++) + { + sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j]; + h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1; + h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2; + } + + /* + * Apply G from the right to transform the columns of the + * matrix in rows I1 to min(K+3,I). + */ + for(j=i1; j<=i; j++) + { + sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1]; + h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1; + h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2; + } + if( wantz ) + { + + /* + * Accumulate transformations in the matrix Z + */ + for(j=iloz; j<=ihiz; j++) + { + sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1]; + z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1; + z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2; + } + } + } + } + } + } + if( failflag ) + { + + /* + * Failure to converge in remaining number of iterations + */ + *info = i; + return; + } + if( l==i ) + { + + /* + * H(I,I-1) is negligible: one eigenvalue has converged. + */ + wr->ptr.p_double[i] = h->ptr.pp_double[i][i]; + wi->ptr.p_double[i] = 0; + } + else + { + if( l==i-1 ) + { + + /* + * H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + * + * Transform the 2-by-2 submatrix to standard Schur form, + * and compute and store the eigenvalues. + */ + him1im1 = h->ptr.pp_double[i-1][i-1]; + him1i = h->ptr.pp_double[i-1][i]; + hiim1 = h->ptr.pp_double[i][i-1]; + hii = h->ptr.pp_double[i][i]; + hsschur_aux2x2schur(&him1im1, &him1i, &hiim1, &hii, &wrim1, &wiim1, &wri, &wii, &cs, &sn, _state); + wr->ptr.p_double[i-1] = wrim1; + wi->ptr.p_double[i-1] = wiim1; + wr->ptr.p_double[i] = wri; + wi->ptr.p_double[i] = wii; + h->ptr.pp_double[i-1][i-1] = him1im1; + h->ptr.pp_double[i-1][i] = him1i; + h->ptr.pp_double[i][i-1] = hiim1; + h->ptr.pp_double[i][i] = hii; + if( wantt ) + { + + /* + * Apply the transformation to the rest of H. + */ + if( i2>i ) + { + workc1->ptr.p_double[1] = cs; + works1->ptr.p_double[1] = sn; + applyrotationsfromtheleft(ae_true, i-1, i, i+1, i2, workc1, works1, h, work, _state); + } + workc1->ptr.p_double[1] = cs; + works1->ptr.p_double[1] = sn; + applyrotationsfromtheright(ae_true, i1, i-2, i-1, i, workc1, works1, h, work, _state); + } + if( wantz ) + { + + /* + * Apply the transformation to Z. + */ + workc1->ptr.p_double[1] = cs; + works1->ptr.p_double[1] = sn; + applyrotationsfromtheright(ae_true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work, _state); + } + } + } + + /* + * Decrement number of remaining iterations, and return to start of + * the main loop with new value of I. + */ + itn = itn-its; + i = l-1; + } +} + + +static void hsschur_aux2x2schur(double* a, + double* b, + double* c, + double* d, + double* rt1r, + double* rt1i, + double* rt2r, + double* rt2i, + double* cs, + double* sn, + ae_state *_state) +{ + double multpl; + double aa; + double bb; + double bcmax; + double bcmis; + double cc; + double cs1; + double dd; + double eps; + double p; + double sab; + double sac; + double scl; + double sigma; + double sn1; + double tau; + double temp; + double z; + + *rt1r = 0; + *rt1i = 0; + *rt2r = 0; + *rt2i = 0; + *cs = 0; + *sn = 0; + + multpl = 4.0; + eps = ae_machineepsilon; + if( ae_fp_eq(*c,0) ) + { + *cs = 1; + *sn = 0; + } + else + { + if( ae_fp_eq(*b,0) ) + { + + /* + * Swap rows and columns + */ + *cs = 0; + *sn = 1; + temp = *d; + *d = *a; + *a = temp; + *b = -*c; + *c = 0; + } + else + { + if( ae_fp_eq(*a-(*d),0)&&hsschur_extschursigntoone(*b, _state)!=hsschur_extschursigntoone(*c, _state) ) + { + *cs = 1; + *sn = 0; + } + else + { + temp = *a-(*d); + p = 0.5*temp; + bcmax = ae_maxreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state); + bcmis = ae_minreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state)*hsschur_extschursigntoone(*b, _state)*hsschur_extschursigntoone(*c, _state); + scl = ae_maxreal(ae_fabs(p, _state), bcmax, _state); + z = p/scl*p+bcmax/scl*bcmis; + + /* + * If Z is of the order of the machine accuracy, postpone the + * decision on the nature of eigenvalues + */ + if( ae_fp_greater_eq(z,multpl*eps) ) + { + + /* + * Real eigenvalues. Compute A and D. + */ + z = p+hsschur_extschursign(ae_sqrt(scl, _state)*ae_sqrt(z, _state), p, _state); + *a = *d+z; + *d = *d-bcmax/z*bcmis; + + /* + * Compute B and the rotation matrix + */ + tau = pythag2(*c, z, _state); + *cs = z/tau; + *sn = *c/tau; + *b = *b-(*c); + *c = 0; + } + else + { + + /* + * Complex eigenvalues, or real (almost) equal eigenvalues. + * Make diagonal elements equal. + */ + sigma = *b+(*c); + tau = pythag2(sigma, temp, _state); + *cs = ae_sqrt(0.5*(1+ae_fabs(sigma, _state)/tau), _state); + *sn = -p/(tau*(*cs))*hsschur_extschursign(1, sigma, _state); + + /* + * Compute [ AA BB ] = [ A B ] [ CS -SN ] + * [ CC DD ] [ C D ] [ SN CS ] + */ + aa = *a*(*cs)+*b*(*sn); + bb = -*a*(*sn)+*b*(*cs); + cc = *c*(*cs)+*d*(*sn); + dd = -*c*(*sn)+*d*(*cs); + + /* + * Compute [ A B ] = [ CS SN ] [ AA BB ] + * [ C D ] [-SN CS ] [ CC DD ] + */ + *a = aa*(*cs)+cc*(*sn); + *b = bb*(*cs)+dd*(*sn); + *c = -aa*(*sn)+cc*(*cs); + *d = -bb*(*sn)+dd*(*cs); + temp = 0.5*(*a+(*d)); + *a = temp; + *d = temp; + if( ae_fp_neq(*c,0) ) + { + if( ae_fp_neq(*b,0) ) + { + if( hsschur_extschursigntoone(*b, _state)==hsschur_extschursigntoone(*c, _state) ) + { + + /* + * Real eigenvalues: reduce to upper triangular form + */ + sab = ae_sqrt(ae_fabs(*b, _state), _state); + sac = ae_sqrt(ae_fabs(*c, _state), _state); + p = hsschur_extschursign(sab*sac, *c, _state); + tau = 1/ae_sqrt(ae_fabs(*b+(*c), _state), _state); + *a = temp+p; + *d = temp-p; + *b = *b-(*c); + *c = 0; + cs1 = sab*tau; + sn1 = sac*tau; + temp = *cs*cs1-*sn*sn1; + *sn = *cs*sn1+*sn*cs1; + *cs = temp; + } + } + else + { + *b = -*c; + *c = 0; + temp = *cs; + *cs = -*sn; + *sn = temp; + } + } + } + } + } + } + + /* + * Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). + */ + *rt1r = *a; + *rt2r = *d; + if( ae_fp_eq(*c,0) ) + { + *rt1i = 0; + *rt2i = 0; + } + else + { + *rt1i = ae_sqrt(ae_fabs(*b, _state), _state)*ae_sqrt(ae_fabs(*c, _state), _state); + *rt2i = -*rt1i; + } +} + + +static double hsschur_extschursign(double a, double b, ae_state *_state) +{ + double result; + + + if( ae_fp_greater_eq(b,0) ) + { + result = ae_fabs(a, _state); + } + else + { + result = -ae_fabs(a, _state); + } + return result; +} + + +static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state) +{ + ae_int_t result; + + + if( ae_fp_greater_eq(b,0) ) + { + result = 1; + } + else + { + result = -1; + } + return result; +} + + + + +/************************************************************************* +Utility subroutine performing the "safe" solution of system of linear +equations with triangular coefficient matrices. + +The subroutine uses scaling and solves the scaled system A*x=s*b (where s +is a scalar value) instead of A*x=b, choosing s so that x can be +represented by a floating-point number. The closer the system gets to a +singular, the less s is. If the system is singular, s=0 and x contains the +non-trivial solution of equation A*x=0. + +The feature of an algorithm is that it could not cause an overflow or a +division by zero regardless of the matrix used as the input. + +The algorithm can solve systems of equations with upper/lower triangular +matrices, with/without unit diagonal, and systems of type A*x=b or A'*x=b +(where A' is a transposed matrix A). + +Input parameters: + A - system matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + X - right-hand member of a system. + Array whose index ranges within [0..N-1]. + IsUpper - matrix type. If it is True, the system matrix is the upper + triangular and is located in the corresponding part of + matrix A. + Trans - problem type. If it is True, the problem to be solved is + A'*x=b, otherwise it is A*x=b. + Isunit - matrix type. If it is True, the system matrix has a unit + diagonal (the elements on the main diagonal are not used + in the calculation process), otherwise the matrix is considered + to be a general triangular matrix. + +Output parameters: + X - solution. Array whose index ranges within [0..N-1]. + S - scaling factor. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 +*************************************************************************/ +void rmatrixtrsafesolve(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* x, + double* s, + ae_bool isupper, + ae_bool istrans, + ae_bool isunit, + ae_state *_state) +{ + ae_frame _frame_block; + ae_bool normin; + ae_vector cnorm; + ae_matrix a1; + ae_vector x1; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + *s = 0; + ae_vector_init(&cnorm, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&x1, 0, DT_REAL, _state, ae_true); + + + /* + * From 0-based to 1-based + */ + normin = ae_false; + ae_matrix_set_length(&a1, n+1, n+1, _state); + ae_vector_set_length(&x1, n+1, _state); + for(i=1; i<=n; i++) + { + ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n)); + } + ae_v_move(&x1.ptr.p_double[1], 1, &x->ptr.p_double[0], 1, ae_v_len(1,n)); + + /* + * Solve 1-based + */ + safesolvetriangular(&a1, n, &x1, s, isupper, istrans, isunit, normin, &cnorm, _state); + + /* + * From 1-based to 0-based + */ + ae_v_move(&x->ptr.p_double[0], 1, &x1.ptr.p_double[1], 1, ae_v_len(0,n-1)); + ae_frame_leave(_state); +} + + +/************************************************************************* +Obsolete 1-based subroutine. +See RMatrixTRSafeSolve for 0-based replacement. +*************************************************************************/ +void safesolvetriangular(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* x, + double* s, + ae_bool isupper, + ae_bool istrans, + ae_bool isunit, + ae_bool normin, + /* Real */ ae_vector* cnorm, + ae_state *_state) +{ + ae_int_t i; + ae_int_t imax; + ae_int_t j; + ae_int_t jfirst; + ae_int_t jinc; + ae_int_t jlast; + ae_int_t jm1; + ae_int_t jp1; + ae_int_t ip1; + ae_int_t im1; + ae_int_t k; + ae_int_t flg; + double v; + double vd; + double bignum; + double grow; + double rec; + double smlnum; + double sumj; + double tjj; + double tjjs; + double tmax; + double tscal; + double uscal; + double xbnd; + double xj; + double xmax; + ae_bool notran; + ae_bool upper; + ae_bool nounit; + + *s = 0; + + upper = isupper; + notran = !istrans; + nounit = !isunit; + + /* + * these initializers are not really necessary, + * but without them compiler complains about uninitialized locals + */ + tjjs = 0; + + /* + * Quick return if possible + */ + if( n==0 ) + { + return; + } + + /* + * Determine machine dependent parameters to control overflow. + */ + smlnum = ae_minrealnumber/(ae_machineepsilon*2); + bignum = 1/smlnum; + *s = 1; + if( !normin ) + { + ae_vector_set_length(cnorm, n+1, _state); + + /* + * Compute the 1-norm of each column, not including the diagonal. + */ + if( upper ) + { + + /* + * A is upper triangular. + */ + for(j=1; j<=n; j++) + { + v = 0; + for(k=1; k<=j-1; k++) + { + v = v+ae_fabs(a->ptr.pp_double[k][j], _state); + } + cnorm->ptr.p_double[j] = v; + } + } + else + { + + /* + * A is lower triangular. + */ + for(j=1; j<=n-1; j++) + { + v = 0; + for(k=j+1; k<=n; k++) + { + v = v+ae_fabs(a->ptr.pp_double[k][j], _state); + } + cnorm->ptr.p_double[j] = v; + } + cnorm->ptr.p_double[n] = 0; + } + } + + /* + * Scale the column norms by TSCAL if the maximum element in CNORM is + * greater than BIGNUM. + */ + imax = 1; + for(k=2; k<=n; k++) + { + if( ae_fp_greater(cnorm->ptr.p_double[k],cnorm->ptr.p_double[imax]) ) + { + imax = k; + } + } + tmax = cnorm->ptr.p_double[imax]; + if( ae_fp_less_eq(tmax,bignum) ) + { + tscal = 1; + } + else + { + tscal = 1/(smlnum*tmax); + ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), tscal); + } + + /* + * Compute a bound on the computed solution vector to see if the + * Level 2 BLAS routine DTRSV can be used. + */ + j = 1; + for(k=2; k<=n; k++) + { + if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[j], _state)) ) + { + j = k; + } + } + xmax = ae_fabs(x->ptr.p_double[j], _state); + xbnd = xmax; + if( notran ) + { + + /* + * Compute the growth in A * x = b. + */ + if( upper ) + { + jfirst = n; + jlast = 1; + jinc = -1; + } + else + { + jfirst = 1; + jlast = n; + jinc = 1; + } + if( ae_fp_neq(tscal,1) ) + { + grow = 0; + } + else + { + if( nounit ) + { + + /* + * A is non-unit triangular. + * + * Compute GROW = 1/G(j) and XBND = 1/M(j). + * Initially, G(0) = max{x(i), i=1,...,n}. + */ + grow = 1/ae_maxreal(xbnd, smlnum, _state); + xbnd = grow; + j = jfirst; + while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast)) + { + + /* + * Exit the loop if the growth factor is too small. + */ + if( ae_fp_less_eq(grow,smlnum) ) + { + break; + } + + /* + * M(j) = G(j-1) / abs(A(j,j)) + */ + tjj = ae_fabs(a->ptr.pp_double[j][j], _state); + xbnd = ae_minreal(xbnd, ae_minreal(1, tjj, _state)*grow, _state); + if( ae_fp_greater_eq(tjj+cnorm->ptr.p_double[j],smlnum) ) + { + + /* + * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) + */ + grow = grow*(tjj/(tjj+cnorm->ptr.p_double[j])); + } + else + { + + /* + * G(j) could overflow, set GROW to 0. + */ + grow = 0; + } + if( j==jlast ) + { + grow = xbnd; + } + j = j+jinc; + } + } + else + { + + /* + * A is unit triangular. + * + * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + */ + grow = ae_minreal(1, 1/ae_maxreal(xbnd, smlnum, _state), _state); + j = jfirst; + while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast)) + { + + /* + * Exit the loop if the growth factor is too small. + */ + if( ae_fp_less_eq(grow,smlnum) ) + { + break; + } + + /* + * G(j) = G(j-1)*( 1 + CNORM(j) ) + */ + grow = grow*(1/(1+cnorm->ptr.p_double[j])); + j = j+jinc; + } + } + } + } + else + { + + /* + * Compute the growth in A' * x = b. + */ + if( upper ) + { + jfirst = 1; + jlast = n; + jinc = 1; + } + else + { + jfirst = n; + jlast = 1; + jinc = -1; + } + if( ae_fp_neq(tscal,1) ) + { + grow = 0; + } + else + { + if( nounit ) + { + + /* + * A is non-unit triangular. + * + * Compute GROW = 1/G(j) and XBND = 1/M(j). + * Initially, M(0) = max{x(i), i=1,...,n}. + */ + grow = 1/ae_maxreal(xbnd, smlnum, _state); + xbnd = grow; + j = jfirst; + while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast)) + { + + /* + * Exit the loop if the growth factor is too small. + */ + if( ae_fp_less_eq(grow,smlnum) ) + { + break; + } + + /* + * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) + */ + xj = 1+cnorm->ptr.p_double[j]; + grow = ae_minreal(grow, xbnd/xj, _state); + + /* + * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) + */ + tjj = ae_fabs(a->ptr.pp_double[j][j], _state); + if( ae_fp_greater(xj,tjj) ) + { + xbnd = xbnd*(tjj/xj); + } + if( j==jlast ) + { + grow = ae_minreal(grow, xbnd, _state); + } + j = j+jinc; + } + } + else + { + + /* + * A is unit triangular. + * + * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + */ + grow = ae_minreal(1, 1/ae_maxreal(xbnd, smlnum, _state), _state); + j = jfirst; + while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast)) + { + + /* + * Exit the loop if the growth factor is too small. + */ + if( ae_fp_less_eq(grow,smlnum) ) + { + break; + } + + /* + * G(j) = ( 1 + CNORM(j) )*G(j-1) + */ + xj = 1+cnorm->ptr.p_double[j]; + grow = grow/xj; + j = j+jinc; + } + } + } + } + if( ae_fp_greater(grow*tscal,smlnum) ) + { + + /* + * Use the Level 2 BLAS solve if the reciprocal of the bound on + * elements of X is not too small. + */ + if( (upper&¬ran)||(!upper&&!notran) ) + { + if( nounit ) + { + vd = a->ptr.pp_double[n][n]; + } + else + { + vd = 1; + } + x->ptr.p_double[n] = x->ptr.p_double[n]/vd; + for(i=n-1; i>=1; i--) + { + ip1 = i+1; + if( upper ) + { + v = ae_v_dotproduct(&a->ptr.pp_double[i][ip1], 1, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n)); + } + else + { + v = ae_v_dotproduct(&a->ptr.pp_double[ip1][i], a->stride, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n)); + } + if( nounit ) + { + vd = a->ptr.pp_double[i][i]; + } + else + { + vd = 1; + } + x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd; + } + } + else + { + if( nounit ) + { + vd = a->ptr.pp_double[1][1]; + } + else + { + vd = 1; + } + x->ptr.p_double[1] = x->ptr.p_double[1]/vd; + for(i=2; i<=n; i++) + { + im1 = i-1; + if( upper ) + { + v = ae_v_dotproduct(&a->ptr.pp_double[1][i], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,im1)); + } + else + { + v = ae_v_dotproduct(&a->ptr.pp_double[i][1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,im1)); + } + if( nounit ) + { + vd = a->ptr.pp_double[i][i]; + } + else + { + vd = 1; + } + x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd; + } + } + } + else + { + + /* + * Use a Level 1 BLAS solve, scaling intermediate results. + */ + if( ae_fp_greater(xmax,bignum) ) + { + + /* + * Scale X so that its components are less than or equal to + * BIGNUM in absolute value. + */ + *s = bignum/xmax; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), *s); + xmax = bignum; + } + if( notran ) + { + + /* + * Solve A * x = b + */ + j = jfirst; + while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast)) + { + + /* + * Compute x(j) = b(j) / A(j,j), scaling x if necessary. + */ + xj = ae_fabs(x->ptr.p_double[j], _state); + flg = 0; + if( nounit ) + { + tjjs = a->ptr.pp_double[j][j]*tscal; + } + else + { + tjjs = tscal; + if( ae_fp_eq(tscal,1) ) + { + flg = 100; + } + } + if( flg!=100 ) + { + tjj = ae_fabs(tjjs, _state); + if( ae_fp_greater(tjj,smlnum) ) + { + + /* + * abs(A(j,j)) > SMLNUM: + */ + if( ae_fp_less(tjj,1) ) + { + if( ae_fp_greater(xj,tjj*bignum) ) + { + + /* + * Scale x by 1/b(j). + */ + rec = 1/xj; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec); + *s = *s*rec; + xmax = xmax*rec; + } + } + x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs; + xj = ae_fabs(x->ptr.p_double[j], _state); + } + else + { + if( ae_fp_greater(tjj,0) ) + { + + /* + * 0 < abs(A(j,j)) <= SMLNUM: + */ + if( ae_fp_greater(xj,tjj*bignum) ) + { + + /* + * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM + * to avoid overflow when dividing by A(j,j). + */ + rec = tjj*bignum/xj; + if( ae_fp_greater(cnorm->ptr.p_double[j],1) ) + { + + /* + * Scale by 1/CNORM(j) to avoid overflow when + * multiplying x(j) times column j. + */ + rec = rec/cnorm->ptr.p_double[j]; + } + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec); + *s = *s*rec; + xmax = xmax*rec; + } + x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs; + xj = ae_fabs(x->ptr.p_double[j], _state); + } + else + { + + /* + * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + * scale = 0, and compute a solution to A*x = 0. + */ + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = 0; + } + x->ptr.p_double[j] = 1; + xj = 1; + *s = 0; + xmax = 0; + } + } + } + + /* + * Scale x if necessary to avoid overflow when adding a + * multiple of column j of A. + */ + if( ae_fp_greater(xj,1) ) + { + rec = 1/xj; + if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xmax)*rec) ) + { + + /* + * Scale x by 1/(2*abs(x(j))). + */ + rec = rec*0.5; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec); + *s = *s*rec; + } + } + else + { + if( ae_fp_greater(xj*cnorm->ptr.p_double[j],bignum-xmax) ) + { + + /* + * Scale x by 1/2. + */ + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), 0.5); + *s = *s*0.5; + } + } + if( upper ) + { + if( j>1 ) + { + + /* + * Compute the update + * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) + */ + v = x->ptr.p_double[j]*tscal; + jm1 = j-1; + ae_v_subd(&x->ptr.p_double[1], 1, &a->ptr.pp_double[1][j], a->stride, ae_v_len(1,jm1), v); + i = 1; + for(k=2; k<=j-1; k++) + { + if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) ) + { + i = k; + } + } + xmax = ae_fabs(x->ptr.p_double[i], _state); + } + } + else + { + if( j<n ) + { + + /* + * Compute the update + * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) + */ + jp1 = j+1; + v = x->ptr.p_double[j]*tscal; + ae_v_subd(&x->ptr.p_double[jp1], 1, &a->ptr.pp_double[jp1][j], a->stride, ae_v_len(jp1,n), v); + i = j+1; + for(k=j+2; k<=n; k++) + { + if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) ) + { + i = k; + } + } + xmax = ae_fabs(x->ptr.p_double[i], _state); + } + } + j = j+jinc; + } + } + else + { + + /* + * Solve A' * x = b + */ + j = jfirst; + while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast)) + { + + /* + * Compute x(j) = b(j) - sum A(k,j)*x(k). + * k<>j + */ + xj = ae_fabs(x->ptr.p_double[j], _state); + uscal = tscal; + rec = 1/ae_maxreal(xmax, 1, _state); + if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xj)*rec) ) + { + + /* + * If x(j) could overflow, scale x by 1/(2*XMAX). + */ + rec = rec*0.5; + if( nounit ) + { + tjjs = a->ptr.pp_double[j][j]*tscal; + } + else + { + tjjs = tscal; + } + tjj = ae_fabs(tjjs, _state); + if( ae_fp_greater(tjj,1) ) + { + + /* + * Divide by A(j,j) when scaling x if A(j,j) > 1. + */ + rec = ae_minreal(1, rec*tjj, _state); + uscal = uscal/tjjs; + } + if( ae_fp_less(rec,1) ) + { + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec); + *s = *s*rec; + xmax = xmax*rec; + } + } + sumj = 0; + if( ae_fp_eq(uscal,1) ) + { + + /* + * If the scaling needed for A in the dot product is 1, + * call DDOT to perform the dot product. + */ + if( upper ) + { + if( j>1 ) + { + jm1 = j-1; + sumj = ae_v_dotproduct(&a->ptr.pp_double[1][j], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,jm1)); + } + else + { + sumj = 0; + } + } + else + { + if( j<n ) + { + jp1 = j+1; + sumj = ae_v_dotproduct(&a->ptr.pp_double[jp1][j], a->stride, &x->ptr.p_double[jp1], 1, ae_v_len(jp1,n)); + } + } + } + else + { + + /* + * Otherwise, use in-line code for the dot product. + */ + if( upper ) + { + for(i=1; i<=j-1; i++) + { + v = a->ptr.pp_double[i][j]*uscal; + sumj = sumj+v*x->ptr.p_double[i]; + } + } + else + { + if( j<n ) + { + for(i=j+1; i<=n; i++) + { + v = a->ptr.pp_double[i][j]*uscal; + sumj = sumj+v*x->ptr.p_double[i]; + } + } + } + } + if( ae_fp_eq(uscal,tscal) ) + { + + /* + * Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) + * was not used to scale the dotproduct. + */ + x->ptr.p_double[j] = x->ptr.p_double[j]-sumj; + xj = ae_fabs(x->ptr.p_double[j], _state); + flg = 0; + if( nounit ) + { + tjjs = a->ptr.pp_double[j][j]*tscal; + } + else + { + tjjs = tscal; + if( ae_fp_eq(tscal,1) ) + { + flg = 150; + } + } + + /* + * Compute x(j) = x(j) / A(j,j), scaling if necessary. + */ + if( flg!=150 ) + { + tjj = ae_fabs(tjjs, _state); + if( ae_fp_greater(tjj,smlnum) ) + { + + /* + * abs(A(j,j)) > SMLNUM: + */ + if( ae_fp_less(tjj,1) ) + { + if( ae_fp_greater(xj,tjj*bignum) ) + { + + /* + * Scale X by 1/abs(x(j)). + */ + rec = 1/xj; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec); + *s = *s*rec; + xmax = xmax*rec; + } + } + x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs; + } + else + { + if( ae_fp_greater(tjj,0) ) + { + + /* + * 0 < abs(A(j,j)) <= SMLNUM: + */ + if( ae_fp_greater(xj,tjj*bignum) ) + { + + /* + * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. + */ + rec = tjj*bignum/xj; + ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec); + *s = *s*rec; + xmax = xmax*rec; + } + x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs; + } + else + { + + /* + * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + * scale = 0, and compute a solution to A'*x = 0. + */ + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = 0; + } + x->ptr.p_double[j] = 1; + *s = 0; + xmax = 0; + } + } + } + } + else + { + + /* + * Compute x(j) := x(j) / A(j,j) - sumj if the dot + * product has already been divided by 1/A(j,j). + */ + x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs-sumj; + } + xmax = ae_maxreal(xmax, ae_fabs(x->ptr.p_double[j], _state), _state); + j = j+jinc; + } + } + *s = *s/tscal; + } + + /* + * Scale the column norms by 1/TSCAL for return. + */ + if( ae_fp_neq(tscal,1) ) + { + v = 1/tscal; + ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), v); + } +} + + + + +/************************************************************************* +Real implementation of CMatrixScaledTRSafeSolve + + -- ALGLIB routine -- + 21.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixscaledtrsafesolve(/* Real */ ae_matrix* a, + double sa, + ae_int_t n, + /* Real */ ae_vector* x, + ae_bool isupper, + ae_int_t trans, + ae_bool isunit, + double maxgrowth, + ae_state *_state) +{ + ae_frame _frame_block; + double lnmax; + double nrmb; + double nrmx; + ae_int_t i; + ae_complex alpha; + ae_complex beta; + double vr; + ae_complex cx; + ae_vector tmp; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + ae_assert(n>0, "RMatrixTRSafeSolve: incorrect N!", _state); + ae_assert(trans==0||trans==1, "RMatrixTRSafeSolve: incorrect Trans!", _state); + result = ae_true; + lnmax = ae_log(ae_maxrealnumber, _state); + + /* + * Quick return if possible + */ + if( n<=0 ) + { + ae_frame_leave(_state); + return result; + } + + /* + * Load norms: right part and X + */ + nrmb = 0; + for(i=0; i<=n-1; i++) + { + nrmb = ae_maxreal(nrmb, ae_fabs(x->ptr.p_double[i], _state), _state); + } + nrmx = 0; + + /* + * Solve + */ + ae_vector_set_length(&tmp, n, _state); + result = ae_true; + if( isupper&&trans==0 ) + { + + /* + * U*x = b + */ + for(i=n-1; i>=0; i--) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa); + } + if( i<n-1 ) + { + ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa); + vr = ae_v_dotproduct(&tmp.ptr.p_double[i+1], 1, &x->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1)); + beta = ae_complex_from_d(x->ptr.p_double[i]-vr); + } + else + { + beta = ae_complex_from_d(x->ptr.p_double[i]); + } + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_double[i] = cx.x; + } + ae_frame_leave(_state); + return result; + } + if( !isupper&&trans==0 ) + { + + /* + * L*x = b + */ + for(i=0; i<=n-1; i++) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa); + } + if( i>0 ) + { + ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa); + vr = ae_v_dotproduct(&tmp.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,i-1)); + beta = ae_complex_from_d(x->ptr.p_double[i]-vr); + } + else + { + beta = ae_complex_from_d(x->ptr.p_double[i]); + } + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_double[i] = cx.x; + } + ae_frame_leave(_state); + return result; + } + if( isupper&&trans==1 ) + { + + /* + * U^T*x = b + */ + for(i=0; i<=n-1; i++) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa); + } + beta = ae_complex_from_d(x->ptr.p_double[i]); + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_double[i] = cx.x; + + /* + * update the rest of right part + */ + if( i<n-1 ) + { + vr = cx.x; + ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa); + ae_v_subd(&x->ptr.p_double[i+1], 1, &tmp.ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), vr); + } + } + ae_frame_leave(_state); + return result; + } + if( !isupper&&trans==1 ) + { + + /* + * L^T*x = b + */ + for(i=n-1; i>=0; i--) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa); + } + beta = ae_complex_from_d(x->ptr.p_double[i]); + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_double[i] = cx.x; + + /* + * update the rest of right part + */ + if( i>0 ) + { + vr = cx.x; + ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa); + ae_v_subd(&x->ptr.p_double[0], 1, &tmp.ptr.p_double[0], 1, ae_v_len(0,i-1), vr); + } + } + ae_frame_leave(_state); + return result; + } + result = ae_false; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Internal subroutine for safe solution of + + SA*op(A)=b + +where A is NxN upper/lower triangular/unitriangular matrix, op(A) is +either identity transform, transposition or Hermitian transposition, SA is +a scaling factor such that max(|SA*A[i,j]|) is close to 1.0 in magnutude. + +This subroutine limits relative growth of solution (in inf-norm) by +MaxGrowth, returning False if growth exceeds MaxGrowth. Degenerate or +near-degenerate matrices are handled correctly (False is returned) as long +as MaxGrowth is significantly less than MaxRealNumber/norm(b). + + -- ALGLIB routine -- + 21.01.2010 + Bochkanov Sergey +*************************************************************************/ +ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a, + double sa, + ae_int_t n, + /* Complex */ ae_vector* x, + ae_bool isupper, + ae_int_t trans, + ae_bool isunit, + double maxgrowth, + ae_state *_state) +{ + ae_frame _frame_block; + double lnmax; + double nrmb; + double nrmx; + ae_int_t i; + ae_complex alpha; + ae_complex beta; + ae_complex vc; + ae_vector tmp; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(n>0, "CMatrixTRSafeSolve: incorrect N!", _state); + ae_assert((trans==0||trans==1)||trans==2, "CMatrixTRSafeSolve: incorrect Trans!", _state); + result = ae_true; + lnmax = ae_log(ae_maxrealnumber, _state); + + /* + * Quick return if possible + */ + if( n<=0 ) + { + ae_frame_leave(_state); + return result; + } + + /* + * Load norms: right part and X + */ + nrmb = 0; + for(i=0; i<=n-1; i++) + { + nrmb = ae_maxreal(nrmb, ae_c_abs(x->ptr.p_complex[i], _state), _state); + } + nrmx = 0; + + /* + * Solve + */ + ae_vector_set_length(&tmp, n, _state); + result = ae_true; + if( isupper&&trans==0 ) + { + + /* + * U*x = b + */ + for(i=n-1; i>=0; i--) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa); + } + if( i<n-1 ) + { + ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa); + vc = ae_v_cdotproduct(&tmp.ptr.p_complex[i+1], 1, "N", &x->ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1)); + beta = ae_c_sub(x->ptr.p_complex[i],vc); + } + else + { + beta = x->ptr.p_complex[i]; + } + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_complex[i] = vc; + } + ae_frame_leave(_state); + return result; + } + if( !isupper&&trans==0 ) + { + + /* + * L*x = b + */ + for(i=0; i<=n-1; i++) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa); + } + if( i>0 ) + { + ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa); + vc = ae_v_cdotproduct(&tmp.ptr.p_complex[0], 1, "N", &x->ptr.p_complex[0], 1, "N", ae_v_len(0,i-1)); + beta = ae_c_sub(x->ptr.p_complex[i],vc); + } + else + { + beta = x->ptr.p_complex[i]; + } + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_complex[i] = vc; + } + ae_frame_leave(_state); + return result; + } + if( isupper&&trans==1 ) + { + + /* + * U^T*x = b + */ + for(i=0; i<=n-1; i++) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa); + } + beta = x->ptr.p_complex[i]; + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_complex[i] = vc; + + /* + * update the rest of right part + */ + if( i<n-1 ) + { + ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa); + ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc); + } + } + ae_frame_leave(_state); + return result; + } + if( !isupper&&trans==1 ) + { + + /* + * L^T*x = b + */ + for(i=n-1; i>=0; i--) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa); + } + beta = x->ptr.p_complex[i]; + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_complex[i] = vc; + + /* + * update the rest of right part + */ + if( i>0 ) + { + ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa); + ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc); + } + } + ae_frame_leave(_state); + return result; + } + if( isupper&&trans==2 ) + { + + /* + * U^H*x = b + */ + for(i=0; i<=n-1; i++) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa); + } + beta = x->ptr.p_complex[i]; + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_complex[i] = vc; + + /* + * update the rest of right part + */ + if( i<n-1 ) + { + ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "Conj", ae_v_len(i+1,n-1), sa); + ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc); + } + } + ae_frame_leave(_state); + return result; + } + if( !isupper&&trans==2 ) + { + + /* + * L^T*x = b + */ + for(i=n-1; i>=0; i--) + { + + /* + * Task is reduced to alpha*x[i] = beta + */ + if( isunit ) + { + alpha = ae_complex_from_d(sa); + } + else + { + alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa); + } + beta = x->ptr.p_complex[i]; + + /* + * solve alpha*x[i] = beta + */ + result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + x->ptr.p_complex[i] = vc; + + /* + * update the rest of right part + */ + if( i>0 ) + { + ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i-1), sa); + ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc); + } + } + ae_frame_leave(_state); + return result; + } + result = ae_false; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +complex basic solver-updater for reduced linear system + + alpha*x[i] = beta + +solves this equation and updates it in overlfow-safe manner (keeping track +of relative growth of solution). + +Parameters: + Alpha - alpha + Beta - beta + LnMax - precomputed Ln(MaxRealNumber) + BNorm - inf-norm of b (right part of original system) + MaxGrowth- maximum growth of norm(x) relative to norm(b) + XNorm - inf-norm of other components of X (which are already processed) + it is updated by CBasicSolveAndUpdate. + X - solution + + -- ALGLIB routine -- + 26.01.2009 + Bochkanov Sergey +*************************************************************************/ +static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha, + ae_complex beta, + double lnmax, + double bnorm, + double maxgrowth, + double* xnorm, + ae_complex* x, + ae_state *_state) +{ + double v; + ae_bool result; + + x->x = 0; + x->y = 0; + + result = ae_false; + if( ae_c_eq_d(alpha,0) ) + { + return result; + } + if( ae_c_neq_d(beta,0) ) + { + + /* + * alpha*x[i]=beta + */ + v = ae_log(ae_c_abs(beta, _state), _state)-ae_log(ae_c_abs(alpha, _state), _state); + if( ae_fp_greater(v,lnmax) ) + { + return result; + } + *x = ae_c_div(beta,alpha); + } + else + { + + /* + * alpha*x[i]=0 + */ + *x = ae_complex_from_d(0); + } + + /* + * update NrmX, test growth limit + */ + *xnorm = ae_maxreal(*xnorm, ae_c_abs(*x, _state), _state); + if( ae_fp_greater(*xnorm,maxgrowth*bnorm) ) + { + return result; + } + result = ae_true; + return result; +} + + + + +/************************************************************************* +More precise dot-product. Absolute error of subroutine result is about +1 ulp of max(MX,V), where: + MX = max( |a[i]*b[i]| ) + V = |(a,b)| + +INPUT PARAMETERS + A - array[0..N-1], vector 1 + B - array[0..N-1], vector 2 + N - vectors length, N<2^29. + Temp - array[0..N-1], pre-allocated temporary storage + +OUTPUT PARAMETERS + R - (A,B) + RErr - estimate of error. This estimate accounts for both errors + during calculation of (A,B) and errors introduced by + rounding of A and B to fit in double (about 1 ulp). + + -- ALGLIB -- + Copyright 24.08.2009 by Bochkanov Sergey +*************************************************************************/ +void xdot(/* Real */ ae_vector* a, + /* Real */ ae_vector* b, + ae_int_t n, + /* Real */ ae_vector* temp, + double* r, + double* rerr, + ae_state *_state) +{ + ae_int_t i; + double mx; + double v; + + *r = 0; + *rerr = 0; + + + /* + * special cases: + * * N=0 + */ + if( n==0 ) + { + *r = 0; + *rerr = 0; + return; + } + mx = 0; + for(i=0; i<=n-1; i++) + { + v = a->ptr.p_double[i]*b->ptr.p_double[i]; + temp->ptr.p_double[i] = v; + mx = ae_maxreal(mx, ae_fabs(v, _state), _state); + } + if( ae_fp_eq(mx,0) ) + { + *r = 0; + *rerr = 0; + return; + } + xblas_xsum(temp, mx, n, r, rerr, _state); +} + + +/************************************************************************* +More precise complex dot-product. Absolute error of subroutine result is +about 1 ulp of max(MX,V), where: + MX = max( |a[i]*b[i]| ) + V = |(a,b)| + +INPUT PARAMETERS + A - array[0..N-1], vector 1 + B - array[0..N-1], vector 2 + N - vectors length, N<2^29. + Temp - array[0..2*N-1], pre-allocated temporary storage + +OUTPUT PARAMETERS + R - (A,B) + RErr - estimate of error. This estimate accounts for both errors + during calculation of (A,B) and errors introduced by + rounding of A and B to fit in double (about 1 ulp). + + -- ALGLIB -- + Copyright 27.01.2010 by Bochkanov Sergey +*************************************************************************/ +void xcdot(/* Complex */ ae_vector* a, + /* Complex */ ae_vector* b, + ae_int_t n, + /* Real */ ae_vector* temp, + ae_complex* r, + double* rerr, + ae_state *_state) +{ + ae_int_t i; + double mx; + double v; + double rerrx; + double rerry; + + r->x = 0; + r->y = 0; + *rerr = 0; + + + /* + * special cases: + * * N=0 + */ + if( n==0 ) + { + *r = ae_complex_from_d(0); + *rerr = 0; + return; + } + + /* + * calculate real part + */ + mx = 0; + for(i=0; i<=n-1; i++) + { + v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].x; + temp->ptr.p_double[2*i+0] = v; + mx = ae_maxreal(mx, ae_fabs(v, _state), _state); + v = -a->ptr.p_complex[i].y*b->ptr.p_complex[i].y; + temp->ptr.p_double[2*i+1] = v; + mx = ae_maxreal(mx, ae_fabs(v, _state), _state); + } + if( ae_fp_eq(mx,0) ) + { + r->x = 0; + rerrx = 0; + } + else + { + xblas_xsum(temp, mx, 2*n, &r->x, &rerrx, _state); + } + + /* + * calculate imaginary part + */ + mx = 0; + for(i=0; i<=n-1; i++) + { + v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].y; + temp->ptr.p_double[2*i+0] = v; + mx = ae_maxreal(mx, ae_fabs(v, _state), _state); + v = a->ptr.p_complex[i].y*b->ptr.p_complex[i].x; + temp->ptr.p_double[2*i+1] = v; + mx = ae_maxreal(mx, ae_fabs(v, _state), _state); + } + if( ae_fp_eq(mx,0) ) + { + r->y = 0; + rerry = 0; + } + else + { + xblas_xsum(temp, mx, 2*n, &r->y, &rerry, _state); + } + + /* + * total error + */ + if( ae_fp_eq(rerrx,0)&&ae_fp_eq(rerry,0) ) + { + *rerr = 0; + } + else + { + *rerr = ae_maxreal(rerrx, rerry, _state)*ae_sqrt(1+ae_sqr(ae_minreal(rerrx, rerry, _state)/ae_maxreal(rerrx, rerry, _state), _state), _state); + } +} + + +/************************************************************************* +Internal subroutine for extra-precise calculation of SUM(w[i]). + +INPUT PARAMETERS: + W - array[0..N-1], values to be added + W is modified during calculations. + MX - max(W[i]) + N - array size + +OUTPUT PARAMETERS: + R - SUM(w[i]) + RErr- error estimate for R + + -- ALGLIB -- + Copyright 24.08.2009 by Bochkanov Sergey +*************************************************************************/ +static void xblas_xsum(/* Real */ ae_vector* w, + double mx, + ae_int_t n, + double* r, + double* rerr, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + ae_int_t ks; + double v; + double s; + double ln2; + double chunk; + double invchunk; + ae_bool allzeros; + + *r = 0; + *rerr = 0; + + + /* + * special cases: + * * N=0 + * * N is too large to use integer arithmetics + */ + if( n==0 ) + { + *r = 0; + *rerr = 0; + return; + } + if( ae_fp_eq(mx,0) ) + { + *r = 0; + *rerr = 0; + return; + } + ae_assert(n<536870912, "XDot: N is too large!", _state); + + /* + * Prepare + */ + ln2 = ae_log(2, _state); + *rerr = mx*ae_machineepsilon; + + /* + * 1. find S such that 0.5<=S*MX<1 + * 2. multiply W by S, so task is normalized in some sense + * 3. S:=1/S so we can obtain original vector multiplying by S + */ + k = ae_round(ae_log(mx, _state)/ln2, _state); + s = xblas_xfastpow(2, -k, _state); + while(ae_fp_greater_eq(s*mx,1)) + { + s = 0.5*s; + } + while(ae_fp_less(s*mx,0.5)) + { + s = 2*s; + } + ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), s); + s = 1/s; + + /* + * find Chunk=2^M such that N*Chunk<2^29 + * + * we have chosen upper limit (2^29) with enough space left + * to tolerate possible problems with rounding and N's close + * to the limit, so we don't want to be very strict here. + */ + k = ae_trunc(ae_log((double)536870912/(double)n, _state)/ln2, _state); + chunk = xblas_xfastpow(2, k, _state); + if( ae_fp_less(chunk,2) ) + { + chunk = 2; + } + invchunk = 1/chunk; + + /* + * calculate result + */ + *r = 0; + ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), chunk); + for(;;) + { + s = s*invchunk; + allzeros = ae_true; + ks = 0; + for(i=0; i<=n-1; i++) + { + v = w->ptr.p_double[i]; + k = ae_trunc(v, _state); + if( ae_fp_neq(v,k) ) + { + allzeros = ae_false; + } + w->ptr.p_double[i] = chunk*(v-k); + ks = ks+k; + } + *r = *r+s*ks; + v = ae_fabs(*r, _state); + if( allzeros||ae_fp_eq(s*n+mx,mx) ) + { + break; + } + } + + /* + * correct error + */ + *rerr = ae_maxreal(*rerr, ae_fabs(*r, _state)*ae_machineepsilon, _state); +} + + +/************************************************************************* +Fast Pow + + -- ALGLIB -- + Copyright 24.08.2009 by Bochkanov Sergey +*************************************************************************/ +static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state) +{ + double result; + + + result = 0; + if( n>0 ) + { + if( n%2==0 ) + { + result = ae_sqr(xblas_xfastpow(r, n/2, _state), _state); + } + else + { + result = r*xblas_xfastpow(r, n-1, _state); + } + return result; + } + if( n==0 ) + { + result = 1; + } + if( n<0 ) + { + result = xblas_xfastpow(1/r, -n, _state); + } + return result; +} + + + + +/************************************************************************* +Normalizes direction/step pair: makes |D|=1, scales Stp. +If |D|=0, it returns, leavind D/Stp unchanged. + + -- ALGLIB -- + Copyright 01.04.2010 by Bochkanov Sergey +*************************************************************************/ +void linminnormalized(/* Real */ ae_vector* d, + double* stp, + ae_int_t n, + ae_state *_state) +{ + double mx; + double s; + ae_int_t i; + + + + /* + * first, scale D to avoid underflow/overflow durng squaring + */ + mx = 0; + for(i=0; i<=n-1; i++) + { + mx = ae_maxreal(mx, ae_fabs(d->ptr.p_double[i], _state), _state); + } + if( ae_fp_eq(mx,0) ) + { + return; + } + s = 1/mx; + ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s); + *stp = *stp/s; + + /* + * normalize D + */ + s = ae_v_dotproduct(&d->ptr.p_double[0], 1, &d->ptr.p_double[0], 1, ae_v_len(0,n-1)); + s = 1/ae_sqrt(s, _state); + ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s); + *stp = *stp/s; +} + + +/************************************************************************* +THE PURPOSE OF MCSRCH IS TO FIND A STEP WHICH SATISFIES A SUFFICIENT +DECREASE CONDITION AND A CURVATURE CONDITION. + +AT EACH STAGE THE SUBROUTINE UPDATES AN INTERVAL OF UNCERTAINTY WITH +ENDPOINTS STX AND STY. THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN +SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION + + F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S). + +IF A STEP IS OBTAINED FOR WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE +FUNCTION VALUE AND NONNEGATIVE DERIVATIVE, THEN THE INTERVAL OF +UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S). + +THE ALGORITHM IS DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT +DECREASE CONDITION + + F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S), + +AND THE CURVATURE CONDITION + + ABS(GRADF(X+STP*S)'S)) .LE. GTOL*ABS(GRADF(X)'S). + +IF FTOL IS LESS THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED +BELOW, THEN THERE IS ALWAYS A STEP WHICH SATISFIES BOTH CONDITIONS. +IF NO STEP CAN BE FOUND WHICH SATISFIES BOTH CONDITIONS, THEN THE +ALGORITHM USUALLY STOPS WHEN ROUNDING ERRORS PREVENT FURTHER PROGRESS. +IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION. + +PARAMETERS DESCRIPRION + +STAGE IS ZERO ON FIRST CALL, ZERO ON FINAL EXIT + +N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES. + +X IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR +THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S. + +F IS A VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT +IT CONTAINS THE VALUE OF F AT X + STP*S. + +G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X. +ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S. + +S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION. + +STP IS A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE +OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE. + +FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE +SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE +SATISFIED. + +XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE +WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL. + +STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER AND +UPPER BOUNDS FOR THE STEP. + +MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE +NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION. + +INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS: + INFO = 0 IMPROPER INPUT PARAMETERS. + + INFO = 1 THE SUFFICIENT DECREASE CONDITION AND THE + DIRECTIONAL DERIVATIVE CONDITION HOLD. + + INFO = 2 RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY + IS AT MOST XTOL. + + INFO = 3 NUMBER OF CALLS TO FCN HAS REACHED MAXFEV. + + INFO = 4 THE STEP IS AT THE LOWER BOUND STPMIN. + + INFO = 5 THE STEP IS AT THE UPPER BOUND STPMAX. + + INFO = 6 ROUNDING ERRORS PREVENT FURTHER PROGRESS. + THERE MAY NOT BE A STEP WHICH SATISFIES THE + SUFFICIENT DECREASE AND CURVATURE CONDITIONS. + TOLERANCES MAY BE TOO SMALL. + +NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN. + +WA IS A WORK ARRAY OF LENGTH N. + +ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983 +JORGE J. MORE', DAVID J. THUENTE +*************************************************************************/ +void mcsrch(ae_int_t n, + /* Real */ ae_vector* x, + double* f, + /* Real */ ae_vector* g, + /* Real */ ae_vector* s, + double* stp, + double stpmax, + ae_int_t* info, + ae_int_t* nfev, + /* Real */ ae_vector* wa, + linminstate* state, + ae_int_t* stage, + ae_state *_state) +{ + double v; + double p5; + double p66; + double zero; + + + + /* + * init + */ + p5 = 0.5; + p66 = 0.66; + state->xtrapf = 4.0; + zero = 0; + if( ae_fp_eq(stpmax,0) ) + { + stpmax = linmin_defstpmax; + } + if( ae_fp_less(*stp,linmin_stpmin) ) + { + *stp = linmin_stpmin; + } + if( ae_fp_greater(*stp,stpmax) ) + { + *stp = stpmax; + } + + /* + * Main cycle + */ + for(;;) + { + if( *stage==0 ) + { + + /* + * NEXT + */ + *stage = 2; + continue; + } + if( *stage==2 ) + { + state->infoc = 1; + *info = 0; + + /* + * CHECK THE INPUT PARAMETERS FOR ERRORS. + */ + if( ((((((n<=0||ae_fp_less_eq(*stp,0))||ae_fp_less(linmin_ftol,0))||ae_fp_less(linmin_gtol,zero))||ae_fp_less(linmin_xtol,zero))||ae_fp_less(linmin_stpmin,zero))||ae_fp_less(stpmax,linmin_stpmin))||linmin_maxfev<=0 ) + { + *stage = 0; + return; + } + + /* + * COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION + * AND CHECK THAT S IS A DESCENT DIRECTION. + */ + v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->dginit = v; + if( ae_fp_greater_eq(state->dginit,0) ) + { + *stage = 0; + return; + } + + /* + * INITIALIZE LOCAL VARIABLES. + */ + state->brackt = ae_false; + state->stage1 = ae_true; + *nfev = 0; + state->finit = *f; + state->dgtest = linmin_ftol*state->dginit; + state->width = stpmax-linmin_stpmin; + state->width1 = state->width/p5; + ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP, + * FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP. + * THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP, + * FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF + * THE INTERVAL OF UNCERTAINTY. + * THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP, + * FUNCTION, AND DERIVATIVE AT THE CURRENT STEP. + */ + state->stx = 0; + state->fx = state->finit; + state->dgx = state->dginit; + state->sty = 0; + state->fy = state->finit; + state->dgy = state->dginit; + + /* + * NEXT + */ + *stage = 3; + continue; + } + if( *stage==3 ) + { + + /* + * START OF ITERATION. + * + * SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND + * TO THE PRESENT INTERVAL OF UNCERTAINTY. + */ + if( state->brackt ) + { + if( ae_fp_less(state->stx,state->sty) ) + { + state->stmin = state->stx; + state->stmax = state->sty; + } + else + { + state->stmin = state->sty; + state->stmax = state->stx; + } + } + else + { + state->stmin = state->stx; + state->stmax = *stp+state->xtrapf*(*stp-state->stx); + } + + /* + * FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN. + */ + if( ae_fp_greater(*stp,stpmax) ) + { + *stp = stpmax; + } + if( ae_fp_less(*stp,linmin_stpmin) ) + { + *stp = linmin_stpmin; + } + + /* + * IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET + * STP BE THE LOWEST POINT OBTAINED SO FAR. + */ + if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=linmin_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax)) ) + { + *stp = state->stx; + } + + /* + * EVALUATE THE FUNCTION AND GRADIENT AT STP + * AND COMPUTE THE DIRECTIONAL DERIVATIVE. + */ + ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp); + + /* + * NEXT + */ + *stage = 4; + return; + } + if( *stage==4 ) + { + *info = 0; + *nfev = *nfev+1; + v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->dg = v; + state->ftest1 = state->finit+*stp*state->dgtest; + + /* + * TEST FOR CONVERGENCE. + */ + if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 ) + { + *info = 6; + } + if( (ae_fp_eq(*stp,stpmax)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) ) + { + *info = 5; + } + if( ae_fp_eq(*stp,linmin_stpmin)&&(ae_fp_greater(*f,state->ftest1)||ae_fp_greater_eq(state->dg,state->dgtest)) ) + { + *info = 4; + } + if( *nfev>=linmin_maxfev ) + { + *info = 3; + } + if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax) ) + { + *info = 2; + } + if( ae_fp_less_eq(*f,state->ftest1)&&ae_fp_less_eq(ae_fabs(state->dg, _state),-linmin_gtol*state->dginit) ) + { + *info = 1; + } + + /* + * CHECK FOR TERMINATION. + */ + if( *info!=0 ) + { + *stage = 0; + return; + } + + /* + * IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED + * FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE. + */ + if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(linmin_ftol, linmin_gtol, _state)*state->dginit) ) + { + state->stage1 = ae_false; + } + + /* + * A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF + * WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED + * FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE + * DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN + * OBTAINED BUT THE DECREASE IS NOT SUFFICIENT. + */ + if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) ) + { + + /* + * DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES. + */ + state->fm = *f-*stp*state->dgtest; + state->fxm = state->fx-state->stx*state->dgtest; + state->fym = state->fy-state->sty*state->dgtest; + state->dgm = state->dg-state->dgtest; + state->dgxm = state->dgx-state->dgtest; + state->dgym = state->dgy-state->dgtest; + + /* + * CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY + * AND TO COMPUTE THE NEW STEP. + */ + linmin_mcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state); + + /* + * RESET THE FUNCTION AND GRADIENT VALUES FOR F. + */ + state->fx = state->fxm+state->stx*state->dgtest; + state->fy = state->fym+state->sty*state->dgtest; + state->dgx = state->dgxm+state->dgtest; + state->dgy = state->dgym+state->dgtest; + } + else + { + + /* + * CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY + * AND TO COMPUTE THE NEW STEP. + */ + linmin_mcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state); + } + + /* + * FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE + * INTERVAL OF UNCERTAINTY. + */ + if( state->brackt ) + { + if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) ) + { + *stp = state->stx+p5*(state->sty-state->stx); + } + state->width1 = state->width; + state->width = ae_fabs(state->sty-state->stx, _state); + } + + /* + * NEXT. + */ + *stage = 3; + continue; + } + } +} + + +/************************************************************************* +These functions perform Armijo line search using at most FMAX function +evaluations. It doesn't enforce some kind of " sufficient decrease" +criterion - it just tries different Armijo steps and returns optimum found +so far. + +Optimization is done using F-rcomm interface: +* ArmijoCreate initializes State structure + (reusing previously allocated buffers) +* ArmijoIteration is subsequently called +* ArmijoResults returns results + +INPUT PARAMETERS: + N - problem size + X - array[N], starting point + F - F(X+S*STP) + S - step direction, S>0 + STP - step length + STPMAX - maximum value for STP or zero (if no limit is imposed) + FMAX - maximum number of function evaluations + State - optimization state + + -- ALGLIB -- + Copyright 05.10.2010 by Bochkanov Sergey +*************************************************************************/ +void armijocreate(ae_int_t n, + /* Real */ ae_vector* x, + double f, + /* Real */ ae_vector* s, + double stp, + double stpmax, + ae_int_t fmax, + armijostate* state, + ae_state *_state) +{ + + + if( state->x.cnt<n ) + { + ae_vector_set_length(&state->x, n, _state); + } + if( state->xbase.cnt<n ) + { + ae_vector_set_length(&state->xbase, n, _state); + } + if( state->s.cnt<n ) + { + ae_vector_set_length(&state->s, n, _state); + } + state->stpmax = stpmax; + state->fmax = fmax; + state->stplen = stp; + state->fcur = f; + state->n = n; + ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->s.ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_vector_set_length(&state->rstate.ia, 0+1, _state); + ae_vector_set_length(&state->rstate.ra, 0+1, _state); + state->rstate.stage = -1; +} + + +/************************************************************************* +This is rcomm-based search function + + -- ALGLIB -- + Copyright 05.10.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool armijoiteration(armijostate* state, ae_state *_state) +{ + double v; + ae_int_t n; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + v = state->rstate.ra.ptr.p_double[0]; + } + else + { + n = -983; + v = -989; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + if( state->rstate.stage==3 ) + { + goto lbl_3; + } + + /* + * Routine body + */ + if( (ae_fp_less_eq(state->stplen,0)||ae_fp_less(state->stpmax,0))||state->fmax<2 ) + { + state->info = 0; + result = ae_false; + return result; + } + if( ae_fp_less_eq(state->stplen,linmin_stpmin) ) + { + state->info = 4; + result = ae_false; + return result; + } + n = state->n; + state->nfev = 0; + + /* + * We always need F + */ + state->needf = ae_true; + + /* + * Bound StpLen + */ + if( ae_fp_greater(state->stplen,state->stpmax)&&ae_fp_neq(state->stpmax,0) ) + { + state->stplen = state->stpmax; + } + + /* + * Increase length + */ + v = state->stplen*linmin_armijofactor; + if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,0) ) + { + v = state->stpmax; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + state->nfev = state->nfev+1; + if( ae_fp_greater_eq(state->f,state->fcur) ) + { + goto lbl_4; + } + state->stplen = v; + state->fcur = state->f; +lbl_6: + if( ae_false ) + { + goto lbl_7; + } + + /* + * test stopping conditions + */ + if( state->nfev>=state->fmax ) + { + state->info = 3; + result = ae_false; + return result; + } + if( ae_fp_greater_eq(state->stplen,state->stpmax) ) + { + state->info = 5; + result = ae_false; + return result; + } + + /* + * evaluate F + */ + v = state->stplen*linmin_armijofactor; + if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,0) ) + { + v = state->stpmax; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + state->nfev = state->nfev+1; + + /* + * make decision + */ + if( ae_fp_less(state->f,state->fcur) ) + { + state->stplen = v; + state->fcur = state->f; + } + else + { + state->info = 1; + result = ae_false; + return result; + } + goto lbl_6; +lbl_7: +lbl_4: + + /* + * Decrease length + */ + v = state->stplen/linmin_armijofactor; + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + state->nfev = state->nfev+1; + if( ae_fp_greater_eq(state->f,state->fcur) ) + { + goto lbl_8; + } + state->stplen = state->stplen/linmin_armijofactor; + state->fcur = state->f; +lbl_10: + if( ae_false ) + { + goto lbl_11; + } + + /* + * test stopping conditions + */ + if( state->nfev>=state->fmax ) + { + state->info = 3; + result = ae_false; + return result; + } + if( ae_fp_less_eq(state->stplen,linmin_stpmin) ) + { + state->info = 4; + result = ae_false; + return result; + } + + /* + * evaluate F + */ + v = state->stplen/linmin_armijofactor; + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + state->rstate.stage = 3; + goto lbl_rcomm; +lbl_3: + state->nfev = state->nfev+1; + + /* + * make decision + */ + if( ae_fp_less(state->f,state->fcur) ) + { + state->stplen = state->stplen/linmin_armijofactor; + state->fcur = state->f; + } + else + { + state->info = 1; + result = ae_false; + return result; + } + goto lbl_10; +lbl_11: +lbl_8: + + /* + * Nothing to be done + */ + state->info = 1; + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ra.ptr.p_double[0] = v; + return result; +} + + +/************************************************************************* +Results of Armijo search + +OUTPUT PARAMETERS: + INFO - on output it is set to one of the return codes: + * 0 improper input params + * 1 optimum step is found with at most FMAX evaluations + * 3 FMAX evaluations were used, + X contains optimum found so far + * 4 step is at lower bound STPMIN + * 5 step is at upper bound + STP - step length (in case of failure it is still returned) + F - function value (in case of failure it is still returned) + + -- ALGLIB -- + Copyright 05.10.2010 by Bochkanov Sergey +*************************************************************************/ +void armijoresults(armijostate* state, + ae_int_t* info, + double* stp, + double* f, + ae_state *_state) +{ + + + *info = state->info; + *stp = state->stplen; + *f = state->fcur; +} + + +static void linmin_mcstep(double* stx, + double* fx, + double* dx, + double* sty, + double* fy, + double* dy, + double* stp, + double fp, + double dp, + ae_bool* brackt, + double stmin, + double stmax, + ae_int_t* info, + ae_state *_state) +{ + ae_bool bound; + double gamma; + double p; + double q; + double r; + double s; + double sgnd; + double stpc; + double stpf; + double stpq; + double theta; + + + *info = 0; + + /* + * CHECK THE INPUT PARAMETERS FOR ERRORS. + */ + if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),0))||ae_fp_less(stmax,stmin) ) + { + return; + } + + /* + * DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN. + */ + sgnd = dp*(*dx/ae_fabs(*dx, _state)); + + /* + * FIRST CASE. A HIGHER FUNCTION VALUE. + * THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER + * TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN, + * ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN. + */ + if( ae_fp_greater(fp,*fx) ) + { + *info = 1; + bound = ae_true; + theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp; + s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state); + gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state); + if( ae_fp_less(*stp,*stx) ) + { + gamma = -gamma; + } + p = gamma-(*dx)+theta; + q = gamma-(*dx)+gamma+dp; + r = p/q; + stpc = *stx+r*(*stp-(*stx)); + stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx)); + if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) ) + { + stpf = stpc; + } + else + { + stpf = stpc+(stpq-stpc)/2; + } + *brackt = ae_true; + } + else + { + if( ae_fp_less(sgnd,0) ) + { + + /* + * SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF + * OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC + * STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP, + * THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN. + */ + *info = 2; + bound = ae_false; + theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp; + s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state); + gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state); + if( ae_fp_greater(*stp,*stx) ) + { + gamma = -gamma; + } + p = gamma-dp+theta; + q = gamma-dp+gamma+(*dx); + r = p/q; + stpc = *stp+r*(*stx-(*stp)); + stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp)); + if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) ) + { + stpf = stpc; + } + else + { + stpf = stpq; + } + *brackt = ae_true; + } + else + { + if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) ) + { + + /* + * THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE + * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES. + * THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY + * IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC + * IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE + * EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO + * COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE THE STEP + * CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN. + */ + *info = 3; + bound = ae_true; + theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp; + s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state); + + /* + * THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND + * TO INFINITY IN THE DIRECTION OF THE STEP. + */ + gamma = s*ae_sqrt(ae_maxreal(0, ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state); + if( ae_fp_greater(*stp,*stx) ) + { + gamma = -gamma; + } + p = gamma-dp+theta; + q = gamma+(*dx-dp)+gamma; + r = p/q; + if( ae_fp_less(r,0)&&ae_fp_neq(gamma,0) ) + { + stpc = *stp+r*(*stx-(*stp)); + } + else + { + if( ae_fp_greater(*stp,*stx) ) + { + stpc = stmax; + } + else + { + stpc = stmin; + } + } + stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp)); + if( *brackt ) + { + if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) ) + { + stpf = stpc; + } + else + { + stpf = stpq; + } + } + else + { + if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) ) + { + stpf = stpc; + } + else + { + stpf = stpq; + } + } + } + else + { + + /* + * FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE + * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES + * NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP + * IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN. + */ + *info = 4; + bound = ae_false; + if( *brackt ) + { + theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp; + s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state); + gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state); + if( ae_fp_greater(*stp,*sty) ) + { + gamma = -gamma; + } + p = gamma-dp+theta; + q = gamma-dp+gamma+(*dy); + r = p/q; + stpc = *stp+r*(*sty-(*stp)); + stpf = stpc; + } + else + { + if( ae_fp_greater(*stp,*stx) ) + { + stpf = stmax; + } + else + { + stpf = stmin; + } + } + } + } + } + + /* + * UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT + * DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE. + */ + if( ae_fp_greater(fp,*fx) ) + { + *sty = *stp; + *fy = fp; + *dy = dp; + } + else + { + if( ae_fp_less(sgnd,0.0) ) + { + *sty = *stx; + *fy = *fx; + *dy = *dx; + } + *stx = *stp; + *fx = fp; + *dx = dp; + } + + /* + * COMPUTE THE NEW STEP AND SAFEGUARD IT. + */ + stpf = ae_minreal(stmax, stpf, _state); + stpf = ae_maxreal(stmin, stpf, _state); + *stp = stpf; + if( *brackt&&bound ) + { + if( ae_fp_greater(*sty,*stx) ) + { + *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state); + } + else + { + *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state); + } + } +} + + +ae_bool _linminstate_init(linminstate* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _linminstate_init_copy(linminstate* dst, linminstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->brackt = src->brackt; + dst->stage1 = src->stage1; + dst->infoc = src->infoc; + dst->dg = src->dg; + dst->dgm = src->dgm; + dst->dginit = src->dginit; + dst->dgtest = src->dgtest; + dst->dgx = src->dgx; + dst->dgxm = src->dgxm; + dst->dgy = src->dgy; + dst->dgym = src->dgym; + dst->finit = src->finit; + dst->ftest1 = src->ftest1; + dst->fm = src->fm; + dst->fx = src->fx; + dst->fxm = src->fxm; + dst->fy = src->fy; + dst->fym = src->fym; + dst->stx = src->stx; + dst->sty = src->sty; + dst->stmin = src->stmin; + dst->stmax = src->stmax; + dst->width = src->width; + dst->width1 = src->width1; + dst->xtrapf = src->xtrapf; + return ae_true; +} + + +void _linminstate_clear(linminstate* p) +{ +} + + +ae_bool _armijostate_init(armijostate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->s, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _armijostate_init_copy(armijostate* dst, armijostate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->needf = src->needf; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->f = src->f; + dst->n = src->n; + if( !ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->s, &src->s, _state, make_automatic) ) + return ae_false; + dst->stplen = src->stplen; + dst->fcur = src->fcur; + dst->stpmax = src->stpmax; + dst->fmax = src->fmax; + dst->nfev = src->nfev; + dst->info = src->info; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _armijostate_clear(armijostate* p) +{ + ae_vector_clear(&p->x); + ae_vector_clear(&p->xbase); + ae_vector_clear(&p->s); + _rcommstate_clear(&p->rstate); +} + + + + +/************************************************************************* +This subroutine generates FFT plan - a decomposition of a N-length FFT to +the more simpler operations. Plan consists of the root entry and the child +entries. + +Subroutine parameters: + N task size + +Output parameters: + Plan plan + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +void ftbasegeneratecomplexfftplan(ae_int_t n, + ftplan* plan, + ae_state *_state) +{ + ae_int_t planarraysize; + ae_int_t plansize; + ae_int_t precomputedsize; + ae_int_t tmpmemsize; + ae_int_t stackmemsize; + ae_int_t stackptr; + + _ftplan_clear(plan); + + planarraysize = 1; + plansize = 0; + precomputedsize = 0; + stackmemsize = 0; + stackptr = 0; + tmpmemsize = 2*n; + ae_vector_set_length(&plan->plan, planarraysize, _state); + ftbase_ftbasegenerateplanrec(n, ftbase_ftbasecffttask, plan, &plansize, &precomputedsize, &planarraysize, &tmpmemsize, &stackmemsize, stackptr, _state); + ae_assert(stackptr==0, "Internal error in FTBaseGenerateComplexFFTPlan: stack ptr!", _state); + ae_vector_set_length(&plan->stackbuf, ae_maxint(stackmemsize, 1, _state), _state); + ae_vector_set_length(&plan->tmpbuf, ae_maxint(tmpmemsize, 1, _state), _state); + ae_vector_set_length(&plan->precomputed, ae_maxint(precomputedsize, 1, _state), _state); + stackptr = 0; + ftbase_ftbaseprecomputeplanrec(plan, 0, stackptr, _state); + ae_assert(stackptr==0, "Internal error in FTBaseGenerateComplexFFTPlan: stack ptr!", _state); +} + + +/************************************************************************* +Generates real FFT plan +*************************************************************************/ +void ftbasegeneraterealfftplan(ae_int_t n, ftplan* plan, ae_state *_state) +{ + ae_int_t planarraysize; + ae_int_t plansize; + ae_int_t precomputedsize; + ae_int_t tmpmemsize; + ae_int_t stackmemsize; + ae_int_t stackptr; + + _ftplan_clear(plan); + + planarraysize = 1; + plansize = 0; + precomputedsize = 0; + stackmemsize = 0; + stackptr = 0; + tmpmemsize = 2*n; + ae_vector_set_length(&plan->plan, planarraysize, _state); + ftbase_ftbasegenerateplanrec(n, ftbase_ftbaserffttask, plan, &plansize, &precomputedsize, &planarraysize, &tmpmemsize, &stackmemsize, stackptr, _state); + ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFFTPlan: stack ptr!", _state); + ae_vector_set_length(&plan->stackbuf, ae_maxint(stackmemsize, 1, _state), _state); + ae_vector_set_length(&plan->tmpbuf, ae_maxint(tmpmemsize, 1, _state), _state); + ae_vector_set_length(&plan->precomputed, ae_maxint(precomputedsize, 1, _state), _state); + stackptr = 0; + ftbase_ftbaseprecomputeplanrec(plan, 0, stackptr, _state); + ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFFTPlan: stack ptr!", _state); +} + + +/************************************************************************* +Generates real FHT plan +*************************************************************************/ +void ftbasegeneraterealfhtplan(ae_int_t n, ftplan* plan, ae_state *_state) +{ + ae_int_t planarraysize; + ae_int_t plansize; + ae_int_t precomputedsize; + ae_int_t tmpmemsize; + ae_int_t stackmemsize; + ae_int_t stackptr; + + _ftplan_clear(plan); + + planarraysize = 1; + plansize = 0; + precomputedsize = 0; + stackmemsize = 0; + stackptr = 0; + tmpmemsize = n; + ae_vector_set_length(&plan->plan, planarraysize, _state); + ftbase_ftbasegenerateplanrec(n, ftbase_ftbaserfhttask, plan, &plansize, &precomputedsize, &planarraysize, &tmpmemsize, &stackmemsize, stackptr, _state); + ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFHTPlan: stack ptr!", _state); + ae_vector_set_length(&plan->stackbuf, ae_maxint(stackmemsize, 1, _state), _state); + ae_vector_set_length(&plan->tmpbuf, ae_maxint(tmpmemsize, 1, _state), _state); + ae_vector_set_length(&plan->precomputed, ae_maxint(precomputedsize, 1, _state), _state); + stackptr = 0; + ftbase_ftbaseprecomputeplanrec(plan, 0, stackptr, _state); + ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFHTPlan: stack ptr!", _state); +} + + +/************************************************************************* +This subroutine executes FFT/FHT plan. + +If Plan is a: +* complex FFT plan - sizeof(A)=2*N, + A contains interleaved real/imaginary values +* real FFT plan - sizeof(A)=2*N, + A contains real values interleaved with zeros +* real FHT plan - sizeof(A)=2*N, + A contains real values interleaved with zeros + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +void ftbaseexecuteplan(/* Real */ ae_vector* a, + ae_int_t aoffset, + ae_int_t n, + ftplan* plan, + ae_state *_state) +{ + ae_int_t stackptr; + + + stackptr = 0; + ftbaseexecuteplanrec(a, aoffset, plan, 0, stackptr, _state); +} + + +/************************************************************************* +Recurrent subroutine for the FTBaseExecutePlan + +Parameters: + A FFT'ed array + AOffset offset of the FFT'ed part (distance is measured in doubles) + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +void ftbaseexecuteplanrec(/* Real */ ae_vector* a, + ae_int_t aoffset, + ftplan* plan, + ae_int_t entryoffset, + ae_int_t stackptr, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_int_t n1; + ae_int_t n2; + ae_int_t n; + ae_int_t m; + ae_int_t offs; + ae_int_t offs1; + ae_int_t offs2; + ae_int_t offsa; + ae_int_t offsb; + ae_int_t offsp; + double hk; + double hnk; + double x; + double y; + double bx; + double by; + ae_vector emptyarray; + double a0x; + double a0y; + double a1x; + double a1y; + double a2x; + double a2y; + double a3x; + double a3y; + double v0; + double v1; + double v2; + double v3; + double t1x; + double t1y; + double t2x; + double t2y; + double t3x; + double t3y; + double t4x; + double t4y; + double t5x; + double t5y; + double m1x; + double m1y; + double m2x; + double m2y; + double m3x; + double m3y; + double m4x; + double m4y; + double m5x; + double m5y; + double s1x; + double s1y; + double s2x; + double s2y; + double s3x; + double s3y; + double s4x; + double s4y; + double s5x; + double s5y; + double c1; + double c2; + double c3; + double c4; + double c5; + ae_vector tmp; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&emptyarray, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftemptyplan ) + { + ae_frame_leave(_state); + return; + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcooleytukeyplan ) + { + + /* + * Cooley-Tukey plan + * * transposition + * * row-wise FFT + * * twiddle factors: + * - TwBase is a basis twiddle factor for I=1, J=1 + * - TwRow is a twiddle factor for a second element in a row (J=1) + * - Tw is a twiddle factor for a current element + * * transposition again + * * row-wise FFT again + */ + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + ftbase_internalcomplexlintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state); + for(i=0; i<=n2-1; i++) + { + ftbaseexecuteplanrec(a, aoffset+i*n1*2, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state); + } + ftbase_ffttwcalc(a, aoffset, n1, n2, _state); + ftbase_internalcomplexlintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state); + for(i=0; i<=n1-1; i++) + { + ftbaseexecuteplanrec(a, aoffset+i*n2*2, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state); + } + ftbase_internalcomplexlintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state); + ae_frame_leave(_state); + return; + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftrealcooleytukeyplan ) + { + + /* + * Cooley-Tukey plan + * * transposition + * * row-wise FFT + * * twiddle factors: + * - TwBase is a basis twiddle factor for I=1, J=1 + * - TwRow is a twiddle factor for a second element in a row (J=1) + * - Tw is a twiddle factor for a current element + * * transposition again + * * row-wise FFT again + */ + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + ftbase_internalcomplexlintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state); + for(i=0; i<=n1/2-1; i++) + { + + /* + * pack two adjacent smaller real FFT's together, + * make one complex FFT, + * unpack result + */ + offs = aoffset+2*i*n2*2; + for(k=0; k<=n2-1; k++) + { + a->ptr.p_double[offs+2*k+1] = a->ptr.p_double[offs+2*n2+2*k+0]; + } + ftbaseexecuteplanrec(a, offs, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state); + plan->tmpbuf.ptr.p_double[0] = a->ptr.p_double[offs+0]; + plan->tmpbuf.ptr.p_double[1] = 0; + plan->tmpbuf.ptr.p_double[2*n2+0] = a->ptr.p_double[offs+1]; + plan->tmpbuf.ptr.p_double[2*n2+1] = 0; + for(k=1; k<=n2-1; k++) + { + offs1 = 2*k; + offs2 = 2*n2+2*k; + hk = a->ptr.p_double[offs+2*k+0]; + hnk = a->ptr.p_double[offs+2*(n2-k)+0]; + plan->tmpbuf.ptr.p_double[offs1+0] = 0.5*(hk+hnk); + plan->tmpbuf.ptr.p_double[offs2+1] = -0.5*(hk-hnk); + hk = a->ptr.p_double[offs+2*k+1]; + hnk = a->ptr.p_double[offs+2*(n2-k)+1]; + plan->tmpbuf.ptr.p_double[offs2+0] = 0.5*(hk+hnk); + plan->tmpbuf.ptr.p_double[offs1+1] = 0.5*(hk-hnk); + } + ae_v_move(&a->ptr.p_double[offs], 1, &plan->tmpbuf.ptr.p_double[0], 1, ae_v_len(offs,offs+2*n2*2-1)); + } + if( n1%2!=0 ) + { + ftbaseexecuteplanrec(a, aoffset+(n1-1)*n2*2, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state); + } + ftbase_ffttwcalc(a, aoffset, n2, n1, _state); + ftbase_internalcomplexlintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state); + for(i=0; i<=n2-1; i++) + { + ftbaseexecuteplanrec(a, aoffset+i*n1*2, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state); + } + ftbase_internalcomplexlintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state); + ae_frame_leave(_state); + return; + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcooleytukeyplan ) + { + + /* + * Cooley-Tukey FHT plan: + * * transpose \ + * * smaller FHT's | + * * pre-process | + * * multiply by twiddle factors | corresponds to multiplication by H1 + * * post-process | + * * transpose again / + * * multiply by H2 (smaller FHT's) + * * final transposition + * + * For more details see Vitezslav Vesely, "Fast algorithms + * of Fourier and Hartley transform and their implementation in MATLAB", + * page 31. + */ + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + n = n1*n2; + ftbase_internalreallintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state); + for(i=0; i<=n2-1; i++) + { + ftbaseexecuteplanrec(a, aoffset+i*n1, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state); + } + for(i=0; i<=n2-1; i++) + { + for(j=0; j<=n1-1; j++) + { + offsa = aoffset+i*n1; + hk = a->ptr.p_double[offsa+j]; + hnk = a->ptr.p_double[offsa+(n1-j)%n1]; + offs = 2*(i*n1+j); + plan->tmpbuf.ptr.p_double[offs+0] = -0.5*(hnk-hk); + plan->tmpbuf.ptr.p_double[offs+1] = 0.5*(hk+hnk); + } + } + ftbase_ffttwcalc(&plan->tmpbuf, 0, n1, n2, _state); + for(j=0; j<=n1-1; j++) + { + a->ptr.p_double[aoffset+j] = plan->tmpbuf.ptr.p_double[2*j+0]+plan->tmpbuf.ptr.p_double[2*j+1]; + } + if( n2%2==0 ) + { + offs = 2*(n2/2)*n1; + offsa = aoffset+n2/2*n1; + for(j=0; j<=n1-1; j++) + { + a->ptr.p_double[offsa+j] = plan->tmpbuf.ptr.p_double[offs+2*j+0]+plan->tmpbuf.ptr.p_double[offs+2*j+1]; + } + } + for(i=1; i<=(n2+1)/2-1; i++) + { + offs = 2*i*n1; + offs2 = 2*(n2-i)*n1; + offsa = aoffset+i*n1; + for(j=0; j<=n1-1; j++) + { + a->ptr.p_double[offsa+j] = plan->tmpbuf.ptr.p_double[offs+2*j+1]+plan->tmpbuf.ptr.p_double[offs2+2*j+0]; + } + offsa = aoffset+(n2-i)*n1; + for(j=0; j<=n1-1; j++) + { + a->ptr.p_double[offsa+j] = plan->tmpbuf.ptr.p_double[offs+2*j+0]+plan->tmpbuf.ptr.p_double[offs2+2*j+1]; + } + } + ftbase_internalreallintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state); + for(i=0; i<=n1-1; i++) + { + ftbaseexecuteplanrec(a, aoffset+i*n2, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state); + } + ftbase_internalreallintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state); + ae_frame_leave(_state); + return; + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtn2plan ) + { + + /* + * Cooley-Tukey FHT plan + */ + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + n = n1*n2; + ftbase_reffht(a, n, aoffset, _state); + ae_frame_leave(_state); + return; + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcodeletplan ) + { + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + n = n1*n2; + if( n==2 ) + { + a0x = a->ptr.p_double[aoffset+0]; + a0y = a->ptr.p_double[aoffset+1]; + a1x = a->ptr.p_double[aoffset+2]; + a1y = a->ptr.p_double[aoffset+3]; + v0 = a0x+a1x; + v1 = a0y+a1y; + v2 = a0x-a1x; + v3 = a0y-a1y; + a->ptr.p_double[aoffset+0] = v0; + a->ptr.p_double[aoffset+1] = v1; + a->ptr.p_double[aoffset+2] = v2; + a->ptr.p_double[aoffset+3] = v3; + ae_frame_leave(_state); + return; + } + if( n==3 ) + { + offs = plan->plan.ptr.p_int[entryoffset+7]; + c1 = plan->precomputed.ptr.p_double[offs+0]; + c2 = plan->precomputed.ptr.p_double[offs+1]; + a0x = a->ptr.p_double[aoffset+0]; + a0y = a->ptr.p_double[aoffset+1]; + a1x = a->ptr.p_double[aoffset+2]; + a1y = a->ptr.p_double[aoffset+3]; + a2x = a->ptr.p_double[aoffset+4]; + a2y = a->ptr.p_double[aoffset+5]; + t1x = a1x+a2x; + t1y = a1y+a2y; + a0x = a0x+t1x; + a0y = a0y+t1y; + m1x = c1*t1x; + m1y = c1*t1y; + m2x = c2*(a1y-a2y); + m2y = c2*(a2x-a1x); + s1x = a0x+m1x; + s1y = a0y+m1y; + a1x = s1x+m2x; + a1y = s1y+m2y; + a2x = s1x-m2x; + a2y = s1y-m2y; + a->ptr.p_double[aoffset+0] = a0x; + a->ptr.p_double[aoffset+1] = a0y; + a->ptr.p_double[aoffset+2] = a1x; + a->ptr.p_double[aoffset+3] = a1y; + a->ptr.p_double[aoffset+4] = a2x; + a->ptr.p_double[aoffset+5] = a2y; + ae_frame_leave(_state); + return; + } + if( n==4 ) + { + a0x = a->ptr.p_double[aoffset+0]; + a0y = a->ptr.p_double[aoffset+1]; + a1x = a->ptr.p_double[aoffset+2]; + a1y = a->ptr.p_double[aoffset+3]; + a2x = a->ptr.p_double[aoffset+4]; + a2y = a->ptr.p_double[aoffset+5]; + a3x = a->ptr.p_double[aoffset+6]; + a3y = a->ptr.p_double[aoffset+7]; + t1x = a0x+a2x; + t1y = a0y+a2y; + t2x = a1x+a3x; + t2y = a1y+a3y; + m2x = a0x-a2x; + m2y = a0y-a2y; + m3x = a1y-a3y; + m3y = a3x-a1x; + a->ptr.p_double[aoffset+0] = t1x+t2x; + a->ptr.p_double[aoffset+1] = t1y+t2y; + a->ptr.p_double[aoffset+4] = t1x-t2x; + a->ptr.p_double[aoffset+5] = t1y-t2y; + a->ptr.p_double[aoffset+2] = m2x+m3x; + a->ptr.p_double[aoffset+3] = m2y+m3y; + a->ptr.p_double[aoffset+6] = m2x-m3x; + a->ptr.p_double[aoffset+7] = m2y-m3y; + ae_frame_leave(_state); + return; + } + if( n==5 ) + { + offs = plan->plan.ptr.p_int[entryoffset+7]; + c1 = plan->precomputed.ptr.p_double[offs+0]; + c2 = plan->precomputed.ptr.p_double[offs+1]; + c3 = plan->precomputed.ptr.p_double[offs+2]; + c4 = plan->precomputed.ptr.p_double[offs+3]; + c5 = plan->precomputed.ptr.p_double[offs+4]; + t1x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+8]; + t1y = a->ptr.p_double[aoffset+3]+a->ptr.p_double[aoffset+9]; + t2x = a->ptr.p_double[aoffset+4]+a->ptr.p_double[aoffset+6]; + t2y = a->ptr.p_double[aoffset+5]+a->ptr.p_double[aoffset+7]; + t3x = a->ptr.p_double[aoffset+2]-a->ptr.p_double[aoffset+8]; + t3y = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+9]; + t4x = a->ptr.p_double[aoffset+6]-a->ptr.p_double[aoffset+4]; + t4y = a->ptr.p_double[aoffset+7]-a->ptr.p_double[aoffset+5]; + t5x = t1x+t2x; + t5y = t1y+t2y; + a->ptr.p_double[aoffset+0] = a->ptr.p_double[aoffset+0]+t5x; + a->ptr.p_double[aoffset+1] = a->ptr.p_double[aoffset+1]+t5y; + m1x = c1*t5x; + m1y = c1*t5y; + m2x = c2*(t1x-t2x); + m2y = c2*(t1y-t2y); + m3x = -c3*(t3y+t4y); + m3y = c3*(t3x+t4x); + m4x = -c4*t4y; + m4y = c4*t4x; + m5x = -c5*t3y; + m5y = c5*t3x; + s3x = m3x-m4x; + s3y = m3y-m4y; + s5x = m3x+m5x; + s5y = m3y+m5y; + s1x = a->ptr.p_double[aoffset+0]+m1x; + s1y = a->ptr.p_double[aoffset+1]+m1y; + s2x = s1x+m2x; + s2y = s1y+m2y; + s4x = s1x-m2x; + s4y = s1y-m2y; + a->ptr.p_double[aoffset+2] = s2x+s3x; + a->ptr.p_double[aoffset+3] = s2y+s3y; + a->ptr.p_double[aoffset+4] = s4x+s5x; + a->ptr.p_double[aoffset+5] = s4y+s5y; + a->ptr.p_double[aoffset+6] = s4x-s5x; + a->ptr.p_double[aoffset+7] = s4y-s5y; + a->ptr.p_double[aoffset+8] = s2x-s3x; + a->ptr.p_double[aoffset+9] = s2y-s3y; + ae_frame_leave(_state); + return; + } + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcodeletplan ) + { + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + n = n1*n2; + if( n==2 ) + { + a0x = a->ptr.p_double[aoffset+0]; + a1x = a->ptr.p_double[aoffset+1]; + a->ptr.p_double[aoffset+0] = a0x+a1x; + a->ptr.p_double[aoffset+1] = a0x-a1x; + ae_frame_leave(_state); + return; + } + if( n==3 ) + { + offs = plan->plan.ptr.p_int[entryoffset+7]; + c1 = plan->precomputed.ptr.p_double[offs+0]; + c2 = plan->precomputed.ptr.p_double[offs+1]; + a0x = a->ptr.p_double[aoffset+0]; + a1x = a->ptr.p_double[aoffset+1]; + a2x = a->ptr.p_double[aoffset+2]; + t1x = a1x+a2x; + a0x = a0x+t1x; + m1x = c1*t1x; + m2y = c2*(a2x-a1x); + s1x = a0x+m1x; + a->ptr.p_double[aoffset+0] = a0x; + a->ptr.p_double[aoffset+1] = s1x-m2y; + a->ptr.p_double[aoffset+2] = s1x+m2y; + ae_frame_leave(_state); + return; + } + if( n==4 ) + { + a0x = a->ptr.p_double[aoffset+0]; + a1x = a->ptr.p_double[aoffset+1]; + a2x = a->ptr.p_double[aoffset+2]; + a3x = a->ptr.p_double[aoffset+3]; + t1x = a0x+a2x; + t2x = a1x+a3x; + m2x = a0x-a2x; + m3y = a3x-a1x; + a->ptr.p_double[aoffset+0] = t1x+t2x; + a->ptr.p_double[aoffset+1] = m2x-m3y; + a->ptr.p_double[aoffset+2] = t1x-t2x; + a->ptr.p_double[aoffset+3] = m2x+m3y; + ae_frame_leave(_state); + return; + } + if( n==5 ) + { + offs = plan->plan.ptr.p_int[entryoffset+7]; + c1 = plan->precomputed.ptr.p_double[offs+0]; + c2 = plan->precomputed.ptr.p_double[offs+1]; + c3 = plan->precomputed.ptr.p_double[offs+2]; + c4 = plan->precomputed.ptr.p_double[offs+3]; + c5 = plan->precomputed.ptr.p_double[offs+4]; + t1x = a->ptr.p_double[aoffset+1]+a->ptr.p_double[aoffset+4]; + t2x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+3]; + t3x = a->ptr.p_double[aoffset+1]-a->ptr.p_double[aoffset+4]; + t4x = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+2]; + t5x = t1x+t2x; + v0 = a->ptr.p_double[aoffset+0]+t5x; + a->ptr.p_double[aoffset+0] = v0; + m2x = c2*(t1x-t2x); + m3y = c3*(t3x+t4x); + s3y = m3y-c4*t4x; + s5y = m3y+c5*t3x; + s1x = v0+c1*t5x; + s2x = s1x+m2x; + s4x = s1x-m2x; + a->ptr.p_double[aoffset+1] = s2x-s3y; + a->ptr.p_double[aoffset+2] = s4x-s5y; + a->ptr.p_double[aoffset+3] = s4x+s5y; + a->ptr.p_double[aoffset+4] = s2x+s3y; + ae_frame_leave(_state); + return; + } + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftbluesteinplan ) + { + + /* + * Bluestein plan: + * 1. multiply by precomputed coefficients + * 2. make convolution: forward FFT, multiplication by precomputed FFT + * and backward FFT. backward FFT is represented as + * + * invfft(x) = fft(x')'/M + * + * for performance reasons reduction of inverse FFT to + * forward FFT is merged with multiplication of FFT components + * and last stage of Bluestein's transformation. + * 3. post-multiplication by Bluestein factors + */ + n = plan->plan.ptr.p_int[entryoffset+1]; + m = plan->plan.ptr.p_int[entryoffset+4]; + offs = plan->plan.ptr.p_int[entryoffset+7]; + for(i=stackptr+2*n; i<=stackptr+2*m-1; i++) + { + plan->stackbuf.ptr.p_double[i] = 0; + } + offsp = offs+2*m; + offsa = aoffset; + offsb = stackptr; + for(i=0; i<=n-1; i++) + { + bx = plan->precomputed.ptr.p_double[offsp+0]; + by = plan->precomputed.ptr.p_double[offsp+1]; + x = a->ptr.p_double[offsa+0]; + y = a->ptr.p_double[offsa+1]; + plan->stackbuf.ptr.p_double[offsb+0] = x*bx-y*(-by); + plan->stackbuf.ptr.p_double[offsb+1] = x*(-by)+y*bx; + offsp = offsp+2; + offsa = offsa+2; + offsb = offsb+2; + } + ftbaseexecuteplanrec(&plan->stackbuf, stackptr, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr+2*2*m, _state); + offsb = stackptr; + offsp = offs; + for(i=0; i<=m-1; i++) + { + x = plan->stackbuf.ptr.p_double[offsb+0]; + y = plan->stackbuf.ptr.p_double[offsb+1]; + bx = plan->precomputed.ptr.p_double[offsp+0]; + by = plan->precomputed.ptr.p_double[offsp+1]; + plan->stackbuf.ptr.p_double[offsb+0] = x*bx-y*by; + plan->stackbuf.ptr.p_double[offsb+1] = -(x*by+y*bx); + offsb = offsb+2; + offsp = offsp+2; + } + ftbaseexecuteplanrec(&plan->stackbuf, stackptr, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr+2*2*m, _state); + offsb = stackptr; + offsp = offs+2*m; + offsa = aoffset; + for(i=0; i<=n-1; i++) + { + x = plan->stackbuf.ptr.p_double[offsb+0]/m; + y = -plan->stackbuf.ptr.p_double[offsb+1]/m; + bx = plan->precomputed.ptr.p_double[offsp+0]; + by = plan->precomputed.ptr.p_double[offsp+1]; + a->ptr.p_double[offsa+0] = x*bx-y*(-by); + a->ptr.p_double[offsa+1] = x*(-by)+y*bx; + offsp = offsp+2; + offsa = offsa+2; + offsb = offsb+2; + } + ae_frame_leave(_state); + return; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Returns good factorization N=N1*N2. + +Usually N1<=N2 (but not always - small N's may be exception). +if N1<>1 then N2<>1. + +Factorization is chosen depending on task type and codelets we have. + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +void ftbasefactorize(ae_int_t n, + ae_int_t tasktype, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state) +{ + ae_int_t j; + + *n1 = 0; + *n2 = 0; + + *n1 = 0; + *n2 = 0; + + /* + * try to find good codelet + */ + if( *n1*(*n2)!=n ) + { + for(j=ftbase_ftbasecodeletrecommended; j>=2; j--) + { + if( n%j==0 ) + { + *n1 = j; + *n2 = n/j; + break; + } + } + } + + /* + * try to factorize N + */ + if( *n1*(*n2)!=n ) + { + for(j=ftbase_ftbasecodeletrecommended+1; j<=n-1; j++) + { + if( n%j==0 ) + { + *n1 = j; + *n2 = n/j; + break; + } + } + } + + /* + * looks like N is prime :( + */ + if( *n1*(*n2)!=n ) + { + *n1 = 1; + *n2 = n; + } + + /* + * normalize + */ + if( *n2==1&&*n1!=1 ) + { + *n2 = *n1; + *n1 = 1; + } +} + + +/************************************************************************* +Is number smooth? + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state) +{ + ae_int_t i; + ae_bool result; + + + for(i=2; i<=ftbase_ftbasemaxsmoothfactor; i++) + { + while(n%i==0) + { + n = n/i; + } + } + result = n==1; + return result; +} + + +/************************************************************************* +Returns smallest smooth (divisible only by 2, 3, 5) number that is greater +than or equal to max(N,2) + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state) +{ + ae_int_t best; + ae_int_t result; + + + best = 2; + while(best<n) + { + best = 2*best; + } + ftbase_ftbasefindsmoothrec(n, 1, 2, &best, _state); + result = best; + return result; +} + + +/************************************************************************* +Returns smallest smooth (divisible only by 2, 3, 5) even number that is +greater than or equal to max(N,2) + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state) +{ + ae_int_t best; + ae_int_t result; + + + best = 2; + while(best<n) + { + best = 2*best; + } + ftbase_ftbasefindsmoothrec(n, 2, 2, &best, _state); + result = best; + return result; +} + + +/************************************************************************* +Returns estimate of FLOP count for the FFT. + +It is only an estimate based on operations count for the PERFECT FFT +and relative inefficiency of the algorithm actually used. + +N should be power of 2, estimates are badly wrong for non-power-of-2 N's. + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +double ftbasegetflopestimate(ae_int_t n, ae_state *_state) +{ + double result; + + + result = ftbase_ftbaseinefficiencyfactor*(4*n*ae_log(n, _state)/ae_log(2, _state)-6*n+8); + return result; +} + + +/************************************************************************* +Recurrent subroutine for the FFTGeneratePlan: + +PARAMETERS: + N plan size + IsReal whether input is real or not. + subroutine MUST NOT ignore this flag because real + inputs comes with non-initialized imaginary parts, + so ignoring this flag will result in corrupted output + HalfOut whether full output or only half of it from 0 to + floor(N/2) is needed. This flag may be ignored if + doing so will simplify calculations + Plan plan array + PlanSize size of used part (in integers) + PrecomputedSize size of precomputed array allocated yet + PlanArraySize plan array size (actual) + TmpMemSize temporary memory required size + BluesteinMemSize temporary memory required size + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_ftbasegenerateplanrec(ae_int_t n, + ae_int_t tasktype, + ftplan* plan, + ae_int_t* plansize, + ae_int_t* precomputedsize, + ae_int_t* planarraysize, + ae_int_t* tmpmemsize, + ae_int_t* stackmemsize, + ae_int_t stackptr, + ae_state *_state) +{ + ae_int_t k; + ae_int_t m; + ae_int_t n1; + ae_int_t n2; + ae_int_t esize; + ae_int_t entryoffset; + + + + /* + * prepare + */ + if( *plansize+ftbase_ftbaseplanentrysize>(*planarraysize) ) + { + ftbase_fftarrayresize(&plan->plan, planarraysize, 8*(*planarraysize), _state); + } + entryoffset = *plansize; + esize = ftbase_ftbaseplanentrysize; + *plansize = *plansize+esize; + + /* + * if N=1, generate empty plan and exit + */ + if( n==1 ) + { + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = -1; + plan->plan.ptr.p_int[entryoffset+2] = -1; + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftemptyplan; + plan->plan.ptr.p_int[entryoffset+4] = -1; + plan->plan.ptr.p_int[entryoffset+5] = -1; + plan->plan.ptr.p_int[entryoffset+6] = -1; + plan->plan.ptr.p_int[entryoffset+7] = -1; + return; + } + + /* + * generate plans + */ + ftbasefactorize(n, tasktype, &n1, &n2, _state); + if( tasktype==ftbase_ftbasecffttask||tasktype==ftbase_ftbaserffttask ) + { + + /* + * complex FFT plans + */ + if( n1!=1 ) + { + + /* + * Cooley-Tukey plan (real or complex) + * + * Note that child plans are COMPLEX + * (whether plan itself is complex or not). + */ + *tmpmemsize = ae_maxint(*tmpmemsize, 2*n1*n2, _state); + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = n1; + plan->plan.ptr.p_int[entryoffset+2] = n2; + if( tasktype==ftbase_ftbasecffttask ) + { + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftcooleytukeyplan; + } + else + { + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftrealcooleytukeyplan; + } + plan->plan.ptr.p_int[entryoffset+4] = 0; + plan->plan.ptr.p_int[entryoffset+5] = *plansize; + ftbase_ftbasegenerateplanrec(n1, ftbase_ftbasecffttask, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state); + plan->plan.ptr.p_int[entryoffset+6] = *plansize; + ftbase_ftbasegenerateplanrec(n2, ftbase_ftbasecffttask, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state); + plan->plan.ptr.p_int[entryoffset+7] = -1; + return; + } + else + { + if( ((n==2||n==3)||n==4)||n==5 ) + { + + /* + * hard-coded plan + */ + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = n1; + plan->plan.ptr.p_int[entryoffset+2] = n2; + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftcodeletplan; + plan->plan.ptr.p_int[entryoffset+4] = 0; + plan->plan.ptr.p_int[entryoffset+5] = -1; + plan->plan.ptr.p_int[entryoffset+6] = -1; + plan->plan.ptr.p_int[entryoffset+7] = *precomputedsize; + if( n==3 ) + { + *precomputedsize = *precomputedsize+2; + } + if( n==5 ) + { + *precomputedsize = *precomputedsize+5; + } + return; + } + else + { + + /* + * Bluestein's plan + * + * Select such M that M>=2*N-1, M is composite, and M's + * factors are 2, 3, 5 + */ + k = 2*n2-1; + m = ftbasefindsmooth(k, _state); + *tmpmemsize = ae_maxint(*tmpmemsize, 2*m, _state); + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = n2; + plan->plan.ptr.p_int[entryoffset+2] = -1; + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftbluesteinplan; + plan->plan.ptr.p_int[entryoffset+4] = m; + plan->plan.ptr.p_int[entryoffset+5] = *plansize; + stackptr = stackptr+2*2*m; + *stackmemsize = ae_maxint(*stackmemsize, stackptr, _state); + ftbase_ftbasegenerateplanrec(m, ftbase_ftbasecffttask, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state); + stackptr = stackptr-2*2*m; + plan->plan.ptr.p_int[entryoffset+6] = -1; + plan->plan.ptr.p_int[entryoffset+7] = *precomputedsize; + *precomputedsize = *precomputedsize+2*m+2*n; + return; + } + } + } + if( tasktype==ftbase_ftbaserfhttask ) + { + + /* + * real FHT plans + */ + if( n1!=1 ) + { + + /* + * Cooley-Tukey plan + * + */ + *tmpmemsize = ae_maxint(*tmpmemsize, 2*n1*n2, _state); + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = n1; + plan->plan.ptr.p_int[entryoffset+2] = n2; + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fhtcooleytukeyplan; + plan->plan.ptr.p_int[entryoffset+4] = 0; + plan->plan.ptr.p_int[entryoffset+5] = *plansize; + ftbase_ftbasegenerateplanrec(n1, tasktype, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state); + plan->plan.ptr.p_int[entryoffset+6] = *plansize; + ftbase_ftbasegenerateplanrec(n2, tasktype, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state); + plan->plan.ptr.p_int[entryoffset+7] = -1; + return; + } + else + { + + /* + * N2 plan + */ + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = n1; + plan->plan.ptr.p_int[entryoffset+2] = n2; + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fhtn2plan; + plan->plan.ptr.p_int[entryoffset+4] = 0; + plan->plan.ptr.p_int[entryoffset+5] = -1; + plan->plan.ptr.p_int[entryoffset+6] = -1; + plan->plan.ptr.p_int[entryoffset+7] = -1; + if( ((n==2||n==3)||n==4)||n==5 ) + { + + /* + * hard-coded plan + */ + plan->plan.ptr.p_int[entryoffset+0] = esize; + plan->plan.ptr.p_int[entryoffset+1] = n1; + plan->plan.ptr.p_int[entryoffset+2] = n2; + plan->plan.ptr.p_int[entryoffset+3] = ftbase_fhtcodeletplan; + plan->plan.ptr.p_int[entryoffset+4] = 0; + plan->plan.ptr.p_int[entryoffset+5] = -1; + plan->plan.ptr.p_int[entryoffset+6] = -1; + plan->plan.ptr.p_int[entryoffset+7] = *precomputedsize; + if( n==3 ) + { + *precomputedsize = *precomputedsize+2; + } + if( n==5 ) + { + *precomputedsize = *precomputedsize+5; + } + return; + } + return; + } + } +} + + +/************************************************************************* +Recurrent subroutine for precomputing FFT plans + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_ftbaseprecomputeplanrec(ftplan* plan, + ae_int_t entryoffset, + ae_int_t stackptr, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t n1; + ae_int_t n2; + ae_int_t n; + ae_int_t m; + ae_int_t offs; + double v; + ae_vector emptyarray; + double bx; + double by; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&emptyarray, 0, DT_REAL, _state, ae_true); + + if( (plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcooleytukeyplan||plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftrealcooleytukeyplan)||plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcooleytukeyplan ) + { + ftbase_ftbaseprecomputeplanrec(plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state); + ftbase_ftbaseprecomputeplanrec(plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state); + ae_frame_leave(_state); + return; + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcodeletplan||plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcodeletplan ) + { + n1 = plan->plan.ptr.p_int[entryoffset+1]; + n2 = plan->plan.ptr.p_int[entryoffset+2]; + n = n1*n2; + if( n==3 ) + { + offs = plan->plan.ptr.p_int[entryoffset+7]; + plan->precomputed.ptr.p_double[offs+0] = ae_cos(2*ae_pi/3, _state)-1; + plan->precomputed.ptr.p_double[offs+1] = ae_sin(2*ae_pi/3, _state); + ae_frame_leave(_state); + return; + } + if( n==5 ) + { + offs = plan->plan.ptr.p_int[entryoffset+7]; + v = 2*ae_pi/5; + plan->precomputed.ptr.p_double[offs+0] = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1; + plan->precomputed.ptr.p_double[offs+1] = (ae_cos(v, _state)-ae_cos(2*v, _state))/2; + plan->precomputed.ptr.p_double[offs+2] = -ae_sin(v, _state); + plan->precomputed.ptr.p_double[offs+3] = -(ae_sin(v, _state)+ae_sin(2*v, _state)); + plan->precomputed.ptr.p_double[offs+4] = ae_sin(v, _state)-ae_sin(2*v, _state); + ae_frame_leave(_state); + return; + } + } + if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftbluesteinplan ) + { + ftbase_ftbaseprecomputeplanrec(plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state); + n = plan->plan.ptr.p_int[entryoffset+1]; + m = plan->plan.ptr.p_int[entryoffset+4]; + offs = plan->plan.ptr.p_int[entryoffset+7]; + for(i=0; i<=2*m-1; i++) + { + plan->precomputed.ptr.p_double[offs+i] = 0; + } + for(i=0; i<=n-1; i++) + { + bx = ae_cos(ae_pi*ae_sqr(i, _state)/n, _state); + by = ae_sin(ae_pi*ae_sqr(i, _state)/n, _state); + plan->precomputed.ptr.p_double[offs+2*i+0] = bx; + plan->precomputed.ptr.p_double[offs+2*i+1] = by; + plan->precomputed.ptr.p_double[offs+2*m+2*i+0] = bx; + plan->precomputed.ptr.p_double[offs+2*m+2*i+1] = by; + if( i>0 ) + { + plan->precomputed.ptr.p_double[offs+2*(m-i)+0] = bx; + plan->precomputed.ptr.p_double[offs+2*(m-i)+1] = by; + } + } + ftbaseexecuteplanrec(&plan->precomputed, offs, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state); + ae_frame_leave(_state); + return; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Twiddle factors calculation + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_ffttwcalc(/* Real */ ae_vector* a, + ae_int_t aoffset, + ae_int_t n1, + ae_int_t n2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t n; + ae_int_t idx; + ae_int_t offs; + double x; + double y; + double twxm1; + double twy; + double twbasexm1; + double twbasey; + double twrowxm1; + double twrowy; + double tmpx; + double tmpy; + double v; + + + n = n1*n2; + v = -2*ae_pi/n; + twbasexm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); + twbasey = ae_sin(v, _state); + twrowxm1 = 0; + twrowy = 0; + for(i=0; i<=n2-1; i++) + { + twxm1 = 0; + twy = 0; + for(j=0; j<=n1-1; j++) + { + idx = i*n1+j; + offs = aoffset+2*idx; + x = a->ptr.p_double[offs+0]; + y = a->ptr.p_double[offs+1]; + tmpx = x*twxm1-y*twy; + tmpy = x*twy+y*twxm1; + a->ptr.p_double[offs+0] = x+tmpx; + a->ptr.p_double[offs+1] = y+tmpy; + + /* + * update Tw: Tw(new) = Tw(old)*TwRow + */ + if( j<n1-1 ) + { + if( j%ftbase_ftbaseupdatetw==0 ) + { + v = -2*ae_pi*i*(j+1)/n; + twxm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); + twy = ae_sin(v, _state); + } + else + { + tmpx = twrowxm1+twxm1*twrowxm1-twy*twrowy; + tmpy = twrowy+twxm1*twrowy+twy*twrowxm1; + twxm1 = twxm1+tmpx; + twy = twy+tmpy; + } + } + } + + /* + * update TwRow: TwRow(new) = TwRow(old)*TwBase + */ + if( i<n2-1 ) + { + if( j%ftbase_ftbaseupdatetw==0 ) + { + v = -2*ae_pi*(i+1)/n; + twrowxm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); + twrowy = ae_sin(v, _state); + } + else + { + tmpx = twbasexm1+twrowxm1*twbasexm1-twrowy*twbasey; + tmpy = twbasey+twrowxm1*twbasey+twrowy*twbasexm1; + twrowxm1 = twrowxm1+tmpx; + twrowy = twrowy+tmpy; + } + } + } +} + + +/************************************************************************* +Linear transpose: transpose complex matrix stored in 1-dimensional array + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a, + ae_int_t m, + ae_int_t n, + ae_int_t astart, + /* Real */ ae_vector* buf, + ae_state *_state) +{ + + + ftbase_ffticltrec(a, astart, n, buf, 0, m, m, n, _state); + ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+2*m*n-1)); +} + + +/************************************************************************* +Linear transpose: transpose real matrix stored in 1-dimensional array + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_internalreallintranspose(/* Real */ ae_vector* a, + ae_int_t m, + ae_int_t n, + ae_int_t astart, + /* Real */ ae_vector* buf, + ae_state *_state) +{ + + + ftbase_fftirltrec(a, astart, n, buf, 0, m, m, n, _state); + ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+m*n-1)); +} + + +/************************************************************************* +Recurrent subroutine for a InternalComplexLinTranspose + +Write A^T to B, where: +* A is m*n complex matrix stored in array A as pairs of real/image values, + beginning from AStart position, with AStride stride +* B is n*m complex matrix stored in array B as pairs of real/image values, + beginning from BStart position, with BStride stride +stride is measured in complex numbers, i.e. in real/image pairs. + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_ffticltrec(/* Real */ ae_vector* a, + ae_int_t astart, + ae_int_t astride, + /* Real */ ae_vector* b, + ae_int_t bstart, + ae_int_t bstride, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t idx1; + ae_int_t idx2; + ae_int_t m2; + ae_int_t m1; + ae_int_t n1; + + + if( m==0||n==0 ) + { + return; + } + if( ae_maxint(m, n, _state)<=8 ) + { + m2 = 2*bstride; + for(i=0; i<=m-1; i++) + { + idx1 = bstart+2*i; + idx2 = astart+2*i*astride; + for(j=0; j<=n-1; j++) + { + b->ptr.p_double[idx1+0] = a->ptr.p_double[idx2+0]; + b->ptr.p_double[idx1+1] = a->ptr.p_double[idx2+1]; + idx1 = idx1+m2; + idx2 = idx2+2; + } + } + return; + } + if( n>m ) + { + + /* + * New partition: + * + * "A^T -> B" becomes "(A1 A2)^T -> ( B1 ) + * ( B2 ) + */ + n1 = n/2; + if( n-n1>=8&&n1%8!=0 ) + { + n1 = n1+(8-n1%8); + } + ae_assert(n-n1>0, "Assertion failed", _state); + ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m, n1, _state); + ftbase_ffticltrec(a, astart+2*n1, astride, b, bstart+2*n1*bstride, bstride, m, n-n1, _state); + } + else + { + + /* + * New partition: + * + * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 ) + * ( A2 ) + */ + m1 = m/2; + if( m-m1>=8&&m1%8!=0 ) + { + m1 = m1+(8-m1%8); + } + ae_assert(m-m1>0, "Assertion failed", _state); + ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m1, n, _state); + ftbase_ffticltrec(a, astart+2*m1*astride, astride, b, bstart+2*m1, bstride, m-m1, n, _state); + } +} + + +/************************************************************************* +Recurrent subroutine for a InternalRealLinTranspose + + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_fftirltrec(/* Real */ ae_vector* a, + ae_int_t astart, + ae_int_t astride, + /* Real */ ae_vector* b, + ae_int_t bstart, + ae_int_t bstride, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t idx1; + ae_int_t idx2; + ae_int_t m1; + ae_int_t n1; + + + if( m==0||n==0 ) + { + return; + } + if( ae_maxint(m, n, _state)<=8 ) + { + for(i=0; i<=m-1; i++) + { + idx1 = bstart+i; + idx2 = astart+i*astride; + for(j=0; j<=n-1; j++) + { + b->ptr.p_double[idx1] = a->ptr.p_double[idx2]; + idx1 = idx1+bstride; + idx2 = idx2+1; + } + } + return; + } + if( n>m ) + { + + /* + * New partition: + * + * "A^T -> B" becomes "(A1 A2)^T -> ( B1 ) + * ( B2 ) + */ + n1 = n/2; + if( n-n1>=8&&n1%8!=0 ) + { + n1 = n1+(8-n1%8); + } + ae_assert(n-n1>0, "Assertion failed", _state); + ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m, n1, _state); + ftbase_fftirltrec(a, astart+n1, astride, b, bstart+n1*bstride, bstride, m, n-n1, _state); + } + else + { + + /* + * New partition: + * + * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 ) + * ( A2 ) + */ + m1 = m/2; + if( m-m1>=8&&m1%8!=0 ) + { + m1 = m1+(8-m1%8); + } + ae_assert(m-m1>0, "Assertion failed", _state); + ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m1, n, _state); + ftbase_fftirltrec(a, astart+m1*astride, astride, b, bstart+m1, bstride, m-m1, n, _state); + } +} + + +/************************************************************************* +recurrent subroutine for FFTFindSmoothRec + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_ftbasefindsmoothrec(ae_int_t n, + ae_int_t seed, + ae_int_t leastfactor, + ae_int_t* best, + ae_state *_state) +{ + + + ae_assert(ftbase_ftbasemaxsmoothfactor<=5, "FTBaseFindSmoothRec: internal error!", _state); + if( seed>=n ) + { + *best = ae_minint(*best, seed, _state); + return; + } + if( leastfactor<=2 ) + { + ftbase_ftbasefindsmoothrec(n, seed*2, 2, best, _state); + } + if( leastfactor<=3 ) + { + ftbase_ftbasefindsmoothrec(n, seed*3, 3, best, _state); + } + if( leastfactor<=5 ) + { + ftbase_ftbasefindsmoothrec(n, seed*5, 5, best, _state); + } +} + + +/************************************************************************* +Internal subroutine: array resize + + -- ALGLIB -- + Copyright 01.05.2009 by Bochkanov Sergey +*************************************************************************/ +static void ftbase_fftarrayresize(/* Integer */ ae_vector* a, + ae_int_t* asize, + ae_int_t newasize, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&tmp, 0, DT_INT, _state, ae_true); + + ae_vector_set_length(&tmp, *asize, _state); + for(i=0; i<=*asize-1; i++) + { + tmp.ptr.p_int[i] = a->ptr.p_int[i]; + } + ae_vector_set_length(a, newasize, _state); + for(i=0; i<=*asize-1; i++) + { + a->ptr.p_int[i] = tmp.ptr.p_int[i]; + } + *asize = newasize; + ae_frame_leave(_state); +} + + +/************************************************************************* +Reference FHT stub +*************************************************************************/ +static void ftbase_reffht(/* Real */ ae_vector* a, + ae_int_t n, + ae_int_t offs, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector buf; + ae_int_t i; + ae_int_t j; + double v; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&buf, 0, DT_REAL, _state, ae_true); + + ae_assert(n>0, "RefFHTR1D: incorrect N!", _state); + ae_vector_set_length(&buf, n, _state); + for(i=0; i<=n-1; i++) + { + v = 0; + for(j=0; j<=n-1; j++) + { + v = v+a->ptr.p_double[offs+j]*(ae_cos(2*ae_pi*i*j/n, _state)+ae_sin(2*ae_pi*i*j/n, _state)); + } + buf.ptr.p_double[i] = v; + } + for(i=0; i<=n-1; i++) + { + a->ptr.p_double[offs+i] = buf.ptr.p_double[i]; + } + ae_frame_leave(_state); +} + + +ae_bool _ftplan_init(ftplan* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->plan, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->precomputed, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->tmpbuf, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->stackbuf, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _ftplan_init_copy(ftplan* dst, ftplan* src, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init_copy(&dst->plan, &src->plan, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->precomputed, &src->precomputed, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->tmpbuf, &src->tmpbuf, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->stackbuf, &src->stackbuf, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _ftplan_clear(ftplan* p) +{ + ae_vector_clear(&p->plan); + ae_vector_clear(&p->precomputed); + ae_vector_clear(&p->tmpbuf); + ae_vector_clear(&p->stackbuf); +} + + + + +double nulog1p(double x, ae_state *_state) +{ + double z; + double lp; + double lq; + double result; + + + z = 1.0+x; + if( ae_fp_less(z,0.70710678118654752440)||ae_fp_greater(z,1.41421356237309504880) ) + { + result = ae_log(z, _state); + return result; + } + z = x*x; + lp = 4.5270000862445199635215E-5; + lp = lp*x+4.9854102823193375972212E-1; + lp = lp*x+6.5787325942061044846969E0; + lp = lp*x+2.9911919328553073277375E1; + lp = lp*x+6.0949667980987787057556E1; + lp = lp*x+5.7112963590585538103336E1; + lp = lp*x+2.0039553499201281259648E1; + lq = 1.0000000000000000000000E0; + lq = lq*x+1.5062909083469192043167E1; + lq = lq*x+8.3047565967967209469434E1; + lq = lq*x+2.2176239823732856465394E2; + lq = lq*x+3.0909872225312059774938E2; + lq = lq*x+2.1642788614495947685003E2; + lq = lq*x+6.0118660497603843919306E1; + z = -0.5*z+x*(z*lp/lq); + result = x+z; + return result; +} + + +double nuexpm1(double x, ae_state *_state) +{ + double r; + double xx; + double ep; + double eq; + double result; + + + if( ae_fp_less(x,-0.5)||ae_fp_greater(x,0.5) ) + { + result = ae_exp(x, _state)-1.0; + return result; + } + xx = x*x; + ep = 1.2617719307481059087798E-4; + ep = ep*xx+3.0299440770744196129956E-2; + ep = ep*xx+9.9999999999999999991025E-1; + eq = 3.0019850513866445504159E-6; + eq = eq*xx+2.5244834034968410419224E-3; + eq = eq*xx+2.2726554820815502876593E-1; + eq = eq*xx+2.0000000000000000000897E0; + r = x*ep; + r = r/(eq-r); + result = r+r; + return result; +} + + +double nucosm1(double x, ae_state *_state) +{ + double xx; + double c; + double result; + + + if( ae_fp_less(x,-0.25*ae_pi)||ae_fp_greater(x,0.25*ae_pi) ) + { + result = ae_cos(x, _state)-1; + return result; + } + xx = x*x; + c = 4.7377507964246204691685E-14; + c = c*xx-1.1470284843425359765671E-11; + c = c*xx+2.0876754287081521758361E-9; + c = c*xx-2.7557319214999787979814E-7; + c = c*xx+2.4801587301570552304991E-5; + c = c*xx-1.3888888888888872993737E-3; + c = c*xx+4.1666666666666666609054E-2; + result = -0.5*xx+xx*xx*c; + return result; +} + + + + + +} + diff --git a/contrib/lbfgs/alglibinternal.h b/contrib/lbfgs/alglibinternal.h new file mode 100755 index 0000000000..ee81f195c0 --- /dev/null +++ b/contrib/lbfgs/alglibinternal.h @@ -0,0 +1,707 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#ifndef _alglibinternal_pkg_h +#define _alglibinternal_pkg_h +#include "ap.h" + + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +typedef struct +{ + ae_vector ia1; + ae_vector ia2; + ae_vector ra1; + ae_vector ra2; +} apbuffers; +typedef struct +{ + ae_bool brackt; + ae_bool stage1; + ae_int_t infoc; + double dg; + double dgm; + double dginit; + double dgtest; + double dgx; + double dgxm; + double dgy; + double dgym; + double finit; + double ftest1; + double fm; + double fx; + double fxm; + double fy; + double fym; + double stx; + double sty; + double stmin; + double stmax; + double width; + double width1; + double xtrapf; +} linminstate; +typedef struct +{ + ae_bool needf; + ae_vector x; + double f; + ae_int_t n; + ae_vector xbase; + ae_vector s; + double stplen; + double fcur; + double stpmax; + ae_int_t fmax; + ae_int_t nfev; + ae_int_t info; + rcommstate rstate; +} armijostate; +typedef struct +{ + ae_vector plan; + ae_vector precomputed; + ae_vector tmpbuf; + ae_vector stackbuf; +} ftplan; + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +void tagsort(/* Real */ ae_vector* a, + ae_int_t n, + /* Integer */ ae_vector* p1, + /* Integer */ ae_vector* p2, + ae_state *_state); +void tagsortfasti(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Integer */ ae_vector* bufb, + ae_int_t n, + ae_state *_state); +void tagsortfastr(/* Real */ ae_vector* a, + /* Real */ ae_vector* b, + /* Real */ ae_vector* bufa, + /* Real */ ae_vector* bufb, + ae_int_t n, + ae_state *_state); +void tagsortfast(/* Real */ ae_vector* a, + /* Real */ ae_vector* bufa, + ae_int_t n, + ae_state *_state); +void tagheappushi(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + ae_int_t* n, + double va, + ae_int_t vb, + ae_state *_state); +void tagheapreplacetopi(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + ae_int_t n, + double va, + ae_int_t vb, + ae_state *_state); +void tagheappopi(/* Real */ ae_vector* a, + /* Integer */ ae_vector* b, + ae_int_t* n, + ae_state *_state); +void taskgenint1d(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state); +void taskgenint1dequidist(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state); +void taskgenint1dcheb1(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state); +void taskgenint1dcheb2(double a, + double b, + ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + ae_state *_state); +ae_bool aredistinct(/* Real */ ae_vector* x, + ae_int_t n, + ae_state *_state); +void bvectorsetlengthatleast(/* Boolean */ ae_vector* x, + ae_int_t n, + ae_state *_state); +void rvectorsetlengthatleast(/* Real */ ae_vector* x, + ae_int_t n, + ae_state *_state); +void rmatrixsetlengthatleast(/* Real */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state); +ae_bool isfinitevector(/* Real */ ae_vector* x, + ae_int_t n, + ae_state *_state); +ae_bool isfinitecvector(/* Complex */ ae_vector* z, + ae_int_t n, + ae_state *_state); +ae_bool apservisfinitematrix(/* Real */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state); +ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state); +ae_bool isfinitertrmatrix(/* Real */ ae_matrix* x, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +ae_bool apservisfiniteornanmatrix(/* Real */ ae_matrix* x, + ae_int_t m, + ae_int_t n, + ae_state *_state); +double safepythag2(double x, double y, ae_state *_state); +double safepythag3(double x, double y, double z, ae_state *_state); +ae_int_t saferdiv(double x, double y, double* r, ae_state *_state); +double safeminposrv(double x, double y, double v, ae_state *_state); +void apperiodicmap(double* x, + double a, + double b, + double* k, + ae_state *_state); +double boundval(double x, double b1, double b2, ae_state *_state); +ae_bool _apbuffers_init(apbuffers* p, ae_state *_state, ae_bool make_automatic); +ae_bool _apbuffers_init_copy(apbuffers* dst, apbuffers* src, ae_state *_state, ae_bool make_automatic); +void _apbuffers_clear(apbuffers* p); +void rankx(/* Real */ ae_vector* x, + ae_int_t n, + apbuffers* buf, + ae_state *_state); +ae_bool cmatrixrank1f(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_vector* u, + ae_int_t iu, + /* Complex */ ae_vector* v, + ae_int_t iv, + ae_state *_state); +ae_bool rmatrixrank1f(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_vector* u, + ae_int_t iu, + /* Real */ ae_vector* v, + ae_int_t iv, + ae_state *_state); +ae_bool cmatrixmvf(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Complex */ ae_vector* x, + ae_int_t ix, + /* Complex */ ae_vector* y, + ae_int_t iy, + ae_state *_state); +ae_bool rmatrixmvf(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Real */ ae_vector* x, + ae_int_t ix, + /* Real */ ae_vector* y, + ae_int_t iy, + ae_state *_state); +ae_bool cmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +ae_bool cmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +ae_bool rmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +ae_bool rmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +ae_bool cmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state); +ae_bool rmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state); +ae_bool rmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state); +ae_bool cmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state); +double vectornorm2(/* Real */ ae_vector* x, + ae_int_t i1, + ae_int_t i2, + ae_state *_state); +ae_int_t vectoridxabsmax(/* Real */ ae_vector* x, + ae_int_t i1, + ae_int_t i2, + ae_state *_state); +ae_int_t columnidxabsmax(/* Real */ ae_matrix* x, + ae_int_t i1, + ae_int_t i2, + ae_int_t j, + ae_state *_state); +ae_int_t rowidxabsmax(/* Real */ ae_matrix* x, + ae_int_t j1, + ae_int_t j2, + ae_int_t i, + ae_state *_state); +double upperhessenberg1norm(/* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t i2, + ae_int_t j1, + ae_int_t j2, + /* Real */ ae_vector* work, + ae_state *_state); +void copymatrix(/* Real */ ae_matrix* a, + ae_int_t is1, + ae_int_t is2, + ae_int_t js1, + ae_int_t js2, + /* Real */ ae_matrix* b, + ae_int_t id1, + ae_int_t id2, + ae_int_t jd1, + ae_int_t jd2, + ae_state *_state); +void inplacetranspose(/* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t i2, + ae_int_t j1, + ae_int_t j2, + /* Real */ ae_vector* work, + ae_state *_state); +void copyandtranspose(/* Real */ ae_matrix* a, + ae_int_t is1, + ae_int_t is2, + ae_int_t js1, + ae_int_t js2, + /* Real */ ae_matrix* b, + ae_int_t id1, + ae_int_t id2, + ae_int_t jd1, + ae_int_t jd2, + ae_state *_state); +void matrixvectormultiply(/* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t i2, + ae_int_t j1, + ae_int_t j2, + ae_bool trans, + /* Real */ ae_vector* x, + ae_int_t ix1, + ae_int_t ix2, + double alpha, + /* Real */ ae_vector* y, + ae_int_t iy1, + ae_int_t iy2, + double beta, + ae_state *_state); +double pythag2(double x, double y, ae_state *_state); +void matrixmatrixmultiply(/* Real */ ae_matrix* a, + ae_int_t ai1, + ae_int_t ai2, + ae_int_t aj1, + ae_int_t aj2, + ae_bool transa, + /* Real */ ae_matrix* b, + ae_int_t bi1, + ae_int_t bi2, + ae_int_t bj1, + ae_int_t bj2, + ae_bool transb, + double alpha, + /* Real */ ae_matrix* c, + ae_int_t ci1, + ae_int_t ci2, + ae_int_t cj1, + ae_int_t cj2, + double beta, + /* Real */ ae_vector* work, + ae_state *_state); +void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Complex */ ae_vector* x, + ae_complex alpha, + /* Complex */ ae_vector* y, + ae_state *_state); +void hermitianrank2update(/* Complex */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Complex */ ae_vector* x, + /* Complex */ ae_vector* y, + /* Complex */ ae_vector* t, + ae_complex alpha, + ae_state *_state); +void generatereflection(/* Real */ ae_vector* x, + ae_int_t n, + double* tau, + ae_state *_state); +void applyreflectionfromtheleft(/* Real */ ae_matrix* c, + double tau, + /* Real */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* work, + ae_state *_state); +void applyreflectionfromtheright(/* Real */ ae_matrix* c, + double tau, + /* Real */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* work, + ae_state *_state); +void complexgeneratereflection(/* Complex */ ae_vector* x, + ae_int_t n, + ae_complex* tau, + ae_state *_state); +void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c, + ae_complex tau, + /* Complex */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Complex */ ae_vector* work, + ae_state *_state); +void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c, + ae_complex tau, + /* Complex */ ae_vector* v, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Complex */ ae_vector* work, + ae_state *_state); +void symmetricmatrixvectormultiply(/* Real */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* x, + double alpha, + /* Real */ ae_vector* y, + ae_state *_state); +void symmetricrank2update(/* Real */ ae_matrix* a, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* x, + /* Real */ ae_vector* y, + /* Real */ ae_vector* t, + double alpha, + ae_state *_state); +void applyrotationsfromtheleft(ae_bool isforward, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* c, + /* Real */ ae_vector* s, + /* Real */ ae_matrix* a, + /* Real */ ae_vector* work, + ae_state *_state); +void applyrotationsfromtheright(ae_bool isforward, + ae_int_t m1, + ae_int_t m2, + ae_int_t n1, + ae_int_t n2, + /* Real */ ae_vector* c, + /* Real */ ae_vector* s, + /* Real */ ae_matrix* a, + /* Real */ ae_vector* work, + ae_state *_state); +void generaterotation(double f, + double g, + double* cs, + double* sn, + double* r, + ae_state *_state); +ae_bool upperhessenbergschurdecomposition(/* Real */ ae_matrix* h, + ae_int_t n, + /* Real */ ae_matrix* s, + ae_state *_state); +void internalschurdecomposition(/* Real */ ae_matrix* h, + ae_int_t n, + ae_int_t tneeded, + ae_int_t zneeded, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + /* Real */ ae_matrix* z, + ae_int_t* info, + ae_state *_state); +void rmatrixtrsafesolve(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* x, + double* s, + ae_bool isupper, + ae_bool istrans, + ae_bool isunit, + ae_state *_state); +void safesolvetriangular(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* x, + double* s, + ae_bool isupper, + ae_bool istrans, + ae_bool isunit, + ae_bool normin, + /* Real */ ae_vector* cnorm, + ae_state *_state); +ae_bool rmatrixscaledtrsafesolve(/* Real */ ae_matrix* a, + double sa, + ae_int_t n, + /* Real */ ae_vector* x, + ae_bool isupper, + ae_int_t trans, + ae_bool isunit, + double maxgrowth, + ae_state *_state); +ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a, + double sa, + ae_int_t n, + /* Complex */ ae_vector* x, + ae_bool isupper, + ae_int_t trans, + ae_bool isunit, + double maxgrowth, + ae_state *_state); +void xdot(/* Real */ ae_vector* a, + /* Real */ ae_vector* b, + ae_int_t n, + /* Real */ ae_vector* temp, + double* r, + double* rerr, + ae_state *_state); +void xcdot(/* Complex */ ae_vector* a, + /* Complex */ ae_vector* b, + ae_int_t n, + /* Real */ ae_vector* temp, + ae_complex* r, + double* rerr, + ae_state *_state); +void linminnormalized(/* Real */ ae_vector* d, + double* stp, + ae_int_t n, + ae_state *_state); +void mcsrch(ae_int_t n, + /* Real */ ae_vector* x, + double* f, + /* Real */ ae_vector* g, + /* Real */ ae_vector* s, + double* stp, + double stpmax, + ae_int_t* info, + ae_int_t* nfev, + /* Real */ ae_vector* wa, + linminstate* state, + ae_int_t* stage, + ae_state *_state); +void armijocreate(ae_int_t n, + /* Real */ ae_vector* x, + double f, + /* Real */ ae_vector* s, + double stp, + double stpmax, + ae_int_t fmax, + armijostate* state, + ae_state *_state); +ae_bool armijoiteration(armijostate* state, ae_state *_state); +void armijoresults(armijostate* state, + ae_int_t* info, + double* stp, + double* f, + ae_state *_state); +ae_bool _linminstate_init(linminstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _linminstate_init_copy(linminstate* dst, linminstate* src, ae_state *_state, ae_bool make_automatic); +void _linminstate_clear(linminstate* p); +ae_bool _armijostate_init(armijostate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _armijostate_init_copy(armijostate* dst, armijostate* src, ae_state *_state, ae_bool make_automatic); +void _armijostate_clear(armijostate* p); +void ftbasegeneratecomplexfftplan(ae_int_t n, + ftplan* plan, + ae_state *_state); +void ftbasegeneraterealfftplan(ae_int_t n, ftplan* plan, ae_state *_state); +void ftbasegeneraterealfhtplan(ae_int_t n, ftplan* plan, ae_state *_state); +void ftbaseexecuteplan(/* Real */ ae_vector* a, + ae_int_t aoffset, + ae_int_t n, + ftplan* plan, + ae_state *_state); +void ftbaseexecuteplanrec(/* Real */ ae_vector* a, + ae_int_t aoffset, + ftplan* plan, + ae_int_t entryoffset, + ae_int_t stackptr, + ae_state *_state); +void ftbasefactorize(ae_int_t n, + ae_int_t tasktype, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state); +ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state); +ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state); +ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state); +double ftbasegetflopestimate(ae_int_t n, ae_state *_state); +ae_bool _ftplan_init(ftplan* p, ae_state *_state, ae_bool make_automatic); +ae_bool _ftplan_init_copy(ftplan* dst, ftplan* src, ae_state *_state, ae_bool make_automatic); +void _ftplan_clear(ftplan* p); +double nulog1p(double x, ae_state *_state); +double nuexpm1(double x, ae_state *_state); +double nucosm1(double x, ae_state *_state); + +} +#endif + diff --git a/contrib/lbfgs/alglibmisc.cpp b/contrib/lbfgs/alglibmisc.cpp new file mode 100755 index 0000000000..c1cd80cff9 --- /dev/null +++ b/contrib/lbfgs/alglibmisc.cpp @@ -0,0 +1,3083 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#include "stdafx.h" +#include "alglibmisc.h" + +// disable some irrelevant warnings +#if (AE_COMPILER==AE_MSVC) +#pragma warning(disable:4100) +#pragma warning(disable:4127) +#pragma warning(disable:4702) +#pragma warning(disable:4996) +#endif +using namespace std; + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + + +/************************************************************************* +Portable high quality random number generator state. +Initialized with HQRNDRandomize() or HQRNDSeed(). + +Fields: + S1, S2 - seed values + V - precomputed value + MagicV - 'magic' value used to determine whether State structure + was correctly initialized. +*************************************************************************/ +_hqrndstate_owner::_hqrndstate_owner() +{ + p_struct = (alglib_impl::hqrndstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::hqrndstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_hqrndstate_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_hqrndstate_owner::_hqrndstate_owner(const _hqrndstate_owner &rhs) +{ + p_struct = (alglib_impl::hqrndstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::hqrndstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_hqrndstate_init_copy(p_struct, const_cast<alglib_impl::hqrndstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_hqrndstate_owner& _hqrndstate_owner::operator=(const _hqrndstate_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_hqrndstate_clear(p_struct); + if( !alglib_impl::_hqrndstate_init_copy(p_struct, const_cast<alglib_impl::hqrndstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_hqrndstate_owner::~_hqrndstate_owner() +{ + alglib_impl::_hqrndstate_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::hqrndstate* _hqrndstate_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::hqrndstate* _hqrndstate_owner::c_ptr() const +{ + return const_cast<alglib_impl::hqrndstate*>(p_struct); +} +hqrndstate::hqrndstate() : _hqrndstate_owner() +{ +} + +hqrndstate::hqrndstate(const hqrndstate &rhs):_hqrndstate_owner(rhs) +{ +} + +hqrndstate& hqrndstate::operator=(const hqrndstate &rhs) +{ + if( this==&rhs ) + return *this; + _hqrndstate_owner::operator=(rhs); + return *this; +} + +hqrndstate::~hqrndstate() +{ +} + +/************************************************************************* +HQRNDState initialization with random values which come from standard +RNG. + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndrandomize(hqrndstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hqrndrandomize(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +HQRNDState initialization with seed values + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndseed(const ae_int_t s1, const ae_int_t s2, hqrndstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hqrndseed(s1, s2, const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function generates random real number in (0,1), +not including interval boundaries + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +double hqrnduniformr(const hqrndstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::hqrnduniformr(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function generates random integer number in [0, N) + +1. N must be less than HQRNDMax-1. +2. State structure must be initialized with HQRNDRandomize() or HQRNDSeed() + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +ae_int_t hqrnduniformi(const hqrndstate &state, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::hqrnduniformi(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Random number generator: normal numbers + +This function generates one random number from normal distribution. +Its performance is equal to that of HQRNDNormal2() + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +double hqrndnormal(const hqrndstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::hqrndnormal(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Random number generator: random X and Y such that X^2+Y^2=1 + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndunit2(const hqrndstate &state, double &x, double &y) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hqrndunit2(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &x, &y, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Random number generator: normal numbers + +This function generates two independent random numbers from normal +distribution. Its performance is equal to that of HQRNDNormal() + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndnormal2(const hqrndstate &state, double &x1, double &x2) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hqrndnormal2(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &x1, &x2, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Random number generator: exponential distribution + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 11.08.2007 by Bochkanov Sergey +*************************************************************************/ +double hqrndexponential(const hqrndstate &state, const double lambdav) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::hqrndexponential(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), lambdav, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + +*************************************************************************/ +_kdtree_owner::_kdtree_owner() +{ + p_struct = (alglib_impl::kdtree*)alglib_impl::ae_malloc(sizeof(alglib_impl::kdtree), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_kdtree_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_kdtree_owner::_kdtree_owner(const _kdtree_owner &rhs) +{ + p_struct = (alglib_impl::kdtree*)alglib_impl::ae_malloc(sizeof(alglib_impl::kdtree), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_kdtree_init_copy(p_struct, const_cast<alglib_impl::kdtree*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_kdtree_owner& _kdtree_owner::operator=(const _kdtree_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_kdtree_clear(p_struct); + if( !alglib_impl::_kdtree_init_copy(p_struct, const_cast<alglib_impl::kdtree*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_kdtree_owner::~_kdtree_owner() +{ + alglib_impl::_kdtree_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::kdtree* _kdtree_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::kdtree* _kdtree_owner::c_ptr() const +{ + return const_cast<alglib_impl::kdtree*>(p_struct); +} +kdtree::kdtree() : _kdtree_owner() +{ +} + +kdtree::kdtree(const kdtree &rhs):_kdtree_owner(rhs) +{ +} + +kdtree& kdtree::operator=(const kdtree &rhs) +{ + if( this==&rhs ) + return *this; + _kdtree_owner::operator=(rhs); + return *this; +} + +kdtree::~kdtree() +{ +} + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values and optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuild(const real_2d_array &xy, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreebuild(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values and optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuild(const real_2d_array &xy, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = xy.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreebuild(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values, integer tags and +optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + Tags - tags, array[0..N-1], contains integer tags associated + with points. + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreebuildtagged(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values, integer tags and +optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + Tags - tags, array[0..N-1], contains integer tags associated + with points. + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (xy.rows()!=tags.length())) + throw ap_error("Error while calling 'kdtreebuildtagged': looks like one of arguments has wrong size"); + n = xy.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreebuildtagged(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +K-NN query: K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of actual neighbors found (either K or N, if K>N). + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::kdtreequeryknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +K-NN query: K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of actual neighbors found (either K or N, if K>N). + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k) +{ + alglib_impl::ae_state _alglib_env_state; + bool selfmatch; + + selfmatch = true; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::kdtreequeryknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +R-NN query: all points within R-sphere centered at X + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + R - radius of sphere (in corresponding norm), R>0 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of neighbors found, >=0 + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +actual results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r, const bool selfmatch) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::kdtreequeryrnn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), r, selfmatch, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +R-NN query: all points within R-sphere centered at X + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + R - radius of sphere (in corresponding norm), R>0 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of neighbors found, >=0 + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +actual results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r) +{ + alglib_impl::ae_state _alglib_env_state; + bool selfmatch; + + selfmatch = true; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::kdtreequeryrnn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), r, selfmatch, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +K-NN query: approximate K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + Eps - approximation factor, Eps>=0. eps-approximate nearest + neighbor is a neighbor whose distance from X is at + most (1+eps) times distance of true nearest neighbor. + +RESULT + number of actual neighbors found (either K or N, if K>N). + +NOTES + significant performance gain may be achieved only when Eps is is on + the order of magnitude of 1 or larger. + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch, const double eps) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::kdtreequeryaknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, eps, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +K-NN query: approximate K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + Eps - approximation factor, Eps>=0. eps-approximate nearest + neighbor is a neighbor whose distance from X is at + most (1+eps) times distance of true nearest neighbor. + +RESULT + number of actual neighbors found (either K or N, if K>N). + +NOTES + significant performance gain may be achieved only when Eps is is on + the order of magnitude of 1 or larger. + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const double eps) +{ + alglib_impl::ae_state _alglib_env_state; + bool selfmatch; + + selfmatch = true; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_int_t result = alglib_impl::kdtreequeryaknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, eps, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<ae_int_t*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +X-values from last query + +INPUT PARAMETERS + KDT - KD-tree + X - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + X - rows are filled with X-values + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsTags() tag values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsx(const kdtree &kdt, real_2d_array &x) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultsx(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +X- and Y-values from last query + +INPUT PARAMETERS + KDT - KD-tree + XY - possibly pre-allocated buffer. If XY is too small to store + result, it is resized. If size(XY) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + XY - rows are filled with points: first NX columns with + X-values, next NY columns - with Y-values. + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsTags() tag values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxy(const kdtree &kdt, real_2d_array &xy) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultsxy(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Tags from last query + +INPUT PARAMETERS + KDT - KD-tree + Tags - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + Tags - filled with tags associated with points, + or, when no tags were supplied, with zeros + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultstags(const kdtree &kdt, integer_1d_array &tags) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultstags(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Distances from last query + +INPUT PARAMETERS + KDT - KD-tree + R - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + R - filled with distances (in corresponding norm) + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsTags() tag values + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsdistances(const kdtree &kdt, real_1d_array &r) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultsdistances(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(r.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +X-values from last query; 'interactive' variant for languages like Python +which support constructs like "X = KDTreeQueryResultsXI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxi(const kdtree &kdt, real_2d_array &x) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultsxi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +XY-values from last query; 'interactive' variant for languages like Python +which support constructs like "XY = KDTreeQueryResultsXYI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxyi(const kdtree &kdt, real_2d_array &xy) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultsxyi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Tags from last query; 'interactive' variant for languages like Python +which support constructs like "Tags = KDTreeQueryResultsTagsI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultstagsi(const kdtree &kdt, integer_1d_array &tags) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultstagsi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Distances from last query; 'interactive' variant for languages like Python +which support constructs like "R = KDTreeQueryResultsDistancesI(KDT)" +and interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsdistancesi(const kdtree &kdt, real_1d_array &r) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::kdtreequeryresultsdistancesi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(r.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +static ae_int_t hqrnd_hqrndmax = 2147483563; +static ae_int_t hqrnd_hqrndm1 = 2147483563; +static ae_int_t hqrnd_hqrndm2 = 2147483399; +static ae_int_t hqrnd_hqrndmagic = 1634357784; +static ae_int_t hqrnd_hqrndintegerbase(hqrndstate* state, + ae_state *_state); + + +static ae_int_t nearestneighbor_splitnodesize = 6; +static void nearestneighbor_kdtreesplit(kdtree* kdt, + ae_int_t i1, + ae_int_t i2, + ae_int_t d, + double s, + ae_int_t* i3, + ae_state *_state); +static void nearestneighbor_kdtreegeneratetreerec(kdtree* kdt, + ae_int_t* nodesoffs, + ae_int_t* splitsoffs, + ae_int_t i1, + ae_int_t i2, + ae_int_t maxleafsize, + ae_state *_state); +static void nearestneighbor_kdtreequerynnrec(kdtree* kdt, + ae_int_t offs, + ae_state *_state); +static void nearestneighbor_kdtreeinitbox(kdtree* kdt, + /* Real */ ae_vector* x, + ae_state *_state); + + + + + +/************************************************************************* +HQRNDState initialization with random values which come from standard +RNG. + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndrandomize(hqrndstate* state, ae_state *_state) +{ + + _hqrndstate_clear(state); + + hqrndseed(ae_randominteger(hqrnd_hqrndm1, _state), ae_randominteger(hqrnd_hqrndm2, _state), state, _state); +} + + +/************************************************************************* +HQRNDState initialization with seed values + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndseed(ae_int_t s1, + ae_int_t s2, + hqrndstate* state, + ae_state *_state) +{ + + _hqrndstate_clear(state); + + state->s1 = s1%(hqrnd_hqrndm1-1)+1; + state->s2 = s2%(hqrnd_hqrndm2-1)+1; + state->v = (double)1/(double)hqrnd_hqrndmax; + state->magicv = hqrnd_hqrndmagic; +} + + +/************************************************************************* +This function generates random real number in (0,1), +not including interval boundaries + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +double hqrnduniformr(hqrndstate* state, ae_state *_state) +{ + double result; + + + result = state->v*hqrnd_hqrndintegerbase(state, _state); + return result; +} + + +/************************************************************************* +This function generates random integer number in [0, N) + +1. N must be less than HQRNDMax-1. +2. State structure must be initialized with HQRNDRandomize() or HQRNDSeed() + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +ae_int_t hqrnduniformi(hqrndstate* state, ae_int_t n, ae_state *_state) +{ + ae_int_t mx; + ae_int_t result; + + + + /* + * Correct handling of N's close to RNDBaseMax + * (avoiding skewed distributions for RNDBaseMax<>K*N) + */ + ae_assert(n>0, "HQRNDUniformI: N<=0!", _state); + ae_assert(n<hqrnd_hqrndmax-1, "HQRNDUniformI: N>=RNDBaseMax-1!", _state); + mx = hqrnd_hqrndmax-1-(hqrnd_hqrndmax-1)%n; + do + { + result = hqrnd_hqrndintegerbase(state, _state)-1; + } + while(result>=mx); + result = result%n; + return result; +} + + +/************************************************************************* +Random number generator: normal numbers + +This function generates one random number from normal distribution. +Its performance is equal to that of HQRNDNormal2() + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +double hqrndnormal(hqrndstate* state, ae_state *_state) +{ + double v1; + double v2; + double result; + + + hqrndnormal2(state, &v1, &v2, _state); + result = v1; + return result; +} + + +/************************************************************************* +Random number generator: random X and Y such that X^2+Y^2=1 + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndunit2(hqrndstate* state, double* x, double* y, ae_state *_state) +{ + double v; + double mx; + double mn; + + *x = 0; + *y = 0; + + do + { + hqrndnormal2(state, x, y, _state); + } + while(!(ae_fp_neq(*x,0)||ae_fp_neq(*y,0))); + mx = ae_maxreal(ae_fabs(*x, _state), ae_fabs(*y, _state), _state); + mn = ae_minreal(ae_fabs(*x, _state), ae_fabs(*y, _state), _state); + v = mx*ae_sqrt(1+ae_sqr(mn/mx, _state), _state); + *x = *x/v; + *y = *y/v; +} + + +/************************************************************************* +Random number generator: normal numbers + +This function generates two independent random numbers from normal +distribution. Its performance is equal to that of HQRNDNormal() + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndnormal2(hqrndstate* state, + double* x1, + double* x2, + ae_state *_state) +{ + double u; + double v; + double s; + + *x1 = 0; + *x2 = 0; + + for(;;) + { + u = 2*hqrnduniformr(state, _state)-1; + v = 2*hqrnduniformr(state, _state)-1; + s = ae_sqr(u, _state)+ae_sqr(v, _state); + if( ae_fp_greater(s,0)&&ae_fp_less(s,1) ) + { + + /* + * two Sqrt's instead of one to + * avoid overflow when S is too small + */ + s = ae_sqrt(-2*ae_log(s, _state), _state)/ae_sqrt(s, _state); + *x1 = u*s; + *x2 = v*s; + return; + } + } +} + + +/************************************************************************* +Random number generator: exponential distribution + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 11.08.2007 by Bochkanov Sergey +*************************************************************************/ +double hqrndexponential(hqrndstate* state, + double lambdav, + ae_state *_state) +{ + double result; + + + ae_assert(ae_fp_greater(lambdav,0), "HQRNDExponential: LambdaV<=0!", _state); + result = -ae_log(hqrnduniformr(state, _state), _state)/lambdav; + return result; +} + + +/************************************************************************* + +L'Ecuyer, Efficient and portable combined random number generators +*************************************************************************/ +static ae_int_t hqrnd_hqrndintegerbase(hqrndstate* state, + ae_state *_state) +{ + ae_int_t k; + ae_int_t result; + + + ae_assert(state->magicv==hqrnd_hqrndmagic, "HQRNDIntegerBase: State is not correctly initialized!", _state); + k = state->s1/53668; + state->s1 = 40014*(state->s1-k*53668)-k*12211; + if( state->s1<0 ) + { + state->s1 = state->s1+2147483563; + } + k = state->s2/52774; + state->s2 = 40692*(state->s2-k*52774)-k*3791; + if( state->s2<0 ) + { + state->s2 = state->s2+2147483399; + } + + /* + * Result + */ + result = state->s1-state->s2; + if( result<1 ) + { + result = result+2147483562; + } + return result; +} + + +ae_bool _hqrndstate_init(hqrndstate* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _hqrndstate_init_copy(hqrndstate* dst, hqrndstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->s1 = src->s1; + dst->s2 = src->s2; + dst->v = src->v; + dst->magicv = src->magicv; + return ae_true; +} + + +void _hqrndstate_clear(hqrndstate* p) +{ +} + + + + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values and optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuild(/* Real */ ae_matrix* xy, + ae_int_t n, + ae_int_t nx, + ae_int_t ny, + ae_int_t normtype, + kdtree* kdt, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tags; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + _kdtree_clear(kdt); + ae_vector_init(&tags, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "KDTreeBuild: N<1!", _state); + ae_assert(nx>=1, "KDTreeBuild: NX<1!", _state); + ae_assert(ny>=0, "KDTreeBuild: NY<0!", _state); + ae_assert(normtype>=0&&normtype<=2, "KDTreeBuild: incorrect NormType!", _state); + ae_assert(xy->rows>=n, "KDTreeBuild: rows(X)<N!", _state); + ae_assert(xy->cols>=nx+ny, "KDTreeBuild: cols(X)<NX+NY!", _state); + ae_assert(apservisfinitematrix(xy, n, nx+ny, _state), "KDTreeBuild: X contains infinite or NaN values!", _state); + ae_vector_set_length(&tags, n, _state); + for(i=0; i<=n-1; i++) + { + tags.ptr.p_int[i] = 0; + } + kdtreebuildtagged(xy, &tags, n, nx, ny, normtype, kdt, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values, integer tags and +optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + Tags - tags, array[0..N-1], contains integer tags associated + with points. + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuildtagged(/* Real */ ae_matrix* xy, + /* Integer */ ae_vector* tags, + ae_int_t n, + ae_int_t nx, + ae_int_t ny, + ae_int_t normtype, + kdtree* kdt, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t maxnodes; + ae_int_t nodesoffs; + ae_int_t splitsoffs; + + _kdtree_clear(kdt); + + ae_assert(n>=1, "KDTreeBuildTagged: N<1!", _state); + ae_assert(nx>=1, "KDTreeBuildTagged: NX<1!", _state); + ae_assert(ny>=0, "KDTreeBuildTagged: NY<0!", _state); + ae_assert(normtype>=0&&normtype<=2, "KDTreeBuildTagged: incorrect NormType!", _state); + ae_assert(xy->rows>=n, "KDTreeBuildTagged: rows(X)<N!", _state); + ae_assert(xy->cols>=nx+ny, "KDTreeBuildTagged: cols(X)<NX+NY!", _state); + ae_assert(apservisfinitematrix(xy, n, nx+ny, _state), "KDTreeBuildTagged: X contains infinite or NaN values!", _state); + + /* + * initialize + */ + kdt->n = n; + kdt->nx = nx; + kdt->ny = ny; + kdt->normtype = normtype; + kdt->distmatrixtype = 0; + ae_matrix_set_length(&kdt->xy, n, 2*nx+ny, _state); + ae_vector_set_length(&kdt->tags, n, _state); + ae_vector_set_length(&kdt->idx, n, _state); + ae_vector_set_length(&kdt->r, n, _state); + ae_vector_set_length(&kdt->x, nx, _state); + ae_vector_set_length(&kdt->buf, ae_maxint(n, nx, _state), _state); + + /* + * Initial fill + */ + for(i=0; i<=n-1; i++) + { + ae_v_move(&kdt->xy.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nx-1)); + ae_v_move(&kdt->xy.ptr.pp_double[i][nx], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(nx,2*nx+ny-1)); + kdt->tags.ptr.p_int[i] = tags->ptr.p_int[i]; + } + + /* + * Determine bounding box + */ + ae_vector_set_length(&kdt->boxmin, nx, _state); + ae_vector_set_length(&kdt->boxmax, nx, _state); + ae_vector_set_length(&kdt->curboxmin, nx, _state); + ae_vector_set_length(&kdt->curboxmax, nx, _state); + ae_v_move(&kdt->boxmin.ptr.p_double[0], 1, &kdt->xy.ptr.pp_double[0][0], 1, ae_v_len(0,nx-1)); + ae_v_move(&kdt->boxmax.ptr.p_double[0], 1, &kdt->xy.ptr.pp_double[0][0], 1, ae_v_len(0,nx-1)); + for(i=1; i<=n-1; i++) + { + for(j=0; j<=nx-1; j++) + { + kdt->boxmin.ptr.p_double[j] = ae_minreal(kdt->boxmin.ptr.p_double[j], kdt->xy.ptr.pp_double[i][j], _state); + kdt->boxmax.ptr.p_double[j] = ae_maxreal(kdt->boxmax.ptr.p_double[j], kdt->xy.ptr.pp_double[i][j], _state); + } + } + + /* + * prepare tree structure + * * MaxNodes=N because we guarantee no trivial splits, i.e. + * every split will generate two non-empty boxes + */ + maxnodes = n; + ae_vector_set_length(&kdt->nodes, nearestneighbor_splitnodesize*2*maxnodes, _state); + ae_vector_set_length(&kdt->splits, 2*maxnodes, _state); + nodesoffs = 0; + splitsoffs = 0; + ae_v_move(&kdt->curboxmin.ptr.p_double[0], 1, &kdt->boxmin.ptr.p_double[0], 1, ae_v_len(0,nx-1)); + ae_v_move(&kdt->curboxmax.ptr.p_double[0], 1, &kdt->boxmax.ptr.p_double[0], 1, ae_v_len(0,nx-1)); + nearestneighbor_kdtreegeneratetreerec(kdt, &nodesoffs, &splitsoffs, 0, n, 8, _state); + + /* + * Set current query size to 0 + */ + kdt->kcur = 0; +} + + +/************************************************************************* +K-NN query: K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of actual neighbors found (either K or N, if K>N). + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryknn(kdtree* kdt, + /* Real */ ae_vector* x, + ae_int_t k, + ae_bool selfmatch, + ae_state *_state) +{ + ae_int_t result; + + + ae_assert(k>=1, "KDTreeQueryKNN: K<1!", _state); + ae_assert(x->cnt>=kdt->nx, "KDTreeQueryKNN: Length(X)<NX!", _state); + ae_assert(isfinitevector(x, kdt->nx, _state), "KDTreeQueryKNN: X contains infinite or NaN values!", _state); + result = kdtreequeryaknn(kdt, x, k, selfmatch, 0.0, _state); + return result; +} + + +/************************************************************************* +R-NN query: all points within R-sphere centered at X + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + R - radius of sphere (in corresponding norm), R>0 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of neighbors found, >=0 + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +actual results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryrnn(kdtree* kdt, + /* Real */ ae_vector* x, + double r, + ae_bool selfmatch, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t result; + + + ae_assert(ae_fp_greater(r,0), "KDTreeQueryRNN: incorrect R!", _state); + ae_assert(x->cnt>=kdt->nx, "KDTreeQueryRNN: Length(X)<NX!", _state); + ae_assert(isfinitevector(x, kdt->nx, _state), "KDTreeQueryRNN: X contains infinite or NaN values!", _state); + + /* + * Prepare parameters + */ + kdt->kneeded = 0; + if( kdt->normtype!=2 ) + { + kdt->rneeded = r; + } + else + { + kdt->rneeded = ae_sqr(r, _state); + } + kdt->selfmatch = selfmatch; + kdt->approxf = 1; + kdt->kcur = 0; + + /* + * calculate distance from point to current bounding box + */ + nearestneighbor_kdtreeinitbox(kdt, x, _state); + + /* + * call recursive search + * results are returned as heap + */ + nearestneighbor_kdtreequerynnrec(kdt, 0, _state); + + /* + * pop from heap to generate ordered representation + * + * last element is not pop'ed because it is already in + * its place + */ + result = kdt->kcur; + j = kdt->kcur; + for(i=kdt->kcur; i>=2; i--) + { + tagheappopi(&kdt->r, &kdt->idx, &j, _state); + } + return result; +} + + +/************************************************************************* +K-NN query: approximate K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + Eps - approximation factor, Eps>=0. eps-approximate nearest + neighbor is a neighbor whose distance from X is at + most (1+eps) times distance of true nearest neighbor. + +RESULT + number of actual neighbors found (either K or N, if K>N). + +NOTES + significant performance gain may be achieved only when Eps is is on + the order of magnitude of 1 or larger. + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryaknn(kdtree* kdt, + /* Real */ ae_vector* x, + ae_int_t k, + ae_bool selfmatch, + double eps, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t result; + + + ae_assert(k>0, "KDTreeQueryAKNN: incorrect K!", _state); + ae_assert(ae_fp_greater_eq(eps,0), "KDTreeQueryAKNN: incorrect Eps!", _state); + ae_assert(x->cnt>=kdt->nx, "KDTreeQueryAKNN: Length(X)<NX!", _state); + ae_assert(isfinitevector(x, kdt->nx, _state), "KDTreeQueryAKNN: X contains infinite or NaN values!", _state); + + /* + * Prepare parameters + */ + k = ae_minint(k, kdt->n, _state); + kdt->kneeded = k; + kdt->rneeded = 0; + kdt->selfmatch = selfmatch; + if( kdt->normtype==2 ) + { + kdt->approxf = 1/ae_sqr(1+eps, _state); + } + else + { + kdt->approxf = 1/(1+eps); + } + kdt->kcur = 0; + + /* + * calculate distance from point to current bounding box + */ + nearestneighbor_kdtreeinitbox(kdt, x, _state); + + /* + * call recursive search + * results are returned as heap + */ + nearestneighbor_kdtreequerynnrec(kdt, 0, _state); + + /* + * pop from heap to generate ordered representation + * + * last element is non pop'ed because it is already in + * its place + */ + result = kdt->kcur; + j = kdt->kcur; + for(i=kdt->kcur; i>=2; i--) + { + tagheappopi(&kdt->r, &kdt->idx, &j, _state); + } + return result; +} + + +/************************************************************************* +X-values from last query + +INPUT PARAMETERS + KDT - KD-tree + X - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + X - rows are filled with X-values + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsTags() tag values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsx(kdtree* kdt, + /* Real */ ae_matrix* x, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + + if( kdt->kcur==0 ) + { + return; + } + if( x->rows<kdt->kcur||x->cols<kdt->nx ) + { + ae_matrix_set_length(x, kdt->kcur, kdt->nx, _state); + } + k = kdt->kcur; + for(i=0; i<=k-1; i++) + { + ae_v_move(&x->ptr.pp_double[i][0], 1, &kdt->xy.ptr.pp_double[kdt->idx.ptr.p_int[i]][kdt->nx], 1, ae_v_len(0,kdt->nx-1)); + } +} + + +/************************************************************************* +X- and Y-values from last query + +INPUT PARAMETERS + KDT - KD-tree + XY - possibly pre-allocated buffer. If XY is too small to store + result, it is resized. If size(XY) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + XY - rows are filled with points: first NX columns with + X-values, next NY columns - with Y-values. + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsTags() tag values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxy(kdtree* kdt, + /* Real */ ae_matrix* xy, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + + if( kdt->kcur==0 ) + { + return; + } + if( xy->rows<kdt->kcur||xy->cols<kdt->nx+kdt->ny ) + { + ae_matrix_set_length(xy, kdt->kcur, kdt->nx+kdt->ny, _state); + } + k = kdt->kcur; + for(i=0; i<=k-1; i++) + { + ae_v_move(&xy->ptr.pp_double[i][0], 1, &kdt->xy.ptr.pp_double[kdt->idx.ptr.p_int[i]][kdt->nx], 1, ae_v_len(0,kdt->nx+kdt->ny-1)); + } +} + + +/************************************************************************* +Tags from last query + +INPUT PARAMETERS + KDT - KD-tree + Tags - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + Tags - filled with tags associated with points, + or, when no tags were supplied, with zeros + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultstags(kdtree* kdt, + /* Integer */ ae_vector* tags, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + + if( kdt->kcur==0 ) + { + return; + } + if( tags->cnt<kdt->kcur ) + { + ae_vector_set_length(tags, kdt->kcur, _state); + } + k = kdt->kcur; + for(i=0; i<=k-1; i++) + { + tags->ptr.p_int[i] = kdt->tags.ptr.p_int[kdt->idx.ptr.p_int[i]]; + } +} + + +/************************************************************************* +Distances from last query + +INPUT PARAMETERS + KDT - KD-tree + R - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + R - filled with distances (in corresponding norm) + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsTags() tag values + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsdistances(kdtree* kdt, + /* Real */ ae_vector* r, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + + if( kdt->kcur==0 ) + { + return; + } + if( r->cnt<kdt->kcur ) + { + ae_vector_set_length(r, kdt->kcur, _state); + } + k = kdt->kcur; + + /* + * unload norms + * + * Abs() call is used to handle cases with negative norms + * (generated during KFN requests) + */ + if( kdt->normtype==0 ) + { + for(i=0; i<=k-1; i++) + { + r->ptr.p_double[i] = ae_fabs(kdt->r.ptr.p_double[i], _state); + } + } + if( kdt->normtype==1 ) + { + for(i=0; i<=k-1; i++) + { + r->ptr.p_double[i] = ae_fabs(kdt->r.ptr.p_double[i], _state); + } + } + if( kdt->normtype==2 ) + { + for(i=0; i<=k-1; i++) + { + r->ptr.p_double[i] = ae_sqrt(ae_fabs(kdt->r.ptr.p_double[i], _state), _state); + } + } +} + + +/************************************************************************* +X-values from last query; 'interactive' variant for languages like Python +which support constructs like "X = KDTreeQueryResultsXI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxi(kdtree* kdt, + /* Real */ ae_matrix* x, + ae_state *_state) +{ + + ae_matrix_clear(x); + + kdtreequeryresultsx(kdt, x, _state); +} + + +/************************************************************************* +XY-values from last query; 'interactive' variant for languages like Python +which support constructs like "XY = KDTreeQueryResultsXYI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxyi(kdtree* kdt, + /* Real */ ae_matrix* xy, + ae_state *_state) +{ + + ae_matrix_clear(xy); + + kdtreequeryresultsxy(kdt, xy, _state); +} + + +/************************************************************************* +Tags from last query; 'interactive' variant for languages like Python +which support constructs like "Tags = KDTreeQueryResultsTagsI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultstagsi(kdtree* kdt, + /* Integer */ ae_vector* tags, + ae_state *_state) +{ + + ae_vector_clear(tags); + + kdtreequeryresultstags(kdt, tags, _state); +} + + +/************************************************************************* +Distances from last query; 'interactive' variant for languages like Python +which support constructs like "R = KDTreeQueryResultsDistancesI(KDT)" +and interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsdistancesi(kdtree* kdt, + /* Real */ ae_vector* r, + ae_state *_state) +{ + + ae_vector_clear(r); + + kdtreequeryresultsdistances(kdt, r, _state); +} + + +/************************************************************************* +Rearranges nodes [I1,I2) using partition in D-th dimension with S as threshold. +Returns split position I3: [I1,I3) and [I3,I2) are created as result. + +This subroutine doesn't create tree structures, just rearranges nodes. +*************************************************************************/ +static void nearestneighbor_kdtreesplit(kdtree* kdt, + ae_int_t i1, + ae_int_t i2, + ae_int_t d, + double s, + ae_int_t* i3, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t ileft; + ae_int_t iright; + double v; + + *i3 = 0; + + + /* + * split XY/Tags in two parts: + * * [ILeft,IRight] is non-processed part of XY/Tags + * + * After cycle is done, we have Ileft=IRight. We deal with + * this element separately. + * + * After this, [I1,ILeft) contains left part, and [ILeft,I2) + * contains right part. + */ + ileft = i1; + iright = i2-1; + while(ileft<iright) + { + if( ae_fp_less_eq(kdt->xy.ptr.pp_double[ileft][d],s) ) + { + + /* + * XY[ILeft] is on its place. + * Advance ILeft. + */ + ileft = ileft+1; + } + else + { + + /* + * XY[ILeft,..] must be at IRight. + * Swap and advance IRight. + */ + for(i=0; i<=2*kdt->nx+kdt->ny-1; i++) + { + v = kdt->xy.ptr.pp_double[ileft][i]; + kdt->xy.ptr.pp_double[ileft][i] = kdt->xy.ptr.pp_double[iright][i]; + kdt->xy.ptr.pp_double[iright][i] = v; + } + j = kdt->tags.ptr.p_int[ileft]; + kdt->tags.ptr.p_int[ileft] = kdt->tags.ptr.p_int[iright]; + kdt->tags.ptr.p_int[iright] = j; + iright = iright-1; + } + } + if( ae_fp_less_eq(kdt->xy.ptr.pp_double[ileft][d],s) ) + { + ileft = ileft+1; + } + else + { + iright = iright-1; + } + *i3 = ileft; +} + + +/************************************************************************* +Recursive kd-tree generation subroutine. + +PARAMETERS + KDT tree + NodesOffs unused part of Nodes[] which must be filled by tree + SplitsOffs unused part of Splits[] + I1, I2 points from [I1,I2) are processed + +NodesOffs[] and SplitsOffs[] must be large enough. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +static void nearestneighbor_kdtreegeneratetreerec(kdtree* kdt, + ae_int_t* nodesoffs, + ae_int_t* splitsoffs, + ae_int_t i1, + ae_int_t i2, + ae_int_t maxleafsize, + ae_state *_state) +{ + ae_int_t n; + ae_int_t nx; + ae_int_t ny; + ae_int_t i; + ae_int_t j; + ae_int_t oldoffs; + ae_int_t i3; + ae_int_t cntless; + ae_int_t cntgreater; + double minv; + double maxv; + ae_int_t minidx; + ae_int_t maxidx; + ae_int_t d; + double ds; + double s; + double v; + + + ae_assert(i2>i1, "KDTreeGenerateTreeRec: internal error", _state); + + /* + * Generate leaf if needed + */ + if( i2-i1<=maxleafsize ) + { + kdt->nodes.ptr.p_int[*nodesoffs+0] = i2-i1; + kdt->nodes.ptr.p_int[*nodesoffs+1] = i1; + *nodesoffs = *nodesoffs+2; + return; + } + + /* + * Load values for easier access + */ + nx = kdt->nx; + ny = kdt->ny; + + /* + * select dimension to split: + * * D is a dimension number + */ + d = 0; + ds = kdt->curboxmax.ptr.p_double[0]-kdt->curboxmin.ptr.p_double[0]; + for(i=1; i<=nx-1; i++) + { + v = kdt->curboxmax.ptr.p_double[i]-kdt->curboxmin.ptr.p_double[i]; + if( ae_fp_greater(v,ds) ) + { + ds = v; + d = i; + } + } + + /* + * Select split position S using sliding midpoint rule, + * rearrange points into [I1,I3) and [I3,I2) + */ + s = kdt->curboxmin.ptr.p_double[d]+0.5*ds; + ae_v_move(&kdt->buf.ptr.p_double[0], 1, &kdt->xy.ptr.pp_double[i1][d], kdt->xy.stride, ae_v_len(0,i2-i1-1)); + n = i2-i1; + cntless = 0; + cntgreater = 0; + minv = kdt->buf.ptr.p_double[0]; + maxv = kdt->buf.ptr.p_double[0]; + minidx = i1; + maxidx = i1; + for(i=0; i<=n-1; i++) + { + v = kdt->buf.ptr.p_double[i]; + if( ae_fp_less(v,minv) ) + { + minv = v; + minidx = i1+i; + } + if( ae_fp_greater(v,maxv) ) + { + maxv = v; + maxidx = i1+i; + } + if( ae_fp_less(v,s) ) + { + cntless = cntless+1; + } + if( ae_fp_greater(v,s) ) + { + cntgreater = cntgreater+1; + } + } + if( cntless>0&&cntgreater>0 ) + { + + /* + * normal midpoint split + */ + nearestneighbor_kdtreesplit(kdt, i1, i2, d, s, &i3, _state); + } + else + { + + /* + * sliding midpoint + */ + if( cntless==0 ) + { + + /* + * 1. move split to MinV, + * 2. place one point to the left bin (move to I1), + * others - to the right bin + */ + s = minv; + if( minidx!=i1 ) + { + for(i=0; i<=2*kdt->nx+kdt->ny-1; i++) + { + v = kdt->xy.ptr.pp_double[minidx][i]; + kdt->xy.ptr.pp_double[minidx][i] = kdt->xy.ptr.pp_double[i1][i]; + kdt->xy.ptr.pp_double[i1][i] = v; + } + j = kdt->tags.ptr.p_int[minidx]; + kdt->tags.ptr.p_int[minidx] = kdt->tags.ptr.p_int[i1]; + kdt->tags.ptr.p_int[i1] = j; + } + i3 = i1+1; + } + else + { + + /* + * 1. move split to MaxV, + * 2. place one point to the right bin (move to I2-1), + * others - to the left bin + */ + s = maxv; + if( maxidx!=i2-1 ) + { + for(i=0; i<=2*kdt->nx+kdt->ny-1; i++) + { + v = kdt->xy.ptr.pp_double[maxidx][i]; + kdt->xy.ptr.pp_double[maxidx][i] = kdt->xy.ptr.pp_double[i2-1][i]; + kdt->xy.ptr.pp_double[i2-1][i] = v; + } + j = kdt->tags.ptr.p_int[maxidx]; + kdt->tags.ptr.p_int[maxidx] = kdt->tags.ptr.p_int[i2-1]; + kdt->tags.ptr.p_int[i2-1] = j; + } + i3 = i2-1; + } + } + + /* + * Generate 'split' node + */ + kdt->nodes.ptr.p_int[*nodesoffs+0] = 0; + kdt->nodes.ptr.p_int[*nodesoffs+1] = d; + kdt->nodes.ptr.p_int[*nodesoffs+2] = *splitsoffs; + kdt->splits.ptr.p_double[*splitsoffs+0] = s; + oldoffs = *nodesoffs; + *nodesoffs = *nodesoffs+nearestneighbor_splitnodesize; + *splitsoffs = *splitsoffs+1; + + /* + * Recirsive generation: + * * update CurBox + * * call subroutine + * * restore CurBox + */ + kdt->nodes.ptr.p_int[oldoffs+3] = *nodesoffs; + v = kdt->curboxmax.ptr.p_double[d]; + kdt->curboxmax.ptr.p_double[d] = s; + nearestneighbor_kdtreegeneratetreerec(kdt, nodesoffs, splitsoffs, i1, i3, maxleafsize, _state); + kdt->curboxmax.ptr.p_double[d] = v; + kdt->nodes.ptr.p_int[oldoffs+4] = *nodesoffs; + v = kdt->curboxmin.ptr.p_double[d]; + kdt->curboxmin.ptr.p_double[d] = s; + nearestneighbor_kdtreegeneratetreerec(kdt, nodesoffs, splitsoffs, i3, i2, maxleafsize, _state); + kdt->curboxmin.ptr.p_double[d] = v; +} + + +/************************************************************************* +Recursive subroutine for NN queries. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +static void nearestneighbor_kdtreequerynnrec(kdtree* kdt, + ae_int_t offs, + ae_state *_state) +{ + double ptdist; + ae_int_t i; + ae_int_t j; + ae_int_t nx; + ae_int_t i1; + ae_int_t i2; + ae_int_t d; + double s; + double v; + double t1; + ae_int_t childbestoffs; + ae_int_t childworstoffs; + ae_int_t childoffs; + double prevdist; + ae_bool todive; + ae_bool bestisleft; + ae_bool updatemin; + + + + /* + * Leaf node. + * Process points. + */ + if( kdt->nodes.ptr.p_int[offs]>0 ) + { + i1 = kdt->nodes.ptr.p_int[offs+1]; + i2 = i1+kdt->nodes.ptr.p_int[offs]; + for(i=i1; i<=i2-1; i++) + { + + /* + * Calculate distance + */ + ptdist = 0; + nx = kdt->nx; + if( kdt->normtype==0 ) + { + for(j=0; j<=nx-1; j++) + { + ptdist = ae_maxreal(ptdist, ae_fabs(kdt->xy.ptr.pp_double[i][j]-kdt->x.ptr.p_double[j], _state), _state); + } + } + if( kdt->normtype==1 ) + { + for(j=0; j<=nx-1; j++) + { + ptdist = ptdist+ae_fabs(kdt->xy.ptr.pp_double[i][j]-kdt->x.ptr.p_double[j], _state); + } + } + if( kdt->normtype==2 ) + { + for(j=0; j<=nx-1; j++) + { + ptdist = ptdist+ae_sqr(kdt->xy.ptr.pp_double[i][j]-kdt->x.ptr.p_double[j], _state); + } + } + + /* + * Skip points with zero distance if self-matches are turned off + */ + if( ae_fp_eq(ptdist,0)&&!kdt->selfmatch ) + { + continue; + } + + /* + * We CAN'T process point if R-criterion isn't satisfied, + * i.e. (RNeeded<>0) AND (PtDist>R). + */ + if( ae_fp_eq(kdt->rneeded,0)||ae_fp_less_eq(ptdist,kdt->rneeded) ) + { + + /* + * R-criterion is satisfied, we must either: + * * replace worst point, if (KNeeded<>0) AND (KCur=KNeeded) + * (or skip, if worst point is better) + * * add point without replacement otherwise + */ + if( kdt->kcur<kdt->kneeded||kdt->kneeded==0 ) + { + + /* + * add current point to heap without replacement + */ + tagheappushi(&kdt->r, &kdt->idx, &kdt->kcur, ptdist, i, _state); + } + else + { + + /* + * New points are added or not, depending on their distance. + * If added, they replace element at the top of the heap + */ + if( ae_fp_less(ptdist,kdt->r.ptr.p_double[0]) ) + { + if( kdt->kneeded==1 ) + { + kdt->idx.ptr.p_int[0] = i; + kdt->r.ptr.p_double[0] = ptdist; + } + else + { + tagheapreplacetopi(&kdt->r, &kdt->idx, kdt->kneeded, ptdist, i, _state); + } + } + } + } + } + return; + } + + /* + * Simple split + */ + if( kdt->nodes.ptr.p_int[offs]==0 ) + { + + /* + * Load: + * * D dimension to split + * * S split position + */ + d = kdt->nodes.ptr.p_int[offs+1]; + s = kdt->splits.ptr.p_double[kdt->nodes.ptr.p_int[offs+2]]; + + /* + * Calculate: + * * ChildBestOffs child box with best chances + * * ChildWorstOffs child box with worst chances + */ + if( ae_fp_less_eq(kdt->x.ptr.p_double[d],s) ) + { + childbestoffs = kdt->nodes.ptr.p_int[offs+3]; + childworstoffs = kdt->nodes.ptr.p_int[offs+4]; + bestisleft = ae_true; + } + else + { + childbestoffs = kdt->nodes.ptr.p_int[offs+4]; + childworstoffs = kdt->nodes.ptr.p_int[offs+3]; + bestisleft = ae_false; + } + + /* + * Navigate through childs + */ + for(i=0; i<=1; i++) + { + + /* + * Select child to process: + * * ChildOffs current child offset in Nodes[] + * * UpdateMin whether minimum or maximum value + * of bounding box is changed on update + */ + if( i==0 ) + { + childoffs = childbestoffs; + updatemin = !bestisleft; + } + else + { + updatemin = bestisleft; + childoffs = childworstoffs; + } + + /* + * Update bounding box and current distance + */ + if( updatemin ) + { + prevdist = kdt->curdist; + t1 = kdt->x.ptr.p_double[d]; + v = kdt->curboxmin.ptr.p_double[d]; + if( ae_fp_less_eq(t1,s) ) + { + if( kdt->normtype==0 ) + { + kdt->curdist = ae_maxreal(kdt->curdist, s-t1, _state); + } + if( kdt->normtype==1 ) + { + kdt->curdist = kdt->curdist-ae_maxreal(v-t1, 0, _state)+s-t1; + } + if( kdt->normtype==2 ) + { + kdt->curdist = kdt->curdist-ae_sqr(ae_maxreal(v-t1, 0, _state), _state)+ae_sqr(s-t1, _state); + } + } + kdt->curboxmin.ptr.p_double[d] = s; + } + else + { + prevdist = kdt->curdist; + t1 = kdt->x.ptr.p_double[d]; + v = kdt->curboxmax.ptr.p_double[d]; + if( ae_fp_greater_eq(t1,s) ) + { + if( kdt->normtype==0 ) + { + kdt->curdist = ae_maxreal(kdt->curdist, t1-s, _state); + } + if( kdt->normtype==1 ) + { + kdt->curdist = kdt->curdist-ae_maxreal(t1-v, 0, _state)+t1-s; + } + if( kdt->normtype==2 ) + { + kdt->curdist = kdt->curdist-ae_sqr(ae_maxreal(t1-v, 0, _state), _state)+ae_sqr(t1-s, _state); + } + } + kdt->curboxmax.ptr.p_double[d] = s; + } + + /* + * Decide: to dive into cell or not to dive + */ + if( ae_fp_neq(kdt->rneeded,0)&&ae_fp_greater(kdt->curdist,kdt->rneeded) ) + { + todive = ae_false; + } + else + { + if( kdt->kcur<kdt->kneeded||kdt->kneeded==0 ) + { + + /* + * KCur<KNeeded (i.e. not all points are found) + */ + todive = ae_true; + } + else + { + + /* + * KCur=KNeeded, decide to dive or not to dive + * using point position relative to bounding box. + */ + todive = ae_fp_less_eq(kdt->curdist,kdt->r.ptr.p_double[0]*kdt->approxf); + } + } + if( todive ) + { + nearestneighbor_kdtreequerynnrec(kdt, childoffs, _state); + } + + /* + * Restore bounding box and distance + */ + if( updatemin ) + { + kdt->curboxmin.ptr.p_double[d] = v; + } + else + { + kdt->curboxmax.ptr.p_double[d] = v; + } + kdt->curdist = prevdist; + } + return; + } +} + + +/************************************************************************* +Copies X[] to KDT.X[] +Loads distance from X[] to bounding box. +Initializes CurBox[]. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +static void nearestneighbor_kdtreeinitbox(kdtree* kdt, + /* Real */ ae_vector* x, + ae_state *_state) +{ + ae_int_t i; + double vx; + double vmin; + double vmax; + + + + /* + * calculate distance from point to current bounding box + */ + kdt->curdist = 0; + if( kdt->normtype==0 ) + { + for(i=0; i<=kdt->nx-1; i++) + { + vx = x->ptr.p_double[i]; + vmin = kdt->boxmin.ptr.p_double[i]; + vmax = kdt->boxmax.ptr.p_double[i]; + kdt->x.ptr.p_double[i] = vx; + kdt->curboxmin.ptr.p_double[i] = vmin; + kdt->curboxmax.ptr.p_double[i] = vmax; + if( ae_fp_less(vx,vmin) ) + { + kdt->curdist = ae_maxreal(kdt->curdist, vmin-vx, _state); + } + else + { + if( ae_fp_greater(vx,vmax) ) + { + kdt->curdist = ae_maxreal(kdt->curdist, vx-vmax, _state); + } + } + } + } + if( kdt->normtype==1 ) + { + for(i=0; i<=kdt->nx-1; i++) + { + vx = x->ptr.p_double[i]; + vmin = kdt->boxmin.ptr.p_double[i]; + vmax = kdt->boxmax.ptr.p_double[i]; + kdt->x.ptr.p_double[i] = vx; + kdt->curboxmin.ptr.p_double[i] = vmin; + kdt->curboxmax.ptr.p_double[i] = vmax; + if( ae_fp_less(vx,vmin) ) + { + kdt->curdist = kdt->curdist+vmin-vx; + } + else + { + if( ae_fp_greater(vx,vmax) ) + { + kdt->curdist = kdt->curdist+vx-vmax; + } + } + } + } + if( kdt->normtype==2 ) + { + for(i=0; i<=kdt->nx-1; i++) + { + vx = x->ptr.p_double[i]; + vmin = kdt->boxmin.ptr.p_double[i]; + vmax = kdt->boxmax.ptr.p_double[i]; + kdt->x.ptr.p_double[i] = vx; + kdt->curboxmin.ptr.p_double[i] = vmin; + kdt->curboxmax.ptr.p_double[i] = vmax; + if( ae_fp_less(vx,vmin) ) + { + kdt->curdist = kdt->curdist+ae_sqr(vmin-vx, _state); + } + else + { + if( ae_fp_greater(vx,vmax) ) + { + kdt->curdist = kdt->curdist+ae_sqr(vx-vmax, _state); + } + } + } + } +} + + +ae_bool _kdtree_init(kdtree* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->tags, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->boxmin, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->boxmax, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->curboxmin, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->curboxmax, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->nodes, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->splits, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->idx, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->r, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->buf, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _kdtree_init_copy(kdtree* dst, kdtree* src, ae_state *_state, ae_bool make_automatic) +{ + dst->n = src->n; + dst->nx = src->nx; + dst->ny = src->ny; + dst->normtype = src->normtype; + dst->distmatrixtype = src->distmatrixtype; + if( !ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->tags, &src->tags, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->boxmin, &src->boxmin, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->boxmax, &src->boxmax, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->curboxmin, &src->curboxmin, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->curboxmax, &src->curboxmax, _state, make_automatic) ) + return ae_false; + dst->curdist = src->curdist; + if( !ae_vector_init_copy(&dst->nodes, &src->nodes, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->splits, &src->splits, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->kneeded = src->kneeded; + dst->rneeded = src->rneeded; + dst->selfmatch = src->selfmatch; + dst->approxf = src->approxf; + dst->kcur = src->kcur; + if( !ae_vector_init_copy(&dst->idx, &src->idx, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->r, &src->r, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->buf, &src->buf, _state, make_automatic) ) + return ae_false; + dst->debugcounter = src->debugcounter; + return ae_true; +} + + +void _kdtree_clear(kdtree* p) +{ + ae_matrix_clear(&p->xy); + ae_vector_clear(&p->tags); + ae_vector_clear(&p->boxmin); + ae_vector_clear(&p->boxmax); + ae_vector_clear(&p->curboxmin); + ae_vector_clear(&p->curboxmax); + ae_vector_clear(&p->nodes); + ae_vector_clear(&p->splits); + ae_vector_clear(&p->x); + ae_vector_clear(&p->idx); + ae_vector_clear(&p->r); + ae_vector_clear(&p->buf); +} + + + +} + diff --git a/contrib/lbfgs/alglibmisc.h b/contrib/lbfgs/alglibmisc.h new file mode 100755 index 0000000000..491fb3314e --- /dev/null +++ b/contrib/lbfgs/alglibmisc.h @@ -0,0 +1,685 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#ifndef _alglibmisc_pkg_h +#define _alglibmisc_pkg_h +#include "ap.h" +#include "alglibinternal.h" + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +typedef struct +{ + ae_int_t s1; + ae_int_t s2; + double v; + ae_int_t magicv; +} hqrndstate; +typedef struct +{ + ae_int_t n; + ae_int_t nx; + ae_int_t ny; + ae_int_t normtype; + ae_int_t distmatrixtype; + ae_matrix xy; + ae_vector tags; + ae_vector boxmin; + ae_vector boxmax; + ae_vector curboxmin; + ae_vector curboxmax; + double curdist; + ae_vector nodes; + ae_vector splits; + ae_vector x; + ae_int_t kneeded; + double rneeded; + ae_bool selfmatch; + double approxf; + ae_int_t kcur; + ae_vector idx; + ae_vector r; + ae_vector buf; + ae_int_t debugcounter; +} kdtree; + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + +/************************************************************************* +Portable high quality random number generator state. +Initialized with HQRNDRandomize() or HQRNDSeed(). + +Fields: + S1, S2 - seed values + V - precomputed value + MagicV - 'magic' value used to determine whether State structure + was correctly initialized. +*************************************************************************/ +class _hqrndstate_owner +{ +public: + _hqrndstate_owner(); + _hqrndstate_owner(const _hqrndstate_owner &rhs); + _hqrndstate_owner& operator=(const _hqrndstate_owner &rhs); + virtual ~_hqrndstate_owner(); + alglib_impl::hqrndstate* c_ptr(); + alglib_impl::hqrndstate* c_ptr() const; +protected: + alglib_impl::hqrndstate *p_struct; +}; +class hqrndstate : public _hqrndstate_owner +{ +public: + hqrndstate(); + hqrndstate(const hqrndstate &rhs); + hqrndstate& operator=(const hqrndstate &rhs); + virtual ~hqrndstate(); + +}; + +/************************************************************************* + +*************************************************************************/ +class _kdtree_owner +{ +public: + _kdtree_owner(); + _kdtree_owner(const _kdtree_owner &rhs); + _kdtree_owner& operator=(const _kdtree_owner &rhs); + virtual ~_kdtree_owner(); + alglib_impl::kdtree* c_ptr(); + alglib_impl::kdtree* c_ptr() const; +protected: + alglib_impl::kdtree *p_struct; +}; +class kdtree : public _kdtree_owner +{ +public: + kdtree(); + kdtree(const kdtree &rhs); + kdtree& operator=(const kdtree &rhs); + virtual ~kdtree(); + +}; + +/************************************************************************* +HQRNDState initialization with random values which come from standard +RNG. + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndrandomize(hqrndstate &state); + + +/************************************************************************* +HQRNDState initialization with seed values + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndseed(const ae_int_t s1, const ae_int_t s2, hqrndstate &state); + + +/************************************************************************* +This function generates random real number in (0,1), +not including interval boundaries + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +double hqrnduniformr(const hqrndstate &state); + + +/************************************************************************* +This function generates random integer number in [0, N) + +1. N must be less than HQRNDMax-1. +2. State structure must be initialized with HQRNDRandomize() or HQRNDSeed() + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +ae_int_t hqrnduniformi(const hqrndstate &state, const ae_int_t n); + + +/************************************************************************* +Random number generator: normal numbers + +This function generates one random number from normal distribution. +Its performance is equal to that of HQRNDNormal2() + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +double hqrndnormal(const hqrndstate &state); + + +/************************************************************************* +Random number generator: random X and Y such that X^2+Y^2=1 + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndunit2(const hqrndstate &state, double &x, double &y); + + +/************************************************************************* +Random number generator: normal numbers + +This function generates two independent random numbers from normal +distribution. Its performance is equal to that of HQRNDNormal() + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 02.12.2009 by Bochkanov Sergey +*************************************************************************/ +void hqrndnormal2(const hqrndstate &state, double &x1, double &x2); + + +/************************************************************************* +Random number generator: exponential distribution + +State structure must be initialized with HQRNDRandomize() or HQRNDSeed(). + + -- ALGLIB -- + Copyright 11.08.2007 by Bochkanov Sergey +*************************************************************************/ +double hqrndexponential(const hqrndstate &state, const double lambdav); + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values and optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuild(const real_2d_array &xy, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt); +void kdtreebuild(const real_2d_array &xy, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt); + + +/************************************************************************* +KD-tree creation + +This subroutine creates KD-tree from set of X-values, integer tags and +optional Y-values + +INPUT PARAMETERS + XY - dataset, array[0..N-1,0..NX+NY-1]. + one row corresponds to one point. + first NX columns contain X-values, next NY (NY may be zero) + columns may contain associated Y-values + Tags - tags, array[0..N-1], contains integer tags associated + with points. + N - number of points, N>=1 + NX - space dimension, NX>=1. + NY - number of optional Y-values, NY>=0. + NormType- norm type: + * 0 denotes infinity-norm + * 1 denotes 1-norm + * 2 denotes 2-norm (Euclidean norm) + +OUTPUT PARAMETERS + KDT - KD-tree + +NOTES + +1. KD-tree creation have O(N*logN) complexity and O(N*(2*NX+NY)) memory + requirements. +2. Although KD-trees may be used with any combination of N and NX, they + are more efficient than brute-force search only when N >> 4^NX. So they + are most useful in low-dimensional tasks (NX=2, NX=3). NX=1 is another + inefficient case, because simple binary search (without additional + structures) is much more efficient in such tasks than KD-trees. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt); +void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt); + + +/************************************************************************* +K-NN query: K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of actual neighbors found (either K or N, if K>N). + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch); +ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k); + + +/************************************************************************* +R-NN query: all points within R-sphere centered at X + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + R - radius of sphere (in corresponding norm), R>0 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + +RESULT + number of neighbors found, >=0 + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +actual results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r, const bool selfmatch); +ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r); + + +/************************************************************************* +K-NN query: approximate K nearest neighbors + +INPUT PARAMETERS + KDT - KD-tree + X - point, array[0..NX-1]. + K - number of neighbors to return, K>=1 + SelfMatch - whether self-matches are allowed: + * if True, nearest neighbor may be the point itself + (if it exists in original dataset) + * if False, then only points with non-zero distance + are returned + * if not given, considered True + Eps - approximation factor, Eps>=0. eps-approximate nearest + neighbor is a neighbor whose distance from X is at + most (1+eps) times distance of true nearest neighbor. + +RESULT + number of actual neighbors found (either K or N, if K>N). + +NOTES + significant performance gain may be achieved only when Eps is is on + the order of magnitude of 1 or larger. + +This subroutine performs query and stores its result in the internal +structures of the KD-tree. You can use following subroutines to obtain +these results: +* KDTreeQueryResultsX() to get X-values +* KDTreeQueryResultsXY() to get X- and Y-values +* KDTreeQueryResultsTags() to get tag values +* KDTreeQueryResultsDistances() to get distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch, const double eps); +ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const double eps); + + +/************************************************************************* +X-values from last query + +INPUT PARAMETERS + KDT - KD-tree + X - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + X - rows are filled with X-values + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsTags() tag values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsx(const kdtree &kdt, real_2d_array &x); + + +/************************************************************************* +X- and Y-values from last query + +INPUT PARAMETERS + KDT - KD-tree + XY - possibly pre-allocated buffer. If XY is too small to store + result, it is resized. If size(XY) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + XY - rows are filled with points: first NX columns with + X-values, next NY columns - with Y-values. + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsTags() tag values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxy(const kdtree &kdt, real_2d_array &xy); + + +/************************************************************************* +Tags from last query + +INPUT PARAMETERS + KDT - KD-tree + Tags - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + Tags - filled with tags associated with points, + or, when no tags were supplied, with zeros + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsDistances() distances + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultstags(const kdtree &kdt, integer_1d_array &tags); + + +/************************************************************************* +Distances from last query + +INPUT PARAMETERS + KDT - KD-tree + R - possibly pre-allocated buffer. If X is too small to store + result, it is resized. If size(X) is enough to store + result, it is left unchanged. + +OUTPUT PARAMETERS + R - filled with distances (in corresponding norm) + +NOTES +1. points are ordered by distance from the query point (first = closest) +2. if XY is larger than required to store result, only leading part will + be overwritten; trailing part will be left unchanged. So if on input + XY = [[A,B],[C,D]], and result is [1,2], then on exit we will get + XY = [[1,2],[C,D]]. This is done purposely to increase performance; if + you want function to resize array according to result size, use + function with same name and suffix 'I'. + +SEE ALSO +* KDTreeQueryResultsX() X-values +* KDTreeQueryResultsXY() X- and Y-values +* KDTreeQueryResultsTags() tag values + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsdistances(const kdtree &kdt, real_1d_array &r); + + +/************************************************************************* +X-values from last query; 'interactive' variant for languages like Python +which support constructs like "X = KDTreeQueryResultsXI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxi(const kdtree &kdt, real_2d_array &x); + + +/************************************************************************* +XY-values from last query; 'interactive' variant for languages like Python +which support constructs like "XY = KDTreeQueryResultsXYI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsxyi(const kdtree &kdt, real_2d_array &xy); + + +/************************************************************************* +Tags from last query; 'interactive' variant for languages like Python +which support constructs like "Tags = KDTreeQueryResultsTagsI(KDT)" and +interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultstagsi(const kdtree &kdt, integer_1d_array &tags); + + +/************************************************************************* +Distances from last query; 'interactive' variant for languages like Python +which support constructs like "R = KDTreeQueryResultsDistancesI(KDT)" +and interactive mode of interpreter. + +This function allocates new array on each call, so it is significantly +slower than its 'non-interactive' counterpart, but it is more convenient +when you call it from command line. + + -- ALGLIB -- + Copyright 28.02.2010 by Bochkanov Sergey +*************************************************************************/ +void kdtreequeryresultsdistancesi(const kdtree &kdt, real_1d_array &r); +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +void hqrndrandomize(hqrndstate* state, ae_state *_state); +void hqrndseed(ae_int_t s1, + ae_int_t s2, + hqrndstate* state, + ae_state *_state); +double hqrnduniformr(hqrndstate* state, ae_state *_state); +ae_int_t hqrnduniformi(hqrndstate* state, ae_int_t n, ae_state *_state); +double hqrndnormal(hqrndstate* state, ae_state *_state); +void hqrndunit2(hqrndstate* state, double* x, double* y, ae_state *_state); +void hqrndnormal2(hqrndstate* state, + double* x1, + double* x2, + ae_state *_state); +double hqrndexponential(hqrndstate* state, + double lambdav, + ae_state *_state); +ae_bool _hqrndstate_init(hqrndstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _hqrndstate_init_copy(hqrndstate* dst, hqrndstate* src, ae_state *_state, ae_bool make_automatic); +void _hqrndstate_clear(hqrndstate* p); +void kdtreebuild(/* Real */ ae_matrix* xy, + ae_int_t n, + ae_int_t nx, + ae_int_t ny, + ae_int_t normtype, + kdtree* kdt, + ae_state *_state); +void kdtreebuildtagged(/* Real */ ae_matrix* xy, + /* Integer */ ae_vector* tags, + ae_int_t n, + ae_int_t nx, + ae_int_t ny, + ae_int_t normtype, + kdtree* kdt, + ae_state *_state); +ae_int_t kdtreequeryknn(kdtree* kdt, + /* Real */ ae_vector* x, + ae_int_t k, + ae_bool selfmatch, + ae_state *_state); +ae_int_t kdtreequeryrnn(kdtree* kdt, + /* Real */ ae_vector* x, + double r, + ae_bool selfmatch, + ae_state *_state); +ae_int_t kdtreequeryaknn(kdtree* kdt, + /* Real */ ae_vector* x, + ae_int_t k, + ae_bool selfmatch, + double eps, + ae_state *_state); +void kdtreequeryresultsx(kdtree* kdt, + /* Real */ ae_matrix* x, + ae_state *_state); +void kdtreequeryresultsxy(kdtree* kdt, + /* Real */ ae_matrix* xy, + ae_state *_state); +void kdtreequeryresultstags(kdtree* kdt, + /* Integer */ ae_vector* tags, + ae_state *_state); +void kdtreequeryresultsdistances(kdtree* kdt, + /* Real */ ae_vector* r, + ae_state *_state); +void kdtreequeryresultsxi(kdtree* kdt, + /* Real */ ae_matrix* x, + ae_state *_state); +void kdtreequeryresultsxyi(kdtree* kdt, + /* Real */ ae_matrix* xy, + ae_state *_state); +void kdtreequeryresultstagsi(kdtree* kdt, + /* Integer */ ae_vector* tags, + ae_state *_state); +void kdtreequeryresultsdistancesi(kdtree* kdt, + /* Real */ ae_vector* r, + ae_state *_state); +ae_bool _kdtree_init(kdtree* p, ae_state *_state, ae_bool make_automatic); +ae_bool _kdtree_init_copy(kdtree* dst, kdtree* src, ae_state *_state, ae_bool make_automatic); +void _kdtree_clear(kdtree* p); + +} +#endif + diff --git a/contrib/lbfgs/ap.cpp b/contrib/lbfgs/ap.cpp new file mode 100755 index 0000000000..14bfb10910 --- /dev/null +++ b/contrib/lbfgs/ap.cpp @@ -0,0 +1,8952 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#include "stdafx.h" +#include "ap.h" +#include <limits> +#include <locale.h> +using namespace std; + +#if defined(AE_CPU) +#if (AE_CPU==AE_INTEL) + +#if AE_COMPILER==AE_MSVC +#include <intrin.h> +#endif + +#endif +#endif + +// disable some irrelevant warnings +#if (AE_COMPILER==AE_MSVC) +#pragma warning(disable:4100) +#pragma warning(disable:4127) +#pragma warning(disable:4702) +#pragma warning(disable:4996) +#endif + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION IMPLEMENTS BASIC FUNCTIONALITY LIKE +// MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS +// SHARED BETWEEN C++ AND PURE C LIBRARIES +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +/* + * local definitions + */ +#define x_nb 16 +#define AE_DATA_ALIGN 16 +#define AE_PTR_ALIGN sizeof(void*) +#define DYN_BOTTOM ((void*)1) +#define DYN_FRAME ((void*)2) +#define AE_LITTLE_ENDIAN 1 +#define AE_BIG_ENDIAN 2 +#define AE_MIXED_ENDIAN 3 + + +/* + * alloc counter (if used) + */ +#ifdef AE_USE_ALLOC_COUNTER +ae_int64_t _alloc_counter = 0; +#endif + +/* + * These declarations are used to ensure that + * sizeof(ae_int32_t)==4, sizeof(ae_int64_t)==8, sizeof(ae_int_t)==sizeof(void*). + * they will lead to syntax error otherwise (array size will be negative). + * + * you can remove them, if you want - they are not used anywhere. + * + */ +static char _ae_int32_t_must_be_32_bits_wide[1-2*((int)(sizeof(ae_int32_t))-4)*((int)(sizeof(ae_int32_t))-4)]; +static char _ae_int64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_int64_t))-8)*((int)(sizeof(ae_int64_t))-8)]; +static char _ae_int_t_must_be_pointer_sized [1-2*((int)(sizeof(ae_int_t))-(int)sizeof(void*))*((int)(sizeof(ae_int_t))-(int)(sizeof(void*)))]; + +ae_int_t ae_misalignment(const void *ptr, size_t alignment) +{ + union _u + { + const void *ptr; + ae_int_t iptr; + } u; + u.ptr = ptr; + return (ae_int_t)(u.iptr%alignment); +} + +void* ae_align(void *ptr, size_t alignment) +{ + char *result = (char*)ptr; + if( (result-(char*)0)%alignment!=0 ) + result += alignment - (result-(char*)0)%alignment; + return result; +} + +void ae_break(ae_state *state, ae_error_type error_type, const char *msg) +{ +#ifndef AE_USE_CPP_ERROR_HANDLING + if( state!=NULL ) + { + ae_state_clear(state); + state->last_error = error_type; + state->error_msg = msg; + if( state->break_jump!=NULL ) + longjmp(*(state->break_jump), 1); + else + abort(); + } + else + abort(); +#else + if( state!=NULL ) + { + ae_state_clear(state); + state->last_error = error_type; + state->error_msg = msg; + } + throw error_type; +#endif +} + +void* aligned_malloc(size_t size, size_t alignment) +{ + if( size==0 ) + return NULL; + if( alignment<=1 ) + { + /* no alignment, just call malloc */ + void *block; + void **p; ; + block = malloc(sizeof(void*)+size); + if( block==NULL ) + return NULL; + p = (void**)block; + *p = block; +#ifdef AE_USE_ALLOC_COUNTER + _alloc_counter++; +#endif + return (void*)((char*)block+sizeof(void*)); + } + else + { + /* align */ + void *block; + char *result; + block = malloc(alignment-1+sizeof(void*)+size); + if( block==NULL ) + return NULL; + result = (char*)block+sizeof(void*); + /*if( (result-(char*)0)%alignment!=0 ) + result += alignment - (result-(char*)0)%alignment;*/ + result = (char*)ae_align(result, alignment); + *((void**)(result-sizeof(void*))) = block; +#ifdef AE_USE_ALLOC_COUNTER + _alloc_counter++; +#endif + return result; + } +} + +void aligned_free(void *block) +{ + void *p; + if( block==NULL ) + return; + p = *((void**)((char*)block-sizeof(void*))); + free(p); +#ifdef AE_USE_ALLOC_COUNTER + _alloc_counter--; +#endif +} + +/************************************************************************ +Malloc's memory with automatic alignment. + +Returns NULL when zero size is specified. + +Error handling: +* if state is NULL, returns NULL on allocation error +* if state is not NULL, calls ae_break() on allocation error +************************************************************************/ +void* ae_malloc(size_t size, ae_state *state) +{ + void *result; + if( size==0 ) + return NULL; + result = aligned_malloc(size,AE_DATA_ALIGN); + if( result==NULL && state!=NULL) + ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory"); + return result; +} + +void ae_free(void *p) +{ + if( p!=NULL ) + aligned_free(p); +} + +/************************************************************************ +Sets pointers to the matrix rows. + +* dst must be correctly initialized matrix +* dst->data.ptr points to the beginning of memory block allocated for + row pointers. +* dst->ptr - undefined (initialized during algorithm processing) +* storage parameter points to the beginning of actual storage +************************************************************************/ +void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage) +{ + char *p_base; + void **pp_ptr; + ae_int_t i; + if( dst->rows>0 && dst->cols>0 ) + { + p_base = (char*)storage; + pp_ptr = (void**)dst->data.ptr; + dst->ptr.pp_void = pp_ptr; + for(i=0; i<dst->rows; i++, p_base+=dst->stride*ae_sizeof(dst->datatype)) + pp_ptr[i] = p_base; + } + else + dst->ptr.pp_void = NULL; +} + +/************************************************************************ +Returns size of datatype. +Zero for dynamic types like strings or multiple precision types. +************************************************************************/ +ae_int_t ae_sizeof(ae_datatype datatype) +{ + switch(datatype) + { + case DT_BOOL: return (ae_int_t)sizeof(ae_bool); + case DT_INT: return (ae_int_t)sizeof(ae_int_t); + case DT_REAL: return (ae_int_t)sizeof(double); + case DT_COMPLEX: return 2*(ae_int_t)sizeof(double); + default: return 0; + } +} + +/************************************************************************ +This function initializes ALGLIB environment state. + +NOTES: +* stacks contain no frames, so ae_make_frame() must be called before + attaching dynamic blocks. Without it ae_leave_frame() will cycle + forever (which is intended behavior). +************************************************************************/ +void ae_state_init(ae_state *state) +{ + ae_int32_t *vp; + + /* + * p_next points to itself because: + * * correct program should be able to detect end of the list + * by looking at the ptr field. + * * NULL p_next may be used to distinguish automatic blocks + * (in the list) from non-automatic (not in the list) + */ + state->last_block.p_next = &(state->last_block); + state->last_block.deallocator = NULL; + state->last_block.ptr = DYN_BOTTOM; + state->p_top_block = &(state->last_block); +#ifndef AE_USE_CPP_ERROR_HANDLING + state->break_jump = NULL; +#endif + state->error_msg = ""; + + /* + * determine endianness and initialize precomputed IEEE special quantities. + */ + state->endianness = ae_get_endianness(); + if( state->endianness==AE_LITTLE_ENDIAN ) + { + vp = (ae_int32_t*)(&state->v_nan); + vp[0] = 0; + vp[1] = (ae_int32_t)0x7FF80000; + vp = (ae_int32_t*)(&state->v_posinf); + vp[0] = 0; + vp[1] = (ae_int32_t)0x7FF00000; + vp = (ae_int32_t*)(&state->v_neginf); + vp[0] = 0; + vp[1] = (ae_int32_t)0xFFF00000; + } + else if( state->endianness==AE_BIG_ENDIAN ) + { + vp = (ae_int32_t*)(&state->v_nan); + vp[1] = 0; + vp[0] = (ae_int32_t)0x7FF80000; + vp = (ae_int32_t*)(&state->v_posinf); + vp[1] = 0; + vp[0] = (ae_int32_t)0x7FF00000; + vp = (ae_int32_t*)(&state->v_neginf); + vp[1] = 0; + vp[0] = (ae_int32_t)0xFFF00000; + } + else + abort(); +} + + +/************************************************************************ +This function clears ALGLIB environment state. +All dynamic data controlled by state are freed. +************************************************************************/ +void ae_state_clear(ae_state *state) +{ + while( state->p_top_block->ptr!=DYN_BOTTOM ) + ae_frame_leave(state); +} + + +#ifndef AE_USE_CPP_ERROR_HANDLING +/************************************************************************ +This function sets jump buffer for error handling. + +buf may be NULL. +************************************************************************/ +void ae_state_set_break_jump(ae_state *state, jmp_buf *buf) +{ + state->break_jump = buf; +} +#endif + + +/************************************************************************ +This function makes new stack frame. + +This function takes two parameters: environment state and pointer to the +dynamic block which will be used as indicator of the frame beginning. +This dynamic block must be initialized by caller and mustn't be changed/ +deallocated/reused till ae_leave_frame called. It may be global or local +variable (local is even better). +************************************************************************/ +void ae_frame_make(ae_state *state, ae_frame *tmp) +{ + tmp->db_marker.p_next = state->p_top_block; + tmp->db_marker.deallocator = NULL; + tmp->db_marker.ptr = DYN_FRAME; + state->p_top_block = &tmp->db_marker; +} + + +/************************************************************************ +This function leaves current stack frame and deallocates all automatic +dynamic blocks which were attached to this frame. +************************************************************************/ +void ae_frame_leave(ae_state *state) +{ + while( state->p_top_block->ptr!=DYN_FRAME && state->p_top_block->ptr!=DYN_BOTTOM) + { + if( state->p_top_block->ptr!=NULL && state->p_top_block->deallocator!=NULL) + ((ae_deallocator)(state->p_top_block->deallocator))(state->p_top_block->ptr); + state->p_top_block = state->p_top_block->p_next; + } + state->p_top_block = state->p_top_block->p_next; +} + + +/************************************************************************ +This function attaches block to the dynamic block list + +block block +state ALGLIB environment state + +NOTES: +* never call it for special blocks which marks frame boundaries! +************************************************************************/ +void ae_db_attach(ae_dyn_block *block, ae_state *state) +{ + block->p_next = state->p_top_block; + state->p_top_block = block; +} + + +/************************************************************************ +This function malloc's dynamic block: + +block destination block, assumed to be uninitialized +size size (in bytes) +state ALGLIB environment state. May be NULL. +make_automatic if true, vector is added to the dynamic block list + +block is assumed to be uninitialized, its fields are ignored. + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +NOTES: +* never call it for blocks which are already in the list +************************************************************************/ +ae_bool ae_db_malloc(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic) +{ + /* ensure that size is >=0 + two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */ + if( state!=NULL ) + ae_assert(size>=0, "ae_db_malloc(): negative size", state); + if( size<0 ) + return ae_false; + + /* alloc */ + block->ptr = ae_malloc((size_t)size, state); + if( block->ptr==NULL && size!=0 ) + return ae_false; + if( make_automatic && state!=NULL ) + ae_db_attach(block, state); + else + block->p_next = NULL; + block->deallocator = ae_free; + return ae_true; +} + + +/************************************************************************ +This function realloc's dynamic block: + +block destination block (initialized) +size new size (in bytes) +state ALGLIB environment state + +block is assumed to be initialized. + +This function: +* deletes old contents +* preserves automatic state + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +NOTES: +* never call it for special blocks which mark frame boundaries! +************************************************************************/ +ae_bool ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state) +{ + /* ensure that size is >=0 + two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */ + if( state!=NULL ) + ae_assert(size>=0, "ae_db_realloc(): negative size", state); + if( size<0 ) + return ae_false; + + /* realloc */ + if( block->ptr!=NULL ) + ((ae_deallocator)block->deallocator)(block->ptr); + block->ptr = ae_malloc((size_t)size, state); + if( block->ptr==NULL && size!=0 ) + return ae_false; + block->deallocator = ae_free; + return ae_true; +} + + +/************************************************************************ +This function clears dynamic block (releases all dynamically allocated +memory). Dynamic block may be in automatic management list - in this case +it will NOT be removed from list. + +block destination block (initialized) + +NOTES: +* never call it for special blocks which marks frame boundaries! +************************************************************************/ +void ae_db_free(ae_dyn_block *block) +{ + if( block->ptr!=NULL ) + ((ae_deallocator)block->deallocator)(block->ptr); + block->ptr = NULL; + block->deallocator = ae_free; +} + +/************************************************************************ +This function swaps contents of two dynamic blocks (pointers and +deallocators) leaving other parameters (automatic management settings, +etc.) unchanged. + +NOTES: +* never call it for special blocks which marks frame boundaries! +************************************************************************/ +void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2) +{ + void (*deallocator)(void*) = NULL; + void * volatile ptr; + ptr = block1->ptr; + deallocator = block1->deallocator; + block1->ptr = block2->ptr; + block1->deallocator = block2->deallocator; + block2->ptr = ptr; + block2->deallocator = deallocator; +} + +/************************************************************************ +This function creates ae_vector. + +Vector size may be zero. Vector contents is uninitialized. + +dst destination vector +size vector size, may be zero +datatype guess what... +state ALGLIB environment state +make_automatic if true, vector is added to the dynamic block list + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +dst is assumed to be uninitialized, its fields are ignored. +************************************************************************/ +ae_bool ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic) +{ + /* ensure that size is >=0 + two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */ + if( state!=NULL ) + ae_assert(size>=0, "ae_vector_init(): negative size", state); + if( size<0 ) + return ae_false; + + /* init */ + dst->cnt = size; + dst->datatype = datatype; + if( !ae_db_malloc(&dst->data, size*ae_sizeof(datatype), state, make_automatic) ) + return ae_false; + dst->ptr.p_ptr = dst->data.ptr; + return ae_true; +} + + +/************************************************************************ +This function creates copy of ae_vector. + +dst destination vector +src well, it is source +state ALGLIB environment state +make_automatic if true, vector is added to the dynamic block list + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +dst is assumed to be uninitialized, its fields are ignored. +************************************************************************/ +ae_bool ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic) +{ + if( !ae_vector_init(dst, src->cnt, src->datatype, state, make_automatic) ) + return ae_false; + if( src->cnt!=0 ) + memcpy(dst->ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype))); + return ae_true; +} + +/************************************************************************ +This function creates ae_vector from x_vector: + +dst destination vector +src source, vector in x-format +state ALGLIB environment state +make_automatic if true, vector is added to the dynamic block list + +dst is assumed to be uninitialized, its fields are ignored. +************************************************************************/ +void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic) +{ + ae_vector_init(dst, (ae_int_t)src->cnt, (ae_datatype)src->datatype, state, make_automatic); + if( src->cnt>0 ) + memcpy(dst->ptr.p_ptr, src->ptr, (size_t)(((ae_int_t)src->cnt)*ae_sizeof((ae_datatype)src->datatype))); +} + + +/************************************************************************ +This function changes length of ae_vector. + +dst destination vector +newsize vector size, may be zero +state ALGLIB environment state + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +NOTES: +* vector must be initialized +* all contents is destroyed during setlength() call +* new size may be zero. +************************************************************************/ +ae_bool ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state) +{ + /* ensure that size is >=0 + two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */ + if( state!=NULL ) + ae_assert(newsize>=0, "ae_vector_set_length(): negative size", state); + if( newsize<0 ) + return ae_false; + + /* set length */ + if( dst->cnt==newsize ) + return ae_true; + dst->cnt = newsize; + if( !ae_db_realloc(&dst->data, newsize*ae_sizeof(dst->datatype), state) ) + return ae_false; + dst->ptr.p_ptr = dst->data.ptr; + return ae_true; +} + + +/************************************************************************ +This function clears vector contents (releases all dynamically allocated +memory). Vector may be in automatic management list - in this case it +will NOT be removed from list. + +IMPORTANT: this function does NOT invalidates dst; it just releases all +dynamically allocated storage, but dst still may be used after call to +ae_vector_set_length(). + +dst destination vector +************************************************************************/ +void ae_vector_clear(ae_vector *dst) +{ + dst->cnt = 0; + ae_db_free(&dst->data); + dst->ptr.p_ptr = 0; +} + + +/************************************************************************ +This function efficiently swaps contents of two vectors, leaving other +pararemeters (automatic management, etc.) unchanged. +************************************************************************/ +void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2) +{ + ae_int_t cnt; + ae_datatype datatype; + void *p_ptr; + + ae_db_swap(&vec1->data, &vec2->data); + + cnt = vec1->cnt; + datatype = vec1->datatype; + p_ptr = vec1->ptr.p_ptr; + vec1->cnt = vec2->cnt; + vec1->datatype = vec2->datatype; + vec1->ptr.p_ptr = vec2->ptr.p_ptr; + vec2->cnt = cnt; + vec2->datatype = datatype; + vec2->ptr.p_ptr = p_ptr; +} + +/************************************************************************ +This function creates ae_matrix. + +Matrix size may be zero, in such cases both rows and cols are zero. +Matrix contents is uninitialized. + +dst destination natrix +rows rows count +cols cols count +datatype element type +state ALGLIB environment state +make_automatic if true, matrix is added to the dynamic block list + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +dst is assumed to be uninitialized, its fields are ignored. +************************************************************************/ +ae_bool ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic) +{ + /* ensure that size is >=0 + two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */ + if( state!=NULL ) + ae_assert(rows>=0 && cols>=0, "ae_matrix_init(): negative length", state); + if( rows<0 || cols<0 ) + return ae_false; + + /* if one of rows/cols is zero, another MUST be too */ + if( rows==0 || cols==0 ) + { + rows = 0; + cols = 0; + } + + /* init */ + dst->rows = rows; + dst->cols = cols; + dst->stride = cols; + while( dst->stride*ae_sizeof(datatype)%AE_DATA_ALIGN!=0 ) + dst->stride++; + dst->datatype = datatype; + if( !ae_db_malloc(&dst->data, dst->rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(datatype))+AE_DATA_ALIGN-1, state, make_automatic) ) + return ae_false; + ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN)); + return ae_true; +} + + +/************************************************************************ +This function creates copy of ae_matrix. + +dst destination matrix +src well, it is source +state ALGLIB environment state +make_automatic if true, matrix is added to the dynamic block list + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +dst is assumed to be uninitialized, its fields are ignored. +************************************************************************/ +ae_bool ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic) +{ + ae_int_t i; + if( !ae_matrix_init(dst, src->rows, src->cols, src->datatype, state, make_automatic) ) + return ae_false; + if( src->rows!=0 && src->cols!=0 ) + { + if( dst->stride==src->stride ) + memcpy(dst->ptr.pp_void[0], src->ptr.pp_void[0], (size_t)(src->rows*src->stride*ae_sizeof(src->datatype))); + else + for(i=0; i<dst->rows; i++) + memcpy(dst->ptr.pp_void[i], src->ptr.pp_void[i], (size_t)(dst->cols*ae_sizeof(dst->datatype))); + } + return ae_true; +} + + +void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic) +{ + char *p_src_row; + char *p_dst_row; + ae_int_t row_size; + ae_int_t i; + ae_matrix_init(dst, (ae_int_t)src->rows, (ae_int_t)src->cols, (ae_datatype)src->datatype, state, make_automatic); + if( src->rows!=0 && src->cols!=0 ) + { + p_src_row = (char*)src->ptr; + p_dst_row = (char*)(dst->ptr.pp_void[0]); + row_size = ae_sizeof((ae_datatype)src->datatype)*(ae_int_t)src->cols; + for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof((ae_datatype)src->datatype), p_dst_row+=dst->stride*ae_sizeof((ae_datatype)src->datatype)) + memcpy(p_dst_row, p_src_row, (size_t)(row_size)); + } +} + + +/************************************************************************ +This function changes length of ae_matrix. + +dst destination matrix +rows size, may be zero +cols size, may be zero +state ALGLIB environment state + +Error handling: +* if state is NULL, returns ae_false on allocation error +* if state is not NULL, calls ae_break() on allocation error +* returns ae_true on success + +NOTES: +* matrix must be initialized +* all contents is destroyed during setlength() call +* new size may be zero. +************************************************************************/ +ae_bool ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state) +{ + /* ensure that size is >=0 + two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */ + if( state!=NULL ) + ae_assert(rows>=0 && cols>=0, "ae_matrix_set_length(): negative length", state); + if( rows<0 || cols<0 ) + return ae_false; + + if( dst->rows==rows && dst->cols==cols ) + return ae_true; + dst->rows = rows; + dst->cols = cols; + dst->stride = cols; + while( dst->stride*ae_sizeof(dst->datatype)%AE_DATA_ALIGN!=0 ) + dst->stride++; + if( !ae_db_realloc(&dst->data, dst->rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(dst->datatype))+AE_DATA_ALIGN-1, state) ) + return ae_false; + ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN)); + return ae_true; +} + + +/************************************************************************ +This function clears matrix contents (releases all dynamically allocated +memory). Matrix may be in automatic management list - in this case it +will NOT be removed from list. + +IMPORTANT: this function does NOT invalidates dst; it just releases all +dynamically allocated storage, but dst still may be used after call to +ae_matrix_set_length(). + +dst destination matrix +************************************************************************/ +void ae_matrix_clear(ae_matrix *dst) +{ + dst->rows = 0; + dst->cols = 0; + dst->stride = 0; + ae_db_free(&dst->data); + dst->ptr.p_ptr = 0; +} + + +/************************************************************************ +This function efficiently swaps contents of two vectors, leaving other +pararemeters (automatic management, etc.) unchanged. +************************************************************************/ +void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2) +{ + ae_int_t rows; + ae_int_t cols; + ae_int_t stride; + ae_datatype datatype; + void *p_ptr; + + ae_db_swap(&mat1->data, &mat2->data); + + rows = mat1->rows; + cols = mat1->cols; + stride = mat1->stride; + datatype = mat1->datatype; + p_ptr = mat1->ptr.p_ptr; + + mat1->rows = mat2->rows; + mat1->cols = mat2->cols; + mat1->stride = mat2->stride; + mat1->datatype = mat2->datatype; + mat1->ptr.p_ptr = mat2->ptr.p_ptr; + + mat2->rows = rows; + mat2->cols = cols; + mat2->stride = stride; + mat2->datatype = datatype; + mat2->ptr.p_ptr = p_ptr; +} + +/************************************************************************ +This function fills x_vector by ae_vector's contents: + +dst destination vector +src source, vector in x-format +state ALGLIB environment state + +NOTES: +* dst is assumed to be initialized. Its contents is freed before copying + data from src (if size / type are different) or overwritten (if + possible given destination size). +************************************************************************/ +void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state) +{ + if( dst->cnt!=src->cnt || dst->datatype!=src->datatype ) + { + if( dst->owner==OWN_AE ) + ae_free(dst->ptr); + dst->ptr = ae_malloc((size_t)(src->cnt*ae_sizeof(src->datatype)), state); + dst->last_action = ACT_NEW_LOCATION; + dst->cnt = src->cnt; + dst->datatype = src->datatype; + dst->owner = OWN_AE; + } + else + dst->last_action = ACT_SAME_LOCATION; + if( src->cnt ) + memcpy(dst->ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype))); +} + +/************************************************************************ +This function fills x_matrix by ae_matrix's contents: + +dst destination vector +src source, matrix in x-format +state ALGLIB environment state + +NOTES: +* dst is assumed to be initialized. Its contents is freed before copying + data from src (if size / type are different) or overwritten (if + possible given destination size). +************************************************************************/ +void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state) +{ + char *p_src_row; + char *p_dst_row; + ae_int_t i; + ae_int_t row_size; + if( dst->rows!=src->rows || dst->cols!=src->cols || dst->datatype!=src->datatype ) + { + if( dst->owner==OWN_AE ) + ae_free(dst->ptr); + dst->rows = src->rows; + dst->cols = src->cols; + dst->stride = src->cols; + dst->datatype = src->datatype; + dst->ptr = ae_malloc((size_t)(dst->rows*((ae_int_t)dst->stride)*ae_sizeof(src->datatype)), state); + dst->last_action = ACT_NEW_LOCATION; + dst->owner = OWN_AE; + } + else + dst->last_action = ACT_SAME_LOCATION; + if( src->rows!=0 && src->cols!=0 ) + { + p_src_row = (char*)(src->ptr.pp_void[0]); + p_dst_row = (char*)dst->ptr; + row_size = ae_sizeof(src->datatype)*src->cols; + for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof(src->datatype), p_dst_row+=dst->stride*ae_sizeof(src->datatype)) + memcpy(p_dst_row, p_src_row, (size_t)(row_size)); + } +} + +/************************************************************************ +This function attaches x_vector to ae_vector's contents. +Ownership of memory allocated is not changed (it is still managed by +ae_matrix). + +dst destination vector +src source, vector in x-format +state ALGLIB environment state + +NOTES: +* dst is assumed to be initialized. Its contents is freed before + attaching to src. +* this function doesn't need ae_state parameter because it can't fail + (assuming correctly initialized src) +************************************************************************/ +void ae_x_attach_to_vector(x_vector *dst, ae_vector *src) +{ + if( dst->owner==OWN_AE ) + ae_free(dst->ptr); + dst->ptr = src->ptr.p_ptr; + dst->last_action = ACT_NEW_LOCATION; + dst->cnt = src->cnt; + dst->datatype = src->datatype; + dst->owner = OWN_CALLER; +} + +/************************************************************************ +This function attaches x_matrix to ae_matrix's contents. +Ownership of memory allocated is not changed (it is still managed by +ae_matrix). + +dst destination vector +src source, matrix in x-format +state ALGLIB environment state + +NOTES: +* dst is assumed to be initialized. Its contents is freed before + attaching to src. +* this function doesn't need ae_state parameter because it can't fail + (assuming correctly initialized src) +************************************************************************/ +void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src) +{ + if( dst->owner==OWN_AE ) + ae_free(dst->ptr); + dst->rows = src->rows; + dst->cols = src->cols; + dst->stride = src->stride; + dst->datatype = src->datatype; + dst->ptr = &(src->ptr.pp_double[0][0]); + dst->last_action = ACT_NEW_LOCATION; + dst->owner = OWN_CALLER; +} + +/************************************************************************ +This function clears x_vector. It does nothing if vector is not owned by +ALGLIB environment. + +dst vector +************************************************************************/ +void x_vector_clear(x_vector *dst) +{ + if( dst->owner==OWN_AE ) + aligned_free(dst->ptr); + dst->ptr = NULL; + dst->cnt = 0; +} + +/************************************************************************ +Assertion +************************************************************************/ +void ae_assert(ae_bool cond, const char *msg, ae_state *state) +{ + if( !cond ) + ae_break(state, ERR_ASSERTION_FAILED, msg); +} + +/************************************************************************ +CPUID + +Returns information about features CPU and compiler support. + +You must tell ALGLIB what CPU family is used by defining AE_CPU symbol +(without this hint zero will be returned). + +Note: results of this function depend on both CPU and compiler; +if compiler doesn't support SSE intrinsics, function won't set +corresponding flag. +************************************************************************/ +ae_int_t ae_cpuid() +{ + /* + * to speed up CPU detection we cache our data in the static vars. + * there is no synchronization, but it is still thread safe. + * + * thread safety is guaranteed on all modern architectures which + * have following property: simultaneous writes by different cores + * to the same location will be executed in serial manner. + * + */ + static ae_bool initialized = ae_false; + static ae_bool has_sse2 = ae_false; + ae_int_t result; + + /* + * if not initialized, determine system properties + */ + if( !initialized ) + { + /* + * SSE2 + */ +#if defined(AE_CPU) +#if (AE_CPU==AE_INTEL) && defined(AE_HAS_SSE2_INTRINSICS) +#if AE_COMPILER==AE_GNUC +#elif AE_COMPILER==AE_MSVC + { + int CPUInfo[4]; + __cpuid(CPUInfo, 1); + if( (CPUInfo[3]&0x04000000)!=0 ) + has_sse2 = ae_true; + } +#elif AE_COMPILER==AE_SUNC +#else +#endif +#endif +#endif + /* + * set initialization flag + */ + initialized = ae_true; + } + + /* + * return + */ + result = 0; + if( has_sse2 ) + result = result|CPU_SSE2; + return result; +} + +/************************************************************************ +Real math functions +************************************************************************/ +ae_bool ae_fp_eq(double v1, double v2) +{ + /* IEEE-strict floating point comparison */ + volatile double x = v1; + volatile double y = v2; + return x==y; +} + +ae_bool ae_fp_neq(double v1, double v2) +{ + /* IEEE-strict floating point comparison */ + return !ae_fp_eq(v1,v2); +} + +ae_bool ae_fp_less(double v1, double v2) +{ + /* IEEE-strict floating point comparison */ + volatile double x = v1; + volatile double y = v2; + return x<y; +} + +ae_bool ae_fp_less_eq(double v1, double v2) +{ + /* IEEE-strict floating point comparison */ + volatile double x = v1; + volatile double y = v2; + return x<=y; +} + +ae_bool ae_fp_greater(double v1, double v2) +{ + /* IEEE-strict floating point comparison */ + volatile double x = v1; + volatile double y = v2; + return x>y; +} + +ae_bool ae_fp_greater_eq(double v1, double v2) +{ + /* IEEE-strict floating point comparison */ + volatile double x = v1; + volatile double y = v2; + return x>=y; +} + +ae_bool ae_isfinite_stateless(double x, ae_int_t endianness) +{ + union _u + { + double a; + ae_int32_t p[2]; + } u; + ae_int32_t high; + u.a = x; + if( endianness==AE_LITTLE_ENDIAN ) + high = u.p[1]; + else + high = u.p[0]; + return (high & (ae_int32_t)0x7FF00000)!=(ae_int32_t)0x7FF00000; +} + +ae_bool ae_isnan_stateless(double x, ae_int_t endianness) +{ + union _u + { + double a; + ae_int32_t p[2]; + } u; + ae_int32_t high, low; + u.a = x; + if( endianness==AE_LITTLE_ENDIAN ) + { + high = u.p[1]; + low = u.p[0]; + } + else + { + high = u.p[0]; + low = u.p[1]; + } + return ((high &0x7FF00000)==0x7FF00000) && (((high &0x000FFFFF)!=0) || (low!=0)); +} + +ae_bool ae_isinf_stateless(double x, ae_int_t endianness) +{ + union _u + { + double a; + ae_int32_t p[2]; + } u; + ae_int32_t high, low; + u.a = x; + if( endianness==AE_LITTLE_ENDIAN ) + { + high = u.p[1]; + low = u.p[0]; + } + else + { + high = u.p[0]; + low = u.p[1]; + } + + // 31 least significant bits of high are compared + return ((high&0x7FFFFFFF)==0x7FF00000) && (low==0); +} + +ae_bool ae_isposinf_stateless(double x, ae_int_t endianness) +{ + union _u + { + double a; + ae_int32_t p[2]; + } u; + ae_int32_t high, low; + u.a = x; + if( endianness==AE_LITTLE_ENDIAN ) + { + high = u.p[1]; + low = u.p[0]; + } + else + { + high = u.p[0]; + low = u.p[1]; + } + + // all 32 bits of high are compared + return (high==(ae_int32_t)0x7FF00000) && (low==0); +} + +ae_bool ae_isneginf_stateless(double x, ae_int_t endianness) +{ + union _u + { + double a; + ae_int32_t p[2]; + } u; + ae_int32_t high, low; + u.a = x; + if( endianness==AE_LITTLE_ENDIAN ) + { + high = u.p[1]; + low = u.p[0]; + } + else + { + high = u.p[0]; + low = u.p[1]; + } + + // this code is a bit tricky to avoid comparison of high with 0xFFF00000, which may be unsafe with some buggy compilers + return ((high&0x7FFFFFFF)==0x7FF00000) && (high!=(ae_int32_t)0x7FF00000) && (low==0); +} + +ae_int_t ae_get_endianness() +{ + union + { + double a; + ae_int32_t p[2]; + } u; + + /* + * determine endianness + * two types are supported: big-endian and little-endian. + * mixed-endian hardware is NOT supported. + * + * 1983 is used as magic number because its non-periodic double + * representation allow us to easily distinguish between upper + * and lower halfs and to detect mixed endian hardware. + * + */ + u.a = 1.0/1983.0; + if( u.p[1]==(ae_int32_t)0x3f408642 ) + return AE_LITTLE_ENDIAN; + if( u.p[0]==(ae_int32_t)0x3f408642 ) + return AE_BIG_ENDIAN; + return AE_MIXED_ENDIAN; +} + +ae_bool ae_isfinite(double x,ae_state *state) +{ + return ae_isfinite_stateless(x, state->endianness); +} + +ae_bool ae_isnan(double x, ae_state *state) +{ + return ae_isnan_stateless(x, state->endianness); +} + +ae_bool ae_isinf(double x, ae_state *state) +{ + return ae_isinf_stateless(x, state->endianness); +} + +ae_bool ae_isposinf(double x,ae_state *state) +{ + return ae_isposinf_stateless(x, state->endianness); +} + +ae_bool ae_isneginf(double x,ae_state *state) +{ + return ae_isneginf_stateless(x, state->endianness); +} + +double ae_fabs(double x, ae_state *state) +{ + return fabs(x); +} + +ae_int_t ae_iabs(ae_int_t x, ae_state *state) +{ + return x>=0 ? x : -x; +} + +double ae_sqr(double x, ae_state *state) +{ + return x*x; +} + +double ae_sqrt(double x, ae_state *state) +{ + return sqrt(x); +} + +ae_int_t ae_sign(double x, ae_state *state) +{ + if( x>0 ) return 1; + if( x<0 ) return -1; + return 0; +} + +ae_int_t ae_round(double x, ae_state *state) +{ + return (ae_int_t)(ae_ifloor(x+0.5,state)); +} + +ae_int_t ae_trunc(double x, ae_state *state) +{ + return (ae_int_t)(x>0 ? ae_ifloor(x,state) : ae_iceil(x,state)); +} + +ae_int_t ae_ifloor(double x, ae_state *state) +{ + return (ae_int_t)(floor(x)); +} + +ae_int_t ae_iceil(double x, ae_state *state) +{ + return (ae_int_t)(ceil(x)); +} + +ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state) +{ + return m1>m2 ? m1 : m2; +} + +ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state) +{ + return m1>m2 ? m2 : m1; +} + +double ae_maxreal(double m1, double m2, ae_state *state) +{ + return m1>m2 ? m1 : m2; +} + +double ae_minreal(double m1, double m2, ae_state *state) +{ + return m1>m2 ? m2 : m1; +} + +double ae_randomreal(ae_state *state) +{ + int i1 = rand(); + int i2 = rand(); + double mx; + while(i1==RAND_MAX) + i1 =rand(); + while(i2==RAND_MAX) + i2 =rand(); + mx = RAND_MAX; + return (i1+i2/mx)/mx; +} + +ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state) +{ + return rand()%maxv; +} + +double ae_sin(double x, ae_state *state) +{ + return sin(x); +} + +double ae_cos(double x, ae_state *state) +{ + return cos(x); +} + +double ae_tan(double x, ae_state *state) +{ + return tan(x); +} + +double ae_sinh(double x, ae_state *state) +{ + return sinh(x); +} + +double ae_cosh(double x, ae_state *state) +{ + return cosh(x); +} +double ae_tanh(double x, ae_state *state) +{ + return tanh(x); +} + +double ae_asin(double x, ae_state *state) +{ + return asin(x); +} + +double ae_acos(double x, ae_state *state) +{ + return acos(x); +} + +double ae_atan(double x, ae_state *state) +{ + return atan(x); +} + +double ae_atan2(double x, double y, ae_state *state) +{ + return atan2(x,y); +} + +double ae_log(double x, ae_state *state) +{ + return log(x); +} + +double ae_pow(double x, double y, ae_state *state) +{ + return pow(x,y); +} + +double ae_exp(double x, ae_state *state) +{ + return exp(x); +} + +/************************************************************************ +Symmetric/Hermitian properties: check and force +************************************************************************/ +static void x_split_length(ae_int_t n, ae_int_t nb, ae_int_t* n1, ae_int_t* n2) +{ + ae_int_t r; + if( n<=nb ) + { + *n1 = n; + *n2 = 0; + } + else + { + if( n%nb!=0 ) + { + *n2 = n%nb; + *n1 = n-(*n2); + } + else + { + *n2 = n/2; + *n1 = n-(*n2); + if( *n1%nb==0 ) + { + return; + } + r = nb-*n1%nb; + *n1 = *n1+r; + *n2 = *n2-r; + } + } +} +static double x_safepythag2(double x, double y) +{ + double w; + double xabs; + double yabs; + double z; + xabs = fabs(x); + yabs = fabs(y); + w = xabs>yabs ? xabs : yabs; + z = xabs<yabs ? xabs : yabs; + if( z==0 ) + return w; + else + { + double t; + t = z/w; + return w*sqrt(1+t*t); + } +} +/* + * this function checks difference between offdiagonal blocks BL and BU + * (see below). Block BL is specified by offsets (offset0,offset1) and + * sizes (len0,len1). + * + * [ . ] + * [ A0 BU ] + * A = [ BL A1 ] + * [ . ] + * + * this subroutine updates current values of: + * a) mx maximum value of A[i,j] found so far + * b) err componentwise difference between elements of BL and BU^T + * + */ +static void is_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state) +{ + /* try to split problem into two smaller ones */ + if( len0>x_nb || len1>x_nb ) + { + ae_int_t n1, n2; + if( len0>len1 ) + { + x_split_length(len0, x_nb, &n1, &n2); + is_symmetric_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state); + is_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state); + } + else + { + x_split_length(len1, x_nb, &n1, &n2); + is_symmetric_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state); + is_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state); + } + return; + } + else + { + /* base case */ + double *p1, *p2, *prow, *pcol; + double v; + ae_int_t i, j; + + p1 = (double*)(a->ptr)+offset0*a->stride+offset1; + p2 = (double*)(a->ptr)+offset1*a->stride+offset0; + for(i=0; i<len0; i++) + { + pcol = p2+i; + prow = p1+i*a->stride; + for(j=0; j<len1; j++) + { + if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) ) + { + *nonfinite = ae_true; + } + else + { + v = fabs(*pcol); + *mx = *mx>v ? *mx : v; + v = fabs(*prow); + *mx = *mx>v ? *mx : v; + v = fabs(*pcol-*prow); + *err = *err>v ? *err : v; + } + pcol += a->stride; + prow++; + } + } + } +} +/* + * this function checks that diagonal block A0 is symmetric. + * Block A0 is specified by its offset and size. + * + * [ . ] + * [ A0 ] + * A = [ . ] + * [ . ] + * + * this subroutine updates current values of: + * a) mx maximum value of A[i,j] found so far + * b) err componentwise difference between A0 and A0^T + * + */ +static void is_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state) +{ + double *p, *prow, *pcol; + double v; + ae_int_t i, j; + + /* try to split problem into two smaller ones */ + if( len>x_nb ) + { + ae_int_t n1, n2; + x_split_length(len, x_nb, &n1, &n2); + is_symmetric_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state); + is_symmetric_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state); + is_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state); + return; + } + + /* base case */ + p = (double*)(a->ptr)+offset*a->stride+offset; + for(i=0; i<len; i++) + { + pcol = p+i; + prow = p+i*a->stride; + for(j=0; j<i; j++,pcol+=a->stride,prow++) + { + if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) ) + { + *nonfinite = ae_true; + } + else + { + v = fabs(*pcol); + *mx = *mx>v ? *mx : v; + v = fabs(*prow); + *mx = *mx>v ? *mx : v; + v = fabs(*pcol-*prow); + *err = *err>v ? *err : v; + } + } + v = fabs(p[i+i*a->stride]); + *mx = *mx>v ? *mx : v; + } +} +/* + * this function checks difference between offdiagonal blocks BL and BU + * (see below). Block BL is specified by offsets (offset0,offset1) and + * sizes (len0,len1). + * + * [ . ] + * [ A0 BU ] + * A = [ BL A1 ] + * [ . ] + * + * this subroutine updates current values of: + * a) mx maximum value of A[i,j] found so far + * b) err componentwise difference between elements of BL and BU^H + * + */ +static void is_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state) +{ + /* try to split problem into two smaller ones */ + if( len0>x_nb || len1>x_nb ) + { + ae_int_t n1, n2; + if( len0>len1 ) + { + x_split_length(len0, x_nb, &n1, &n2); + is_hermitian_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state); + is_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state); + } + else + { + x_split_length(len1, x_nb, &n1, &n2); + is_hermitian_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state); + is_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state); + } + return; + } + else + { + /* base case */ + ae_complex *p1, *p2, *prow, *pcol; + double v; + ae_int_t i, j; + + p1 = (ae_complex*)(a->ptr)+offset0*a->stride+offset1; + p2 = (ae_complex*)(a->ptr)+offset1*a->stride+offset0; + for(i=0; i<len0; i++) + { + pcol = p2+i; + prow = p1+i*a->stride; + for(j=0; j<len1; j++) + { + if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) ) + { + *nonfinite = ae_true; + } + else + { + v = x_safepythag2(pcol->x, pcol->y); + *mx = *mx>v ? *mx : v; + v = x_safepythag2(prow->x, prow->y); + *mx = *mx>v ? *mx : v; + v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y); + *err = *err>v ? *err : v; + } + pcol += a->stride; + prow++; + } + } + } +} +/* + * this function checks that diagonal block A0 is Hermitian. + * Block A0 is specified by its offset and size. + * + * [ . ] + * [ A0 ] + * A = [ . ] + * [ . ] + * + * this subroutine updates current values of: + * a) mx maximum value of A[i,j] found so far + * b) err componentwise difference between A0 and A0^H + * + */ +static void is_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state) +{ + ae_complex *p, *prow, *pcol; + double v; + ae_int_t i, j; + + /* try to split problem into two smaller ones */ + if( len>x_nb ) + { + ae_int_t n1, n2; + x_split_length(len, x_nb, &n1, &n2); + is_hermitian_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state); + is_hermitian_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state); + is_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state); + return; + } + + /* base case */ + p = (ae_complex*)(a->ptr)+offset*a->stride+offset; + for(i=0; i<len; i++) + { + pcol = p+i; + prow = p+i*a->stride; + for(j=0; j<i; j++,pcol+=a->stride,prow++) + { + if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) ) + { + *nonfinite = ae_true; + } + else + { + v = x_safepythag2(pcol->x, pcol->y); + *mx = *mx>v ? *mx : v; + v = x_safepythag2(prow->x, prow->y); + *mx = *mx>v ? *mx : v; + v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y); + *err = *err>v ? *err : v; + } + } + if( !ae_isfinite(p[i+i*a->stride].x, _state) || !ae_isfinite(p[i+i*a->stride].y, _state) ) + { + *nonfinite = ae_true; + } + else + { + v = fabs(p[i+i*a->stride].x); + *mx = *mx>v ? *mx : v; + v = fabs(p[i+i*a->stride].y); + *err = *err>v ? *err : v; + } + } +} +/* + * this function copies offdiagonal block BL to its symmetric counterpart + * BU (see below). Block BL is specified by offsets (offset0,offset1) + * and sizes (len0,len1). + * + * [ . ] + * [ A0 BU ] + * A = [ BL A1 ] + * [ . ] + * + */ +static void force_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1) +{ + /* try to split problem into two smaller ones */ + if( len0>x_nb || len1>x_nb ) + { + ae_int_t n1, n2; + if( len0>len1 ) + { + x_split_length(len0, x_nb, &n1, &n2); + force_symmetric_rec_off_stat(a, offset0, offset1, n1, len1); + force_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1); + } + else + { + x_split_length(len1, x_nb, &n1, &n2); + force_symmetric_rec_off_stat(a, offset0, offset1, len0, n1); + force_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2); + } + return; + } + else + { + /* base case */ + double *p1, *p2, *prow, *pcol; + ae_int_t i, j; + + p1 = (double*)(a->ptr)+offset0*a->stride+offset1; + p2 = (double*)(a->ptr)+offset1*a->stride+offset0; + for(i=0; i<len0; i++) + { + pcol = p2+i; + prow = p1+i*a->stride; + for(j=0; j<len1; j++) + { + *pcol = *prow; + pcol += a->stride; + prow++; + } + } + } +} +/* + * this function copies lower part of diagonal block A0 to its upper part + * Block is specified by offset and size. + * + * [ . ] + * [ A0 ] + * A = [ . ] + * [ . ] + * + */ +static void force_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len) +{ + double *p, *prow, *pcol; + ae_int_t i, j; + + /* try to split problem into two smaller ones */ + if( len>x_nb ) + { + ae_int_t n1, n2; + x_split_length(len, x_nb, &n1, &n2); + force_symmetric_rec_diag_stat(a, offset, n1); + force_symmetric_rec_diag_stat(a, offset+n1, n2); + force_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1); + return; + } + + /* base case */ + p = (double*)(a->ptr)+offset*a->stride+offset; + for(i=0; i<len; i++) + { + pcol = p+i; + prow = p+i*a->stride; + for(j=0; j<i; j++,pcol+=a->stride,prow++) + *pcol = *prow; + } +} +/* + * this function copies Hermitian transpose of offdiagonal block BL to + * its symmetric counterpart BU (see below). Block BL is specified by + * offsets (offset0,offset1) and sizes (len0,len1). + * + * [ . ] + * [ A0 BU ] + * A = [ BL A1 ] + * [ . ] + */ +static void force_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1) +{ + /* try to split problem into two smaller ones */ + if( len0>x_nb || len1>x_nb ) + { + ae_int_t n1, n2; + if( len0>len1 ) + { + x_split_length(len0, x_nb, &n1, &n2); + force_hermitian_rec_off_stat(a, offset0, offset1, n1, len1); + force_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1); + } + else + { + x_split_length(len1, x_nb, &n1, &n2); + force_hermitian_rec_off_stat(a, offset0, offset1, len0, n1); + force_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2); + } + return; + } + else + { + /* base case */ + ae_complex *p1, *p2, *prow, *pcol; + ae_int_t i, j; + + p1 = (ae_complex*)(a->ptr)+offset0*a->stride+offset1; + p2 = (ae_complex*)(a->ptr)+offset1*a->stride+offset0; + for(i=0; i<len0; i++) + { + pcol = p2+i; + prow = p1+i*a->stride; + for(j=0; j<len1; j++) + { + *pcol = *prow; + pcol += a->stride; + prow++; + } + } + } +} +/* + * this function copies Hermitian transpose of lower part of + * diagonal block A0 to its upper part Block is specified by offset and size. + * + * [ . ] + * [ A0 ] + * A = [ . ] + * [ . ] + * + */ +static void force_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len) +{ + ae_complex *p, *prow, *pcol; + ae_int_t i, j; + + /* try to split problem into two smaller ones */ + if( len>x_nb ) + { + ae_int_t n1, n2; + x_split_length(len, x_nb, &n1, &n2); + force_hermitian_rec_diag_stat(a, offset, n1); + force_hermitian_rec_diag_stat(a, offset+n1, n2); + force_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1); + return; + } + + /* base case */ + p = (ae_complex*)(a->ptr)+offset*a->stride+offset; + for(i=0; i<len; i++) + { + pcol = p+i; + prow = p+i*a->stride; + for(j=0; j<i; j++,pcol+=a->stride,prow++) + *pcol = *prow; + } +} +ae_bool x_is_symmetric(x_matrix *a) +{ + double mx, err; + ae_bool nonfinite; + ae_state _alglib_env_state; + if( a->datatype!=DT_REAL ) + return ae_false; + if( a->cols!=a->rows ) + return ae_false; + if( a->cols==0 || a->rows==0 ) + return ae_true; + ae_state_init(&_alglib_env_state); + mx = 0; + err = 0; + nonfinite = ae_false; + is_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state); + if( nonfinite ) + return ae_false; + if( mx==0 ) + return ae_true; + return err/mx<=1.0E-14; +} +ae_bool x_is_hermitian(x_matrix *a) +{ + double mx, err; + ae_bool nonfinite; + ae_state _alglib_env_state; + if( a->datatype!=DT_COMPLEX ) + return ae_false; + if( a->cols!=a->rows ) + return ae_false; + if( a->cols==0 || a->rows==0 ) + return ae_true; + ae_state_init(&_alglib_env_state); + mx = 0; + err = 0; + nonfinite = ae_false; + is_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state); + if( nonfinite ) + return ae_false; + if( mx==0 ) + return ae_true; + return err/mx<=1.0E-14; +} +ae_bool x_force_symmetric(x_matrix *a) +{ + if( a->datatype!=DT_REAL ) + return ae_false; + if( a->cols!=a->rows ) + return ae_false; + if( a->cols==0 || a->rows==0 ) + return ae_true; + force_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows); + return ae_true; +} +ae_bool x_force_hermitian(x_matrix *a) +{ + if( a->datatype!=DT_COMPLEX ) + return ae_false; + if( a->cols!=a->rows ) + return ae_false; + if( a->cols==0 || a->rows==0 ) + return ae_true; + force_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows); + return ae_true; +} + +ae_bool ae_is_symmetric(ae_matrix *a) +{ + x_matrix x; + x.owner = OWN_CALLER; + ae_x_attach_to_matrix(&x, a); + return x_is_symmetric(&x); +} + +ae_bool ae_is_hermitian(ae_matrix *a) +{ + x_matrix x; + x.owner = OWN_CALLER; + ae_x_attach_to_matrix(&x, a); + return x_is_hermitian(&x); +} + +ae_bool ae_force_symmetric(ae_matrix *a) +{ + x_matrix x; + x.owner = OWN_CALLER; + ae_x_attach_to_matrix(&x, a); + return x_force_symmetric(&x); +} + +ae_bool ae_force_hermitian(ae_matrix *a) +{ + x_matrix x; + x.owner = OWN_CALLER; + ae_x_attach_to_matrix(&x, a); + return x_force_hermitian(&x); +} + + +/************************************************************************ +Complex math functions +************************************************************************/ +ae_complex ae_complex_from_d(double v) +{ + ae_complex r; + r.x = v; + r.y = 0.0; + return r; +} + +ae_complex ae_c_neg(ae_complex lhs) +{ + ae_complex result; + result.x = -lhs.x; + result.y = -lhs.y; + return result; +} + +ae_complex ae_c_conj(ae_complex lhs, ae_state *state) +{ + ae_complex result; + result.x = +lhs.x; + result.y = -lhs.y; + return result; +} + +ae_complex ae_c_sqr(ae_complex lhs, ae_state *state) +{ + ae_complex result; + result.x = lhs.x*lhs.x-lhs.y*lhs.y; + result.y = 2*lhs.x*lhs.y; + return result; +} + +double ae_c_abs(ae_complex z, ae_state *state) +{ + double w; + double xabs; + double yabs; + double v; + + xabs = fabs(z.x); + yabs = fabs(z.y); + w = xabs>yabs ? xabs : yabs; + v = xabs<yabs ? xabs : yabs; + if( v==0 ) + return w; + else + { + double t = v/w; + return w*sqrt(1+t*t); + } +} + +ae_bool ae_c_eq(ae_complex lhs, ae_complex rhs) +{ + volatile double x1 = lhs.x; + volatile double x2 = rhs.x; + volatile double y1 = lhs.y; + volatile double y2 = rhs.y; + return x1==x2 && y1==y2; +} + +ae_bool ae_c_neq(ae_complex lhs, ae_complex rhs) +{ + volatile double x1 = lhs.x; + volatile double x2 = rhs.x; + volatile double y1 = lhs.y; + volatile double y2 = rhs.y; + return x1!=x2 || y1!=y2; +} + +ae_complex ae_c_add(ae_complex lhs, ae_complex rhs) +{ + ae_complex result; + result.x = lhs.x+rhs.x; + result.y = lhs.y+rhs.y; + return result; +} + +ae_complex ae_c_mul(ae_complex lhs, ae_complex rhs) +{ + ae_complex result; + result.x = lhs.x*rhs.x-lhs.y*rhs.y; + result.y = lhs.x*rhs.y+lhs.y*rhs.x; + return result; +} + +ae_complex ae_c_sub(ae_complex lhs, ae_complex rhs) +{ + ae_complex result; + result.x = lhs.x-rhs.x; + result.y = lhs.y-rhs.y; + return result; +} + +ae_complex ae_c_div(ae_complex lhs, ae_complex rhs) +{ + ae_complex result; + double e; + double f; + if( fabs(rhs.y)<fabs(rhs.x) ) + { + e = rhs.y/rhs.x; + f = rhs.x+rhs.y*e; + result.x = (lhs.x+lhs.y*e)/f; + result.y = (lhs.y-lhs.x*e)/f; + } + else + { + e = rhs.x/rhs.y; + f = rhs.y+rhs.x*e; + result.x = (lhs.y+lhs.x*e)/f; + result.y = (-lhs.x+lhs.y*e)/f; + } + return result; +} + +ae_bool ae_c_eq_d(ae_complex lhs, double rhs) +{ + volatile double x1 = lhs.x; + volatile double x2 = rhs; + volatile double y1 = lhs.y; + volatile double y2 = 0; + return x1==x2 && y1==y2; +} + +ae_bool ae_c_neq_d(ae_complex lhs, double rhs) +{ + volatile double x1 = lhs.x; + volatile double x2 = rhs; + volatile double y1 = lhs.y; + volatile double y2 = 0; + return x1!=x2 || y1!=y2; +} + +ae_complex ae_c_add_d(ae_complex lhs, double rhs) +{ + ae_complex result; + result.x = lhs.x+rhs; + result.y = lhs.y; + return result; +} + +ae_complex ae_c_mul_d(ae_complex lhs, double rhs) +{ + ae_complex result; + result.x = lhs.x*rhs; + result.y = lhs.y*rhs; + return result; +} + +ae_complex ae_c_sub_d(ae_complex lhs, double rhs) +{ + ae_complex result; + result.x = lhs.x-rhs; + result.y = lhs.y; + return result; +} + +ae_complex ae_c_d_sub(double lhs, ae_complex rhs) +{ + ae_complex result; + result.x = lhs-rhs.x; + result.y = -rhs.y; + return result; +} + +ae_complex ae_c_div_d(ae_complex lhs, double rhs) +{ + ae_complex result; + result.x = lhs.x/rhs; + result.y = lhs.y/rhs; + return result; +} + +ae_complex ae_c_d_div(double lhs, ae_complex rhs) +{ + ae_complex result; + double e; + double f; + if( fabs(rhs.y)<fabs(rhs.x) ) + { + e = rhs.y/rhs.x; + f = rhs.x+rhs.y*e; + result.x = lhs/f; + result.y = -lhs*e/f; + } + else + { + e = rhs.x/rhs.y; + f = rhs.y+rhs.x*e; + result.x = lhs*e/f; + result.y = -lhs/f; + } + return result; +} + + +/************************************************************************ +Complex BLAS operations +************************************************************************/ +ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n) +{ + double rx = 0, ry = 0; + ae_int_t i; + ae_bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n')); + ae_bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n')); + ae_complex result; + if( bconj0 && bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = -v0->y; + v1x = v1->x; + v1y = -v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + if( !bconj0 && bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = v0->y; + v1x = v1->x; + v1y = -v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + if( bconj0 && !bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = -v0->y; + v1x = v1->x; + v1y = v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + if( !bconj0 && !bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = v0->y; + v1x = v1->x; + v1y = v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + result.x = rx; + result.y = ry; + return result; +} + +void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = vsrc->x; + vdst->y = -vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = *vsrc; + } + } + else + { + /* + * optimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = vsrc->x; + vdst->y = -vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + *vdst = *vsrc; + } + } +} + +void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = -vsrc->x; + vdst->y = vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = -vsrc->x; + vdst->y = -vsrc->y; + } + } + } + else + { + /* + * optimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = -vsrc->x; + vdst->y = vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = -vsrc->x; + vdst->y = -vsrc->y; + } + } + } +} + +void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = alpha*vsrc->x; + vdst->y = -alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = alpha*vsrc->x; + vdst->y = alpha*vsrc->y; + } + } + } + else + { + /* + * optimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = alpha*vsrc->x; + vdst->y = -alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = alpha*vsrc->x; + vdst->y = alpha*vsrc->y; + } + } + } +} + +void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = ax*vsrc->x+ay*vsrc->y; + vdst->y = -ax*vsrc->y+ay*vsrc->x; + } + } + else + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = ax*vsrc->x-ay*vsrc->y; + vdst->y = ax*vsrc->y+ay*vsrc->x; + } + } + } + else + { + /* + * highly optimized case + */ + if( bconj ) + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = ax*vsrc->x+ay*vsrc->y; + vdst->y = -ax*vsrc->y+ay*vsrc->x; + } + } + else + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = ax*vsrc->x-ay*vsrc->y; + vdst->y = ax*vsrc->y+ay*vsrc->x; + } + } + } +} + +void ae_v_cadd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += vsrc->x; + vdst->y -= vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += vsrc->x; + vdst->y += vsrc->y; + } + } + } + else + { + /* + * optimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += vsrc->x; + vdst->y -= vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += vsrc->x; + vdst->y += vsrc->y; + } + } + } +} + +void ae_v_caddd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += alpha*vsrc->x; + vdst->y -= alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += alpha*vsrc->x; + vdst->y += alpha*vsrc->y; + } + } + } + else + { + /* + * optimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += alpha*vsrc->x; + vdst->y -= alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += alpha*vsrc->x; + vdst->y += alpha*vsrc->y; + } + } + } +} + +void ae_v_caddc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + double ax = alpha.x, ay = alpha.y; + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += ax*vsrc->x+ay*vsrc->y; + vdst->y -= ax*vsrc->y-ay*vsrc->x; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += ax*vsrc->x-ay*vsrc->y; + vdst->y += ax*vsrc->y+ay*vsrc->x; + } + } + } + else + { + /* + * highly optimized case + */ + double ax = alpha.x, ay = alpha.y; + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += ax*vsrc->x+ay*vsrc->y; + vdst->y -= ax*vsrc->y-ay*vsrc->x; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += ax*vsrc->x-ay*vsrc->y; + vdst->y += ax*vsrc->y+ay*vsrc->x; + } + } + } +} + +void ae_v_csub(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x -= vsrc->x; + vdst->y += vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x -= vsrc->x; + vdst->y -= vsrc->y; + } + } + } + else + { + /* + * highly optimized case + */ + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x -= vsrc->x; + vdst->y += vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x -= vsrc->x; + vdst->y -= vsrc->y; + } + } + } +} + +void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha) +{ + ae_v_caddd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha); +} + +void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha) +{ + alpha.x = -alpha.x; + alpha.y = -alpha.y; + ae_v_caddc(vdst, stride_dst, vsrc, stride_src, conj_src, n, alpha); +} + +void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst) + { + vdst->x *= alpha; + vdst->y *= alpha; + } + } + else + { + /* + * optimized case + */ + for(i=0; i<n; i++, vdst++) + { + vdst->x *= alpha; + vdst->y *= alpha; + } + } +} + +void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha) +{ + ae_int_t i; + if( stride_dst!=1 ) + { + /* + * general unoptimized case + */ + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst+=stride_dst) + { + double dstx = vdst->x, dsty = vdst->y; + vdst->x = ax*dstx-ay*dsty; + vdst->y = ax*dsty+ay*dstx; + } + } + else + { + /* + * highly optimized case + */ + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst++) + { + double dstx = vdst->x, dsty = vdst->y; + vdst->x = ax*dstx-ay*dsty; + vdst->y = ax*dsty+ay*dstx; + } + } +} + +/************************************************************************ +Real BLAS operations +************************************************************************/ +double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n) +{ + double result = 0; + ae_int_t i; + if( stride0!=1 || stride1!=1 ) + { + /* + * slow general code + */ + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + result += (*v0)*(*v1); + } + else + { + /* + * optimized code for stride=1 + */ + ae_int_t n4 = n/4; + ae_int_t nleft = n%4; + for(i=0; i<n4; i++, v0+=4, v1+=4) + result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3]; + for(i=0; i<nleft; i++, v0++, v1++) + result += v0[0]*v1[0]; + } + return result; +} + +void ae_v_move(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = *vsrc; + } + else + { + /* + * optimized case + */ + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] = vsrc[0]; + vdst[1] = vsrc[1]; + } + if( n%2!=0 ) + vdst[0] = vsrc[0]; + } +} + +void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = -*vsrc; + } + else + { + /* + * optimized case + */ + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] = -vsrc[0]; + vdst[1] = -vsrc[1]; + } + if( n%2!=0 ) + vdst[0] = -vsrc[0]; + } +} + +void ae_v_moved(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = alpha*(*vsrc); + } + else + { + /* + * optimized case + */ + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] = alpha*vsrc[0]; + vdst[1] = alpha*vsrc[1]; + } + if( n%2!=0 ) + vdst[0] = alpha*vsrc[0]; + } +} + +void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst += *vsrc; + } + else + { + /* + * optimized case + */ + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] += vsrc[0]; + vdst[1] += vsrc[1]; + } + if( n%2!=0 ) + vdst[0] += vsrc[0]; + } +} + +void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst += alpha*(*vsrc); + } + else + { + /* + * optimized case + */ + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] += alpha*vsrc[0]; + vdst[1] += alpha*vsrc[1]; + } + if( n%2!=0 ) + vdst[0] += alpha*vsrc[0]; + } +} + +void ae_v_sub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst -= *vsrc; + } + else + { + /* + * highly optimized case + */ + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] -= vsrc[0]; + vdst[1] -= vsrc[1]; + } + if( n%2!=0 ) + vdst[0] -= vsrc[0]; + } +} + +void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha) +{ + ae_v_addd(vdst, stride_dst, vsrc, stride_src, n, -alpha); +} + +void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 ) + { + /* + * general unoptimized case + */ + for(i=0; i<n; i++, vdst+=stride_dst) + *vdst *= alpha; + } + else + { + /* + * highly optimized case + */ + for(i=0; i<n; i++, vdst++) + *vdst *= alpha; + } +} + +/************************************************************************ +Other functions +************************************************************************/ +ae_int_t ae_v_len(ae_int_t a, ae_int_t b) +{ + return b-a+1; +} + +/************************************************************************ +RComm functions +************************************************************************/ +ae_bool _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->ba, 0, DT_BOOL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ia, 0, DT_INT, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ra, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ca, 0, DT_COMPLEX, _state, make_automatic) ) + return ae_false; + return ae_true; +} + +ae_bool _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init_copy(&dst->ba, &src->ba, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ia, &src->ia, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ra, &src->ra, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ca, &src->ca, _state, make_automatic) ) + return ae_false; + dst->stage = src->stage; + return ae_true; +} + +void _rcommstate_clear(rcommstate* p) +{ + ae_vector_clear(&p->ba); + ae_vector_clear(&p->ia); + ae_vector_clear(&p->ra); + ae_vector_clear(&p->ca); +} + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS C++ RELATED FUNCTIONALITY +// +///////////////////////////////////////////////////////////////////////// +/******************************************************************** +Internal forwards +********************************************************************/ +namespace alglib +{ + double get_aenv_nan(); + double get_aenv_posinf(); + double get_aenv_neginf(); + ae_int_t my_stricmp(const char *s1, const char *s2); + char* filter_spaces(const char *s); + void str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec); + void str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat); + + ae_bool parse_bool_delim(const char *s, const char *delim); + ae_int_t parse_int_delim(const char *s, const char *delim); + bool _parse_real_delim(const char *s, const char *delim, double *result, const char **new_s); + double parse_real_delim(const char *s, const char *delim); + alglib::complex parse_complex_delim(const char *s, const char *delim); + + std::string arraytostring(const bool *ptr, ae_int_t n); + std::string arraytostring(const ae_int_t *ptr, ae_int_t n); + std::string arraytostring(const double *ptr, ae_int_t n, int dps); + std::string arraytostring(const alglib::complex *ptr, ae_int_t n, int dps); +} + +/******************************************************************** +Global and local constants +********************************************************************/ +const double alglib::machineepsilon = 5E-16; +const double alglib::maxrealnumber = 1E300; +const double alglib::minrealnumber = 1E-300; +const alglib::ae_int_t alglib::endianness = alglib_impl::ae_get_endianness(); +const double alglib::fp_nan = alglib::get_aenv_nan(); +const double alglib::fp_posinf = alglib::get_aenv_posinf(); +const double alglib::fp_neginf = alglib::get_aenv_neginf(); + + +/******************************************************************** +ap_error +********************************************************************/ +alglib::ap_error::ap_error() +{ +} + +alglib::ap_error::ap_error(const char *s) +{ + msg = s; +} + +void alglib::ap_error::make_assertion(bool bClause) +{ + if(!bClause) + throw ap_error(); +} + +void alglib::ap_error::make_assertion(bool bClause, const char *msg) +{ + if(!bClause) + throw ap_error(msg); +} + + +/******************************************************************** +Complex number with double precision. +********************************************************************/ +alglib::complex::complex():x(0.0),y(0.0) +{ +} + +alglib::complex::complex(const double &_x):x(_x),y(0.0) +{ +} + +alglib::complex::complex(const double &_x, const double &_y):x(_x),y(_y) +{ +} + +alglib::complex::complex(const alglib::complex &z):x(z.x),y(z.y) +{ +} + +alglib::complex& alglib::complex::operator= (const double& v) +{ + x = v; + y = 0.0; + return *this; +} + +alglib::complex& alglib::complex::operator+=(const double& v) +{ + x += v; + return *this; +} + +alglib::complex& alglib::complex::operator-=(const double& v) +{ + x -= v; + return *this; +} + +alglib::complex& alglib::complex::operator*=(const double& v) +{ + x *= v; + y *= v; + return *this; +} + +alglib::complex& alglib::complex::operator/=(const double& v) +{ + x /= v; + y /= v; + return *this; +} + +alglib::complex& alglib::complex::operator= (const alglib::complex& z) +{ + x = z.x; + y = z.y; + return *this; +} + +alglib::complex& alglib::complex::operator+=(const alglib::complex& z) +{ + x += z.x; + y += z.y; + return *this; +} + +alglib::complex& alglib::complex::operator-=(const alglib::complex& z) +{ + x -= z.x; + y -= z.y; + return *this; +} + +alglib::complex& alglib::complex::operator*=(const alglib::complex& z) +{ + double t = x*z.x-y*z.y; + y = x*z.y+y*z.x; + x = t; + return *this; +} + +alglib::complex& alglib::complex::operator/=(const alglib::complex& z) +{ + alglib::complex result; + double e; + double f; + if( fabs(z.y)<fabs(z.x) ) + { + e = z.y/z.x; + f = z.x+z.y*e; + result.x = (x+y*e)/f; + result.y = (y-x*e)/f; + } + else + { + e = z.x/z.y; + f = z.y+z.x*e; + result.x = (y+x*e)/f; + result.y = (-x+y*e)/f; + } + *this = result; + return *this; +} + +alglib_impl::ae_complex* alglib::complex::c_ptr() +{ + return (alglib_impl::ae_complex*)this; +} + +const alglib_impl::ae_complex* alglib::complex::c_ptr() const +{ + return (const alglib_impl::ae_complex*)this; +} + +std::string alglib::complex::tostring(int dps) const +{ + char mask[32]; + char buf_x[32]; + char buf_y[32]; + char buf_zero[32]; + if( dps<=0 || dps>=20 ) + throw ap_error("complex::tostring(): incorrect dps"); + + // handle IEEE special quantities + if( fp_isnan(x) || fp_isnan(y) ) + return "NAN"; + if( fp_isinf(x) || fp_isinf(y) ) + return "INF"; + + // generate mask + if( sprintf(mask, "%%.%df", dps)>=(int)sizeof(mask) ) + throw ap_error("complex::tostring(): buffer overflow"); + + // print |x|, |y| and zero with same mask and compare + if( sprintf(buf_x, mask, (double)(fabs(x)))>=(int)sizeof(buf_x) ) + throw ap_error("complex::tostring(): buffer overflow"); + if( sprintf(buf_y, mask, (double)(fabs(y)))>=(int)sizeof(buf_y) ) + throw ap_error("complex::tostring(): buffer overflow"); + if( sprintf(buf_zero, mask, (double)0)>=(int)sizeof(buf_zero) ) + throw ap_error("complex::tostring(): buffer overflow"); + + // different zero/nonzero patterns + if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)!=0 ) + return std::string(x>0 ? "" : "-")+buf_x+(y>0 ? "+" : "-")+buf_y+"i"; + if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)==0 ) + return std::string(x>0 ? "" : "-")+buf_x; + if( strcmp(buf_x,buf_zero)==0 && strcmp(buf_y,buf_zero)!=0 ) + return std::string(y>0 ? "" : "-")+buf_y+"i"; + return std::string("0"); +} + +const bool alglib::operator==(const alglib::complex& lhs, const alglib::complex& rhs) +{ + volatile double x1 = lhs.x; + volatile double x2 = rhs.x; + volatile double y1 = lhs.y; + volatile double y2 = rhs.y; + return x1==x2 && y1==y2; +} + +const bool alglib::operator!=(const alglib::complex& lhs, const alglib::complex& rhs) +{ return !(lhs==rhs); } + +const alglib::complex alglib::operator+(const alglib::complex& lhs) +{ return lhs; } + +const alglib::complex alglib::operator-(const alglib::complex& lhs) +{ return alglib::complex(-lhs.x, -lhs.y); } + +const alglib::complex alglib::operator+(const alglib::complex& lhs, const alglib::complex& rhs) +{ alglib::complex r = lhs; r += rhs; return r; } + +const alglib::complex alglib::operator+(const alglib::complex& lhs, const double& rhs) +{ alglib::complex r = lhs; r += rhs; return r; } + +const alglib::complex alglib::operator+(const double& lhs, const alglib::complex& rhs) +{ alglib::complex r = rhs; r += lhs; return r; } + +const alglib::complex alglib::operator-(const alglib::complex& lhs, const alglib::complex& rhs) +{ alglib::complex r = lhs; r -= rhs; return r; } + +const alglib::complex alglib::operator-(const alglib::complex& lhs, const double& rhs) +{ alglib::complex r = lhs; r -= rhs; return r; } + +const alglib::complex alglib::operator-(const double& lhs, const alglib::complex& rhs) +{ alglib::complex r = lhs; r -= rhs; return r; } + +const alglib::complex alglib::operator*(const alglib::complex& lhs, const alglib::complex& rhs) +{ return alglib::complex(lhs.x*rhs.x - lhs.y*rhs.y, lhs.x*rhs.y + lhs.y*rhs.x); } + +const alglib::complex alglib::operator*(const alglib::complex& lhs, const double& rhs) +{ return alglib::complex(lhs.x*rhs, lhs.y*rhs); } + +const alglib::complex alglib::operator*(const double& lhs, const alglib::complex& rhs) +{ return alglib::complex(lhs*rhs.x, lhs*rhs.y); } + +const alglib::complex alglib::operator/(const alglib::complex& lhs, const alglib::complex& rhs) +{ + alglib::complex result; + double e; + double f; + if( fabs(rhs.y)<fabs(rhs.x) ) + { + e = rhs.y/rhs.x; + f = rhs.x+rhs.y*e; + result.x = (lhs.x+lhs.y*e)/f; + result.y = (lhs.y-lhs.x*e)/f; + } + else + { + e = rhs.x/rhs.y; + f = rhs.y+rhs.x*e; + result.x = (lhs.y+lhs.x*e)/f; + result.y = (-lhs.x+lhs.y*e)/f; + } + return result; +} + +const alglib::complex alglib::operator/(const double& lhs, const alglib::complex& rhs) +{ + alglib::complex result; + double e; + double f; + if( fabs(rhs.y)<fabs(rhs.x) ) + { + e = rhs.y/rhs.x; + f = rhs.x+rhs.y*e; + result.x = lhs/f; + result.y = -lhs*e/f; + } + else + { + e = rhs.x/rhs.y; + f = rhs.y+rhs.x*e; + result.x = lhs*e/f; + result.y = -lhs/f; + } + return result; +} + +const alglib::complex alglib::operator/(const alglib::complex& lhs, const double& rhs) +{ return alglib::complex(lhs.x/rhs, lhs.y/rhs); } + +double alglib::abscomplex(const alglib::complex &z) +{ + double w; + double xabs; + double yabs; + double v; + + xabs = fabs(z.x); + yabs = fabs(z.y); + w = xabs>yabs ? xabs : yabs; + v = xabs<yabs ? xabs : yabs; + if( v==0 ) + return w; + else + { + double t = v/w; + return w*sqrt(1+t*t); + } +} + +alglib::complex alglib::conj(const alglib::complex &z) +{ return alglib::complex(z.x, -z.y); } + +alglib::complex alglib::csqr(const alglib::complex &z) +{ return alglib::complex(z.x*z.x-z.y*z.y, 2*z.x*z.y); } + + +/******************************************************************** +Level 1 BLAS functions +********************************************************************/ +double alglib::vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n) +{ + double result = 0; + ae_int_t i; + if( stride0!=1 || stride1!=1 ) + { + // + // slow general code + // + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + result += (*v0)*(*v1); + } + else + { + // + // optimized code for stride=1 + // + ae_int_t n4 = n/4; + ae_int_t nleft = n%4; + for(i=0; i<n4; i++, v0+=4, v1+=4) + result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3]; + for(i=0; i<nleft; i++, v0++, v1++) + result += v0[0]*v1[0]; + } + return result; +} + +double alglib::vdotproduct(const double *v1, const double *v2, ae_int_t N) +{ + return vdotproduct(v1, 1, v2, 1, N); +} + +alglib::complex alglib::vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n) +{ + double rx = 0, ry = 0; + ae_int_t i; + bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n')); + bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n')); + if( bconj0 && bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = -v0->y; + v1x = v1->x; + v1y = -v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + if( !bconj0 && bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = v0->y; + v1x = v1->x; + v1y = -v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + if( bconj0 && !bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = -v0->y; + v1x = v1->x; + v1y = v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + if( !bconj0 && !bconj1 ) + { + double v0x, v0y, v1x, v1y; + for(i=0; i<n; i++, v0+=stride0, v1+=stride1) + { + v0x = v0->x; + v0y = v0->y; + v1x = v1->x; + v1y = v1->y; + rx += v0x*v1x-v0y*v1y; + ry += v0x*v1y+v0y*v1x; + } + } + return alglib::complex(rx,ry); +} + +alglib::complex alglib::vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N) +{ + return vdotproduct(v1, 1, "N", v2, 1, "N", N); +} + +void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = *vsrc; + } + else + { + // + // optimized case + // + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] = vsrc[0]; + vdst[1] = vsrc[1]; + } + if( n%2!=0 ) + vdst[0] = vsrc[0]; + } +} + +void alglib::vmove(double *vdst, const double* vsrc, ae_int_t N) +{ + vmove(vdst, 1, vsrc, 1, N); +} + +void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = vsrc->x; + vdst->y = -vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = *vsrc; + } + } + else + { + // + // optimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = vsrc->x; + vdst->y = -vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + *vdst = *vsrc; + } + } +} + +void alglib::vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N) +{ + vmove(vdst, 1, vsrc, 1, "N", N); +} + +void alglib::vmoveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = -*vsrc; + } + else + { + // + // optimized case + // + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] = -vsrc[0]; + vdst[1] = -vsrc[1]; + } + if( n%2!=0 ) + vdst[0] = -vsrc[0]; + } +} + +void alglib::vmoveneg(double *vdst, const double *vsrc, ae_int_t N) +{ + vmoveneg(vdst, 1, vsrc, 1, N); +} + +void alglib::vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = -vsrc->x; + vdst->y = vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = -vsrc->x; + vdst->y = -vsrc->y; + } + } + } + else + { + // + // optimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = -vsrc->x; + vdst->y = vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = -vsrc->x; + vdst->y = -vsrc->y; + } + } + } +} + +void alglib::vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N) +{ + vmoveneg(vdst, 1, vsrc, 1, "N", N); +} + +void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst = alpha*(*vsrc); + } + else + { + // + // optimized case + // + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] = alpha*vsrc[0]; + vdst[1] = alpha*vsrc[1]; + } + if( n%2!=0 ) + vdst[0] = alpha*vsrc[0]; + } +} + +void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha) +{ + vmove(vdst, 1, vsrc, 1, N, alpha); +} + +void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = alpha*vsrc->x; + vdst->y = -alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = alpha*vsrc->x; + vdst->y = alpha*vsrc->y; + } + } + } + else + { + // + // optimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = alpha*vsrc->x; + vdst->y = -alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = alpha*vsrc->x; + vdst->y = alpha*vsrc->y; + } + } + } +} + +void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha) +{ + vmove(vdst, 1, vsrc, 1, "N", N, alpha); +} + +void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = ax*vsrc->x+ay*vsrc->y; + vdst->y = -ax*vsrc->y+ay*vsrc->x; + } + } + else + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x = ax*vsrc->x-ay*vsrc->y; + vdst->y = ax*vsrc->y+ay*vsrc->x; + } + } + } + else + { + // + // optimized case + // + if( bconj ) + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = ax*vsrc->x+ay*vsrc->y; + vdst->y = -ax*vsrc->y+ay*vsrc->x; + } + } + else + { + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x = ax*vsrc->x-ay*vsrc->y; + vdst->y = ax*vsrc->y+ay*vsrc->x; + } + } + } +} + +void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha) +{ + vmove(vdst, 1, vsrc, 1, "N", N, alpha); +} + +void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst += *vsrc; + } + else + { + // + // optimized case + // + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] += vsrc[0]; + vdst[1] += vsrc[1]; + } + if( n%2!=0 ) + vdst[0] += vsrc[0]; + } +} + +void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N) +{ + vadd(vdst, 1, vsrc, 1, N); +} + +void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += vsrc->x; + vdst->y -= vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += vsrc->x; + vdst->y += vsrc->y; + } + } + } + else + { + // + // optimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += vsrc->x; + vdst->y -= vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += vsrc->x; + vdst->y += vsrc->y; + } + } + } +} + +void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N) +{ + vadd(vdst, 1, vsrc, 1, "N", N); +} + +void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst += alpha*(*vsrc); + } + else + { + // + // optimized case + // + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] += alpha*vsrc[0]; + vdst[1] += alpha*vsrc[1]; + } + if( n%2!=0 ) + vdst[0] += alpha*vsrc[0]; + } +} + +void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha) +{ + vadd(vdst, 1, vsrc, 1, N, alpha); +} + +void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += alpha*vsrc->x; + vdst->y -= alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += alpha*vsrc->x; + vdst->y += alpha*vsrc->y; + } + } + } + else + { + // + // optimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += alpha*vsrc->x; + vdst->y -= alpha*vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += alpha*vsrc->x; + vdst->y += alpha*vsrc->y; + } + } + } +} + +void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha) +{ + vadd(vdst, 1, vsrc, 1, "N", N, alpha); +} + +void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + double ax = alpha.x, ay = alpha.y; + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += ax*vsrc->x+ay*vsrc->y; + vdst->y -= ax*vsrc->y-ay*vsrc->x; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x += ax*vsrc->x-ay*vsrc->y; + vdst->y += ax*vsrc->y+ay*vsrc->x; + } + } + } + else + { + // + // optimized case + // + double ax = alpha.x, ay = alpha.y; + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += ax*vsrc->x+ay*vsrc->y; + vdst->y -= ax*vsrc->y-ay*vsrc->x; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x += ax*vsrc->x-ay*vsrc->y; + vdst->y += ax*vsrc->y+ay*vsrc->x; + } + } + } +} + +void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha) +{ + vadd(vdst, 1, vsrc, 1, "N", N, alpha); +} + +void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n) +{ + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + *vdst -= *vsrc; + } + else + { + // + // optimized case + // + ae_int_t n2 = n/2; + for(i=0; i<n2; i++, vdst+=2, vsrc+=2) + { + vdst[0] -= vsrc[0]; + vdst[1] -= vsrc[1]; + } + if( n%2!=0 ) + vdst[0] -= vsrc[0]; + } +} + +void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N) +{ + vsub(vdst, 1, vsrc, 1, N); +} + +void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n) +{ + bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n')); + ae_int_t i; + if( stride_dst!=1 || stride_src!=1 ) + { + // + // general unoptimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x -= vsrc->x; + vdst->y += vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src) + { + vdst->x -= vsrc->x; + vdst->y -= vsrc->y; + } + } + } + else + { + // + // optimized case + // + if( bconj ) + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x -= vsrc->x; + vdst->y += vsrc->y; + } + } + else + { + for(i=0; i<n; i++, vdst++, vsrc++) + { + vdst->x -= vsrc->x; + vdst->y -= vsrc->y; + } + } + } +} + +void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N) +{ + vsub(vdst, 1, vsrc, 1, "N", N); +} + +void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha) +{ + vadd(vdst, stride_dst, vsrc, stride_src, n, -alpha); +} + +void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha) +{ + vadd(vdst, 1, vsrc, 1, N, -alpha); +} + +void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha) +{ + vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha); +} + +void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, double alpha) +{ + vadd(vdst, 1, vsrc, 1, "N", n, -alpha); +} + +void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha) +{ + vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha); +} + +void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, alglib::complex alpha) +{ + vadd(vdst, 1, vsrc, 1, "N", n, -alpha); +} +void alglib::vmul(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst) + *vdst *= alpha; + } + else + { + // + // optimized case + // + for(i=0; i<n; i++, vdst++) + *vdst *= alpha; + } +} + +void alglib::vmul(double *vdst, ae_int_t N, double alpha) +{ + vmul(vdst, 1, N, alpha); +} + +void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha) +{ + ae_int_t i; + if( stride_dst!=1 ) + { + // + // general unoptimized case + // + for(i=0; i<n; i++, vdst+=stride_dst) + { + vdst->x *= alpha; + vdst->y *= alpha; + } + } + else + { + // + // optimized case + // + for(i=0; i<n; i++, vdst++) + { + vdst->x *= alpha; + vdst->y *= alpha; + } + } +} + +void alglib::vmul(alglib::complex *vdst, ae_int_t N, double alpha) +{ + vmul(vdst, 1, N, alpha); +} + +void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha) +{ + ae_int_t i; + if( stride_dst!=1 ) + { + // + // general unoptimized case + // + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst+=stride_dst) + { + double dstx = vdst->x, dsty = vdst->y; + vdst->x = ax*dstx-ay*dsty; + vdst->y = ax*dsty+ay*dstx; + } + } + else + { + // + // optimized case + // + double ax = alpha.x, ay = alpha.y; + for(i=0; i<n; i++, vdst++) + { + double dstx = vdst->x, dsty = vdst->y; + vdst->x = ax*dstx-ay*dsty; + vdst->y = ax*dsty+ay*dstx; + } + } +} + +void alglib::vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha) +{ + vmul(vdst, 1, N, alpha); +} + + +/******************************************************************** +Matrices and vectors +********************************************************************/ +alglib::ae_vector_wrapper::ae_vector_wrapper() +{ + p_vec = NULL; +} + +alglib::ae_vector_wrapper::~ae_vector_wrapper() +{ + if( p_vec==&vec ) + ae_vector_clear(p_vec); +} + +alglib::ae_vector_wrapper::ae_vector_wrapper(const alglib::ae_vector_wrapper &rhs) +{ + if( rhs.p_vec!=NULL ) + { + p_vec = &vec; + if( !ae_vector_init_copy(p_vec, rhs.p_vec, NULL, ae_false) ) + throw alglib::ap_error("ALGLIB: malloc error!"); + } + else + p_vec = NULL; +} + +const alglib::ae_vector_wrapper& alglib::ae_vector_wrapper::operator=(const alglib::ae_vector_wrapper &rhs) +{ + if( this==&rhs ) + return *this; + if( p_vec==&vec ) + ae_vector_clear(p_vec); + if( rhs.p_vec!=NULL ) + { + p_vec = &vec; + if( !ae_vector_init_copy(p_vec, rhs.p_vec, NULL, ae_false) ) + throw alglib::ap_error("ALGLIB: malloc error!"); + } + else + p_vec = NULL; + return *this; +} + +void alglib::ae_vector_wrapper::setlength(ae_int_t iLen) +{ + if( p_vec==NULL ) + throw alglib::ap_error("ALGLIB: setlength() error, p_vec==NULL (array was not correctly initialized)"); + if( p_vec!=&vec ) + throw alglib::ap_error("ALGLIB: setlength() error, p_vec!=&vec (attempt to resize frozen array)"); + if( !ae_vector_set_length(p_vec, iLen, NULL) ) + throw alglib::ap_error("ALGLIB: malloc error"); +} + +alglib::ae_int_t alglib::ae_vector_wrapper::length() const +{ + if( p_vec==NULL ) + return 0; + return p_vec->cnt; +} + +void alglib::ae_vector_wrapper::attach_to(alglib_impl::ae_vector *ptr) +{ + if( ptr==&vec ) + throw alglib::ap_error("ALGLIB: attempt to attach vector to itself"); + if( p_vec==&vec ) + ae_vector_clear(p_vec); + p_vec = ptr; +} + +void alglib::ae_vector_wrapper::allocate_own(ae_int_t size, alglib_impl::ae_datatype datatype) +{ + if( p_vec==&vec ) + ae_vector_clear(p_vec); + p_vec = &vec; + if( !ae_vector_init(p_vec, size, datatype, NULL, false) ) + throw alglib::ap_error("ALGLIB: malloc error"); +} + +const alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() const +{ + return p_vec; +} + +alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() +{ + return p_vec; +} + +alglib::boolean_1d_array::boolean_1d_array() +{ + allocate_own(0, alglib_impl::DT_BOOL); +} + +alglib::boolean_1d_array::boolean_1d_array(const char *s) +{ + std::vector<const char*> svec; + size_t i; + char *p = filter_spaces(s); + try + { + str_vector_create(p, true, &svec); + allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_BOOL); + for(i=0; i<svec.size(); i++) + operator()((ae_int_t)i) = parse_bool_delim(svec[i],",]"); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::boolean_1d_array::boolean_1d_array(alglib_impl::ae_vector *p) +{ + p_vec = NULL; + attach_to(p); +} + +alglib::boolean_1d_array::~boolean_1d_array() +{ +} + +const ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) const +{ + return p_vec->ptr.p_bool[i]; +} + +ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) +{ + return p_vec->ptr.p_bool[i]; +} + +const ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) const +{ + return p_vec->ptr.p_bool[i]; +} + +ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) +{ + return p_vec->ptr.p_bool[i]; +} + +void alglib::boolean_1d_array::setcontent(ae_int_t iLen, const bool *pContent ) +{ + ae_int_t i; + setlength(iLen); + for(i=0; i<iLen; i++) + p_vec->ptr.p_bool[i] = pContent[i]; +} + +ae_bool* alglib::boolean_1d_array::getcontent() +{ + return p_vec->ptr.p_bool; +} + +const ae_bool* alglib::boolean_1d_array::getcontent() const +{ + return p_vec->ptr.p_bool; +} + +std::string alglib::boolean_1d_array::tostring() const +{ + if( length()==0 ) + return "[]"; + return arraytostring(&(operator()(0)), length()); +} + +alglib::integer_1d_array::integer_1d_array() +{ + allocate_own(0, alglib_impl::DT_INT); +} + +alglib::integer_1d_array::integer_1d_array(alglib_impl::ae_vector *p) +{ + p_vec = NULL; + attach_to(p); +} + +alglib::integer_1d_array::integer_1d_array(const char *s) +{ + std::vector<const char*> svec; + size_t i; + char *p = filter_spaces(s); + try + { + str_vector_create(p, true, &svec); + allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_INT); + for(i=0; i<svec.size(); i++) + operator()((ae_int_t)i) = parse_int_delim(svec[i],",]"); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::integer_1d_array::~integer_1d_array() +{ +} + +const alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) const +{ + return p_vec->ptr.p_int[i]; +} + +alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) +{ + return p_vec->ptr.p_int[i]; +} + +const alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) const +{ + return p_vec->ptr.p_int[i]; +} + +alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) +{ + return p_vec->ptr.p_int[i]; +} + +void alglib::integer_1d_array::setcontent(ae_int_t iLen, const ae_int_t *pContent ) +{ + ae_int_t i; + setlength(iLen); + for(i=0; i<iLen; i++) + p_vec->ptr.p_int[i] = pContent[i]; +} + +alglib::ae_int_t* alglib::integer_1d_array::getcontent() +{ + return p_vec->ptr.p_int; +} + +const alglib::ae_int_t* alglib::integer_1d_array::getcontent() const +{ + return p_vec->ptr.p_int; +} + +std::string alglib::integer_1d_array::tostring() const +{ + if( length()==0 ) + return "[]"; + return arraytostring(&operator()(0), length()); +} + +alglib::real_1d_array::real_1d_array() +{ + allocate_own(0, alglib_impl::DT_REAL); +} + +alglib::real_1d_array::real_1d_array(alglib_impl::ae_vector *p) +{ + p_vec = NULL; + attach_to(p); +} + +alglib::real_1d_array::real_1d_array(const char *s) +{ + std::vector<const char*> svec; + size_t i; + char *p = filter_spaces(s); + try + { + str_vector_create(p, true, &svec); + allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_REAL); + for(i=0; i<svec.size(); i++) + operator()((ae_int_t)i) = parse_real_delim(svec[i],",]"); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::real_1d_array::~real_1d_array() +{ +} + +const double& alglib::real_1d_array::operator()(ae_int_t i) const +{ + return p_vec->ptr.p_double[i]; +} + +double& alglib::real_1d_array::operator()(ae_int_t i) +{ + return p_vec->ptr.p_double[i]; +} + +const double& alglib::real_1d_array::operator[](ae_int_t i) const +{ + return p_vec->ptr.p_double[i]; +} + +double& alglib::real_1d_array::operator[](ae_int_t i) +{ + return p_vec->ptr.p_double[i]; +} + +void alglib::real_1d_array::setcontent(ae_int_t iLen, const double *pContent ) +{ + ae_int_t i; + setlength(iLen); + for(i=0; i<iLen; i++) + p_vec->ptr.p_double[i] = pContent[i]; +} + +double* alglib::real_1d_array::getcontent() +{ + return p_vec->ptr.p_double; +} + +const double* alglib::real_1d_array::getcontent() const +{ + return p_vec->ptr.p_double; +} + +std::string alglib::real_1d_array::tostring(int dps) const +{ + if( length()==0 ) + return "[]"; + return arraytostring(&operator()(0), length(), dps); +} + +alglib::complex_1d_array::complex_1d_array() +{ + allocate_own(0, alglib_impl::DT_COMPLEX); +} + +alglib::complex_1d_array::complex_1d_array(alglib_impl::ae_vector *p) +{ + p_vec = NULL; + attach_to(p); +} + +alglib::complex_1d_array::complex_1d_array(const char *s) +{ + std::vector<const char*> svec; + size_t i; + char *p = filter_spaces(s); + try + { + str_vector_create(p, true, &svec); + allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_COMPLEX); + for(i=0; i<svec.size(); i++) + operator()((ae_int_t)i) = parse_complex_delim(svec[i],",]"); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::complex_1d_array::~complex_1d_array() +{ +} + +const alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) const +{ + return *((const alglib::complex*)(p_vec->ptr.p_complex+i)); +} + +alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) +{ + return *((alglib::complex*)(p_vec->ptr.p_complex+i)); +} + +const alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) const +{ + return *((const alglib::complex*)(p_vec->ptr.p_complex+i)); +} + +alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) +{ + return *((alglib::complex*)(p_vec->ptr.p_complex+i)); +} + +void alglib::complex_1d_array::setcontent(ae_int_t iLen, const alglib::complex *pContent ) +{ + ae_int_t i; + setlength(iLen); + for(i=0; i<iLen; i++) + { + p_vec->ptr.p_complex[i].x = pContent[i].x; + p_vec->ptr.p_complex[i].y = pContent[i].y; + } +} + + alglib::complex* alglib::complex_1d_array::getcontent() +{ + return (alglib::complex*)p_vec->ptr.p_complex; +} + +const alglib::complex* alglib::complex_1d_array::getcontent() const +{ + return (const alglib::complex*)p_vec->ptr.p_complex; +} + +std::string alglib::complex_1d_array::tostring(int dps) const +{ + if( length()==0 ) + return "[]"; + return arraytostring(&operator()(0), length(), dps); +} + +alglib::ae_matrix_wrapper::ae_matrix_wrapper() +{ + p_mat = NULL; +} + +alglib::ae_matrix_wrapper::~ae_matrix_wrapper() +{ + if( p_mat==&mat ) + ae_matrix_clear(p_mat); +} + +alglib::ae_matrix_wrapper::ae_matrix_wrapper(const alglib::ae_matrix_wrapper &rhs) +{ + if( rhs.p_mat!=NULL ) + { + p_mat = &mat; + if( !ae_matrix_init_copy(p_mat, rhs.p_mat, NULL, ae_false) ) + throw alglib::ap_error("ALGLIB: malloc error!"); + } + else + p_mat = NULL; +} + +const alglib::ae_matrix_wrapper& alglib::ae_matrix_wrapper::operator=(const alglib::ae_matrix_wrapper &rhs) +{ + if( this==&rhs ) + return *this; + if( p_mat==&mat ) + ae_matrix_clear(p_mat); + if( rhs.p_mat!=NULL ) + { + p_mat = &mat; + if( !ae_matrix_init_copy(p_mat, rhs.p_mat, NULL, ae_false) ) + throw alglib::ap_error("ALGLIB: malloc error!"); + } + else + p_mat = NULL; + return *this; +} + +void alglib::ae_matrix_wrapper::setlength(ae_int_t rows, ae_int_t cols) +{ + if( p_mat==NULL ) + throw alglib::ap_error("ALGLIB: setlength() error, p_mat==NULL (array was not correctly initialized)"); + if( p_mat!=&mat ) + throw alglib::ap_error("ALGLIB: setlength() error, p_mat!=&mat (attempt to resize frozen array)"); + if( !ae_matrix_set_length(p_mat, rows, cols, NULL) ) + throw alglib::ap_error("ALGLIB: malloc error"); +} + +alglib::ae_int_t alglib::ae_matrix_wrapper::rows() const +{ + if( p_mat==NULL ) + return 0; + return p_mat->rows; +} + +alglib::ae_int_t alglib::ae_matrix_wrapper::cols() const +{ + if( p_mat==NULL ) + return 0; + return p_mat->cols; +} + +bool alglib::ae_matrix_wrapper::isempty() const +{ + return rows()==0 || cols()==0; +} + +alglib::ae_int_t alglib::ae_matrix_wrapper::getstride() const +{ + if( p_mat==NULL ) + return 0; + return p_mat->stride; +} + +void alglib::ae_matrix_wrapper::attach_to(alglib_impl::ae_matrix *ptr) +{ + if( ptr==&mat ) + throw alglib::ap_error("ALGLIB: attempt to attach matrix to itself"); + if( p_mat==&mat ) + ae_matrix_clear(p_mat); + p_mat = ptr; +} + +void alglib::ae_matrix_wrapper::allocate_own(ae_int_t rows, ae_int_t cols, alglib_impl::ae_datatype datatype) +{ + if( p_mat==&mat ) + ae_matrix_clear(p_mat); + p_mat = &mat; + if( !ae_matrix_init(p_mat, rows, cols, datatype, NULL, false) ) + throw alglib::ap_error("ALGLIB: malloc error"); +} + +const alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() const +{ + return p_mat; +} + +alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() +{ + return p_mat; +} + +alglib::boolean_2d_array::boolean_2d_array() +{ + allocate_own(0, 0, alglib_impl::DT_BOOL); +} + +alglib::boolean_2d_array::boolean_2d_array(alglib_impl::ae_matrix *p) +{ + p_mat = NULL; + attach_to(p); +} + +alglib::boolean_2d_array::boolean_2d_array(const char *s) +{ + std::vector< std::vector<const char*> > smat; + size_t i, j; + char *p = filter_spaces(s); + try + { + str_matrix_create(p, &smat); + if( smat.size()!=0 ) + { + allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_BOOL); + for(i=0; i<smat.size(); i++) + for(j=0; j<smat[0].size(); j++) + operator()((ae_int_t)i,(ae_int_t)j) = parse_bool_delim(smat[i][j],",]"); + } + else + allocate_own(0, 0, alglib_impl::DT_BOOL); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::boolean_2d_array::~boolean_2d_array() +{ +} + +const ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) const +{ + return p_mat->ptr.pp_bool[i][j]; +} + +ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) +{ + return p_mat->ptr.pp_bool[i][j]; +} + +const ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) const +{ + return p_mat->ptr.pp_bool[i]; +} + +ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) +{ + return p_mat->ptr.pp_bool[i]; +} + +void alglib::boolean_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent ) +{ + ae_int_t i, j; + setlength(irows, icols); + for(i=0; i<irows; i++) + for(j=0; j<icols; j++) + p_mat->ptr.pp_bool[i][j] = pContent[i*icols+j]; +} + +std::string alglib::boolean_2d_array::tostring() const +{ + std::string result; + ae_int_t i; + if( isempty() ) + return "[[]]"; + result = "["; + for(i=0; i<rows(); i++) + { + if( i!=0 ) + result += ","; + result += arraytostring(&operator()(i,0), cols()); + } + result += "]"; + return result; +} + +alglib::integer_2d_array::integer_2d_array() +{ + allocate_own(0, 0, alglib_impl::DT_INT); +} + +alglib::integer_2d_array::integer_2d_array(alglib_impl::ae_matrix *p) +{ + p_mat = NULL; + attach_to(p); +} + +alglib::integer_2d_array::integer_2d_array(const char *s) +{ + std::vector< std::vector<const char*> > smat; + size_t i, j; + char *p = filter_spaces(s); + try + { + str_matrix_create(p, &smat); + if( smat.size()!=0 ) + { + allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_INT); + for(i=0; i<smat.size(); i++) + for(j=0; j<smat[0].size(); j++) + operator()((ae_int_t)i,(ae_int_t)j) = parse_int_delim(smat[i][j],",]"); + } + else + allocate_own(0, 0, alglib_impl::DT_INT); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::integer_2d_array::~integer_2d_array() +{ +} + +const alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) const +{ + return p_mat->ptr.pp_int[i][j]; +} + +alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) +{ + return p_mat->ptr.pp_int[i][j]; +} + +const alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) const +{ + return p_mat->ptr.pp_int[i]; +} + +alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) +{ + return p_mat->ptr.pp_int[i]; +} + +void alglib::integer_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent ) +{ + ae_int_t i, j; + setlength(irows, icols); + for(i=0; i<irows; i++) + for(j=0; j<icols; j++) + p_mat->ptr.pp_int[i][j] = pContent[i*icols+j]; +} + +std::string alglib::integer_2d_array::tostring() const +{ + std::string result; + ae_int_t i; + if( isempty() ) + return "[[]]"; + result = "["; + for(i=0; i<rows(); i++) + { + if( i!=0 ) + result += ","; + result += arraytostring(&operator()(i,0), cols()); + } + result += "]"; + return result; +} + +alglib::real_2d_array::real_2d_array() +{ + allocate_own(0, 0, alglib_impl::DT_REAL); +} + +alglib::real_2d_array::real_2d_array(alglib_impl::ae_matrix *p) +{ + p_mat = NULL; + attach_to(p); +} + +alglib::real_2d_array::real_2d_array(const char *s) +{ + std::vector< std::vector<const char*> > smat; + size_t i, j; + char *p = filter_spaces(s); + try + { + str_matrix_create(p, &smat); + if( smat.size()!=0 ) + { + allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_REAL); + for(i=0; i<smat.size(); i++) + for(j=0; j<smat[0].size(); j++) + operator()((ae_int_t)i,(ae_int_t)j) = parse_real_delim(smat[i][j],",]"); + } + else + allocate_own(0, 0, alglib_impl::DT_REAL); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::real_2d_array::~real_2d_array() +{ +} + +const double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) const +{ + return p_mat->ptr.pp_double[i][j]; +} + +double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) +{ + return p_mat->ptr.pp_double[i][j]; +} + +const double* alglib::real_2d_array::operator[](ae_int_t i) const +{ + return p_mat->ptr.pp_double[i]; +} + +double* alglib::real_2d_array::operator[](ae_int_t i) +{ + return p_mat->ptr.pp_double[i]; +} + +void alglib::real_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const double *pContent ) +{ + ae_int_t i, j; + setlength(irows, icols); + for(i=0; i<irows; i++) + for(j=0; j<icols; j++) + p_mat->ptr.pp_double[i][j] = pContent[i*icols+j]; +} + +std::string alglib::real_2d_array::tostring(int dps) const +{ + std::string result; + ae_int_t i; + if( isempty() ) + return "[[]]"; + result = "["; + for(i=0; i<rows(); i++) + { + if( i!=0 ) + result += ","; + result += arraytostring(&operator()(i,0), cols(), dps); + } + result += "]"; + return result; +} + +alglib::complex_2d_array::complex_2d_array() +{ + allocate_own(0, 0, alglib_impl::DT_COMPLEX); +} + +alglib::complex_2d_array::complex_2d_array(alglib_impl::ae_matrix *p) +{ + p_mat = NULL; + attach_to(p); +} + +alglib::complex_2d_array::complex_2d_array(const char *s) +{ + std::vector< std::vector<const char*> > smat; + size_t i, j; + char *p = filter_spaces(s); + try + { + str_matrix_create(p, &smat); + if( smat.size()!=0 ) + { + allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_COMPLEX); + for(i=0; i<smat.size(); i++) + for(j=0; j<smat[0].size(); j++) + operator()((ae_int_t)i,(ae_int_t)j) = parse_complex_delim(smat[i][j],",]"); + } + else + allocate_own(0, 0, alglib_impl::DT_COMPLEX); + alglib_impl::ae_free(p); + } + catch(...) + { + alglib_impl::ae_free(p); + throw; + } +} + +alglib::complex_2d_array::~complex_2d_array() +{ +} + +const alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) const +{ + return *((const alglib::complex*)(p_mat->ptr.pp_complex[i]+j)); +} + +alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) +{ + return *((alglib::complex*)(p_mat->ptr.pp_complex[i]+j)); +} + +const alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) const +{ + return (const alglib::complex*)(p_mat->ptr.pp_complex[i]); +} + +alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) +{ + return (alglib::complex*)(p_mat->ptr.pp_complex[i]); +} + +void alglib::complex_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent ) +{ + ae_int_t i, j; + setlength(irows, icols); + for(i=0; i<irows; i++) + for(j=0; j<icols; j++) + { + p_mat->ptr.pp_complex[i][j].x = pContent[i*icols+j].x; + p_mat->ptr.pp_complex[i][j].y = pContent[i*icols+j].y; + } +} + +std::string alglib::complex_2d_array::tostring(int dps) const +{ + std::string result; + ae_int_t i; + if( isempty() ) + return "[[]]"; + result = "["; + for(i=0; i<rows(); i++) + { + if( i!=0 ) + result += ","; + result += arraytostring(&operator()(i,0), cols(), dps); + } + result += "]"; + return result; +} + + +/******************************************************************** +Internal functions +********************************************************************/ +double alglib::get_aenv_nan() +{ + double r; + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + r = _alglib_env_state.v_nan; + alglib_impl::ae_state_clear(&_alglib_env_state); + return r; +} + +double alglib::get_aenv_posinf() +{ + double r; + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + r = _alglib_env_state.v_posinf; + alglib_impl::ae_state_clear(&_alglib_env_state); + return r; +} + +double alglib::get_aenv_neginf() +{ + double r; + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + r = _alglib_env_state.v_neginf; + alglib_impl::ae_state_clear(&_alglib_env_state); + return r; +} + +alglib::ae_int_t alglib::my_stricmp(const char *s1, const char *s2) +{ + int c1, c2; + + // + // handle special cases + // + if(s1==NULL && s2!=NULL) + return -1; + if(s1!=NULL && s2==NULL) + return +1; + if(s1==NULL && s2==NULL) + return 0; + + // + // compare + // + for (;;) + { + c1 = *s1; + c2 = *s2; + s1++; + s2++; + if( c1==0 ) + return c2==0 ? 0 : -1; + if( c2==0 ) + return c1==0 ? 0 : +1; + c1 = tolower(c1); + c2 = tolower(c2); + if( c1<c2 ) + return -1; + if( c1>c2 ) + return +1; + } +} + +char* alglib::filter_spaces(const char *s) +{ + size_t i, j, n; + char *r; + char *r0; + n = strlen(s); + r = (char*)alglib_impl::ae_malloc(n+1, NULL); + if( r==NULL ) + throw ap_error("malloc error"); + for(i=0,j=0,r0=r; i<=n; i++,s++) + if( !isspace(*s) ) + { + *r0 = *s; + r0++; + } + return r; +} + +void alglib::str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec) +{ + // + // parse beginning of the string. + // try to handle "[]" string + // + p_vec->clear(); + if( *src!='[' ) + throw alglib::ap_error("Incorrect initializer for vector"); + src++; + if( *src==']' ) + return; + p_vec->push_back(src); + for(;;) + { + if( *src==0 ) + throw alglib::ap_error("Incorrect initializer for vector"); + if( *src==']' ) + { + if( src[1]==0 || !match_head_only) + return; + throw alglib::ap_error("Incorrect initializer for vector"); + } + if( *src==',' ) + { + p_vec->push_back(src+1); + src++; + continue; + } + src++; + } +} + +void alglib::str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat) +{ + p_mat->clear(); + + // + // Try to handle "[[]]" string + // + if( strcmp(src, "[[]]")==0 ) + return; + + // + // Parse non-empty string + // + if( *src!='[' ) + throw alglib::ap_error("Incorrect initializer for matrix"); + src++; + for(;;) + { + p_mat->push_back(std::vector<const char*>()); + str_vector_create(src, false, &p_mat->back()); + if( p_mat->back().size()==0 || p_mat->back().size()!=(*p_mat)[0].size() ) + throw alglib::ap_error("Incorrect initializer for matrix"); + src = strchr(src, ']'); + if( src==NULL ) + throw alglib::ap_error("Incorrect initializer for matrix"); + src++; + if( *src==',' ) + { + src++; + continue; + } + if( *src==']' ) + break; + throw alglib::ap_error("Incorrect initializer for matrix"); + } + src++; + if( *src!=0 ) + throw alglib::ap_error("Incorrect initializer for matrix"); +} + +ae_bool alglib::parse_bool_delim(const char *s, const char *delim) +{ + const char *p; + char buf[8]; + + // try to parse false + p = "false"; + memset(buf, 0, sizeof(buf)); + strncpy(buf, s, strlen(p)); + if( my_stricmp(buf, p)==0 ) + { + if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL ) + throw alglib::ap_error("Cannot parse value"); + return ae_false; + } + + // try to parse true + p = "true"; + memset(buf, 0, sizeof(buf)); + strncpy(buf, s, strlen(p)); + if( my_stricmp(buf, p)==0 ) + { + if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL ) + throw alglib::ap_error("Cannot parse value"); + return ae_true; + } + + // error + throw alglib::ap_error("Cannot parse value"); +} + +alglib::ae_int_t alglib::parse_int_delim(const char *s, const char *delim) +{ + const char *p; + long long_val; + volatile ae_int_t ae_val; + + p = s; + + // + // check string structure: + // * leading sign + // * at least one digit + // * delimiter + // + if( *s=='-' || *s=='+' ) + s++; + if( *s==0 || strchr("1234567890",*s)==NULL) + throw alglib::ap_error("Cannot parse value"); + while( *s!=0 && strchr("1234567890",*s)!=NULL ) + s++; + if( *s==0 || strchr(delim,*s)==NULL ) + throw alglib::ap_error("Cannot parse value"); + + // convert and ensure that value fits into ae_int_t + s = p; + long_val = atol(s); + ae_val = long_val; + if( ae_val!=long_val ) + throw alglib::ap_error("Cannot parse value"); + return ae_val; +} + +bool alglib::_parse_real_delim(const char *s, const char *delim, double *result, const char **new_s) +{ + const char *p; + char *t; + bool has_digits; + char buf[64]; + int isign; + lconv *loc; + + p = s; + + // + // check string structure and decide what to do + // + isign = 1; + if( *s=='-' || *s=='+' ) + { + isign = *s=='-' ? -1 : +1; + s++; + } + memset(buf, 0, sizeof(buf)); + strncpy(buf, s, 3); + if( my_stricmp(buf,"nan")!=0 && my_stricmp(buf,"inf")!=0 ) + { + // + // [sign] [ddd] [.] [ddd] [e|E[sign]ddd] + // + has_digits = false; + if( *s!=0 && strchr("1234567890",*s)!=NULL ) + { + has_digits = true; + while( *s!=0 && strchr("1234567890",*s)!=NULL ) + s++; + } + if( *s=='.' ) + s++; + if( *s!=0 && strchr("1234567890",*s)!=NULL ) + { + has_digits = true; + while( *s!=0 && strchr("1234567890",*s)!=NULL ) + s++; + } + if (!has_digits ) + return false; + if( *s=='e' || *s=='E' ) + { + s++; + if( *s=='-' || *s=='+' ) + s++; + if( *s==0 || strchr("1234567890",*s)==NULL ) + return false; + while( *s!=0 && strchr("1234567890",*s)!=NULL ) + s++; + } + if( *s==0 || strchr(delim,*s)==NULL ) + return false; + *new_s = s; + + // + // finite value conversion + // + if( *new_s-p>=(int)sizeof(buf) ) + return false; + strncpy(buf, p, (size_t)(*new_s-p)); + buf[*new_s-p] = 0; + loc = localeconv(); + t = strchr(buf,'.'); + if( t!=NULL ) + *t = *loc->decimal_point; + *result = atof(buf); + return true; + } + else + { + // + // check delimiter and update *new_s + // + s += 3; + if( *s==0 || strchr(delim,*s)==NULL ) + return false; + *new_s = s; + + // + // NAN, INF conversion + // + if( my_stricmp(buf,"nan")==0 ) + *result = fp_nan; + if( my_stricmp(buf,"inf")==0 ) + *result = isign>0 ? fp_posinf : fp_neginf; + return true; + } +} + +double alglib::parse_real_delim(const char *s, const char *delim) +{ + double result; + const char *new_s; + if( !_parse_real_delim(s, delim, &result, &new_s) ) + throw alglib::ap_error("Cannot parse value"); + return result; +} + +alglib::complex alglib::parse_complex_delim(const char *s, const char *delim) +{ + double d_result; + const char *new_s; + alglib::complex c_result; + + // parse as real value + if( _parse_real_delim(s, delim, &d_result, &new_s) ) + return d_result; + + // parse as "a+bi" or "a-bi" + if( _parse_real_delim(s, "+-", &c_result.x, &new_s) ) + { + s = new_s; + if( !_parse_real_delim(s, "i", &c_result.y, &new_s) ) + throw alglib::ap_error("Cannot parse value"); + s = new_s+1; + if( *s==0 || strchr(delim,*s)==NULL ) + throw alglib::ap_error("Cannot parse value"); + return c_result; + } + + // parse as complex value "bi+a" or "bi-a" + if( _parse_real_delim(s, "i", &c_result.y, &new_s) ) + { + s = new_s+1; + if( *s==0 ) + throw alglib::ap_error("Cannot parse value"); + if( strchr(delim,*s)!=NULL ) + { + c_result.x = 0; + return c_result; + } + if( strchr("+-",*s)!=NULL ) + { + if( !_parse_real_delim(s, delim, &c_result.x, &new_s) ) + throw alglib::ap_error("Cannot parse value"); + return c_result; + } + throw alglib::ap_error("Cannot parse value"); + } + + // error + throw alglib::ap_error("Cannot parse value"); +} + +std::string alglib::arraytostring(const bool *ptr, ae_int_t n) +{ + std::string result; + ae_int_t i; + result = "["; + for(i=0; i<n; i++) + { + if( i!=0 ) + result += ","; + result += ptr[i] ? "true" : "false"; + } + result += "]"; + return result; +} + +std::string alglib::arraytostring(const ae_int_t *ptr, ae_int_t n) +{ + std::string result; + ae_int_t i; + char buf[64]; + result = "["; + for(i=0; i<n; i++) + { + if( sprintf(buf, i==0 ? "%ld" : ",%ld", long(ptr[i]))>=(int)sizeof(buf) ) + throw ap_error("arraytostring(): buffer overflow"); + result += buf; + } + result += "]"; + return result; +} + +std::string alglib::arraytostring(const double *ptr, ae_int_t n, int dps) +{ + std::string result; + ae_int_t i; + char buf[64]; + char mask1[64]; + char mask2[64]; + result = "["; + if( sprintf(mask1, "%%.%df", dps)>=(int)sizeof(mask1) ) + throw ap_error("arraytostring(): buffer overflow"); + if( sprintf(mask2, ",%s", mask1)>=(int)sizeof(mask2) ) + throw ap_error("arraytostring(): buffer overflow"); + for(i=0; i<n; i++) + { + buf[0] = 0; + if( fp_isfinite(ptr[i]) ) + { + if( sprintf(buf, i==0 ? mask1 : mask2, double(ptr[i]))>=(int)sizeof(buf) ) + throw ap_error("arraytostring(): buffer overflow"); + } + else if( fp_isnan(ptr[i]) ) + strcpy(buf, i==0 ? "NAN" : ",NAN"); + else if( fp_isposinf(ptr[i]) ) + strcpy(buf, i==0 ? "+INF" : ",+INF"); + else if( fp_isneginf(ptr[i]) ) + strcpy(buf, i==0 ? "-INF" : ",-INF"); + result += buf; + } + result += "]"; + return result; +} + +std::string alglib::arraytostring(const alglib::complex *ptr, ae_int_t n, int dps) +{ + std::string result; + ae_int_t i; + result = "["; + for(i=0; i<n; i++) + { + if( i!=0 ) + result += ","; + result += ptr[i].tostring(dps); + } + result += "]"; + return result; +} + + +/******************************************************************** +standard functions +********************************************************************/ +int alglib::sign2(double x) +{ + if( x>0 ) return 1; + if( x<0 ) return -1; + return 0; +} + +double alglib::randomreal() +{ + int i1 = rand(); + int i2 = rand(); + while(i1==RAND_MAX) + i1 =rand(); + while(i2==RAND_MAX) + i2 =rand(); + double mx = RAND_MAX; + return (i1+i2/mx)/mx; +} + +int alglib::randominteger(int maxv) +{ return rand()%maxv; } + +int alglib::round(double x) +{ return int(floor(x+0.5)); } + +int alglib::trunc(double x) +{ return int(x>0 ? floor(x) : ceil(x)); } + +int alglib::ifloor(double x) +{ return int(floor(x)); } + +int alglib::iceil(double x) +{ return int(ceil(x)); } + +double alglib::pi() +{ return 3.14159265358979323846; } + +double alglib::sqr(double x) +{ return x*x; } + +int alglib::maxint(int m1, int m2) +{ + return m1>m2 ? m1 : m2; +} + +int alglib::minint(int m1, int m2) +{ + return m1>m2 ? m2 : m1; +} + +double alglib::maxreal(double m1, double m2) +{ + return m1>m2 ? m1 : m2; +} + +double alglib::minreal(double m1, double m2) +{ + return m1>m2 ? m2 : m1; +} + +bool alglib::fp_eq(double v1, double v2) +{ + // IEEE-strict floating point comparison + volatile double x = v1; + volatile double y = v2; + return x==y; +} + +bool alglib::fp_neq(double v1, double v2) +{ + // IEEE-strict floating point comparison + return !fp_eq(v1,v2); +} + +bool alglib::fp_less(double v1, double v2) +{ + // IEEE-strict floating point comparison + volatile double x = v1; + volatile double y = v2; + return x<y; +} + +bool alglib::fp_less_eq(double v1, double v2) +{ + // IEEE-strict floating point comparison + volatile double x = v1; + volatile double y = v2; + return x<=y; +} + +bool alglib::fp_greater(double v1, double v2) +{ + // IEEE-strict floating point comparison + volatile double x = v1; + volatile double y = v2; + return x>y; +} + +bool alglib::fp_greater_eq(double v1, double v2) +{ + // IEEE-strict floating point comparison + volatile double x = v1; + volatile double y = v2; + return x>=y; +} + +bool alglib::fp_isnan(double x) +{ + return alglib_impl::ae_isnan_stateless(x,endianness); +} + +bool alglib::fp_isposinf(double x) +{ + return alglib_impl::ae_isposinf_stateless(x,endianness); +} + +bool alglib::fp_isneginf(double x) +{ + return alglib_impl::ae_isneginf_stateless(x,endianness); +} + +bool alglib::fp_isinf(double x) +{ + return alglib_impl::ae_isinf_stateless(x,endianness); +} + +bool alglib::fp_isfinite(double x) +{ + return alglib_impl::ae_isfinite_stateless(x,endianness); +} + +/******************************************************************** +Dataset functions +********************************************************************/ +/*bool alglib::readstrings(std::string file, std::list<std::string> *pOutput) +{ + return readstrings(file, pOutput, ""); +} + +bool alglib::readstrings(std::string file, std::list<std::string> *pOutput, std::string comment) +{ + std::string cmd, s; + FILE *f; + char buf[32768]; + char *str; + + f = fopen(file.c_str(), "rb"); + if( !f ) + return false; + s = ""; + pOutput->clear(); + while(str=fgets(buf, sizeof(buf), f)) + { + // TODO: read file by small chunks, combine in one large string + if( strlen(str)==0 ) + continue; + + // + // trim trailing newline chars + // + char *eos = str+strlen(str)-1; + if( *eos=='\n' ) + { + *eos = 0; + eos--; + } + if( *eos=='\r' ) + { + *eos = 0; + eos--; + } + s = str; + + // + // skip comments + // + if( comment.length()>0 ) + if( strncmp(s.c_str(), comment.c_str(), comment.length())==0 ) + { + s = ""; + continue; + } + + // + // read data + // + if( s.length()<1 ) + { + fclose(f); + throw alglib::ap_error("internal error in read_strings"); + } + pOutput->push_back(s); + } + fclose(f); + return true; +} + +void alglib::explodestring(std::string s, char sep, std::vector<std::string> *pOutput) +{ + std::string tmp; + int i; + tmp = ""; + pOutput->clear(); + for(i=0; i<s.length(); i++) + { + if( s[i]!=sep ) + { + tmp += s[i]; + continue; + } + //if( tmp.length()!=0 ) + pOutput->push_back(tmp); + tmp = ""; + } + if( tmp.length()!=0 ) + pOutput->push_back(tmp); +} + +std::string alglib::strtolower(const std::string &s) +{ + std::string r = s; + for(int i=0; i<r.length(); i++) + r[i] = tolower(r[i]); + return r; +} + +std::string alglib::xtrim(std::string s) +{ + char *pstr = (char*)malloc(s.length()+1); + char *p2 = pstr; + if( pstr==NULL ) + throw "xalloc in xtrim()"; + try + { + bool bws; + int i; + + // + // special cases: + // * zero length string + // * string includes only spaces + // + if( s.length()==0 ) + { + free(pstr); + return ""; + } + bws = true; + for(i=0; i<s.length(); i++) + if( s[i]!=' ' ) + bws = false; + if( bws ) + { + free(pstr); + return ""; + } + + // + // merge internal spaces + // + bws = false; + for(i=0; i<s.length(); i++) + { + if( s[i]==' ' && bws ) + continue; + if( s[i]==' ' ) + { + *p2 = ' '; + p2++; + bws = true; + continue; + } + *p2 = s[i]; + bws = false; + p2++; + } + *p2 = 0; + + // + // trim leading/trailing spaces. + // we expect at least one non-space character in the string + // + p2--; + while(*p2==' ') + { + *p2 = 0; + p2--; + } + p2 = pstr; + while((*p2)==' ') + p2++; + + // + // result + // + std::string r = p2; + free(pstr); + return r; + } + catch(...) + { + free(pstr); + throw "unknown exception in xtrim()"; + } +} + +bool alglib::opendataset(std::string file, dataset *pdataset) +{ + std::list<std::string> Lines; + std::vector<std::string> Values, RowsArr, ColsArr, VarsArr, HeadArr; + std::list<std::string>::iterator i; + std::string s; + int TrnFirst, TrnLast, ValFirst, ValLast, TstFirst, TstLast, LinesRead, j; + + // + // Read data + // + if( pdataset==NULL ) + return false; + if( !readstrings(file, &Lines, "//") ) + return false; + i = Lines.begin(); + *pdataset = dataset(); + + // + // Read header + // + if( i==Lines.end() ) + return false; + s = alglib::xtrim(*i); + alglib::explodestring(s, '#', &HeadArr); + if( HeadArr.size()!=2 ) + return false; + + // + // Rows info + // + alglib::explodestring(alglib::xtrim(HeadArr[0]), ' ', &RowsArr); + if( RowsArr.size()==0 || RowsArr.size()>3 ) + return false; + if( RowsArr.size()==1 ) + { + pdataset->totalsize = atol(RowsArr[0].c_str()); + pdataset->trnsize = pdataset->totalsize; + } + if( RowsArr.size()==2 ) + { + pdataset->trnsize = atol(RowsArr[0].c_str()); + pdataset->tstsize = atol(RowsArr[1].c_str()); + pdataset->totalsize = pdataset->trnsize + pdataset->tstsize; + } + if( RowsArr.size()==3 ) + { + pdataset->trnsize = atol(RowsArr[0].c_str()); + pdataset->valsize = atol(RowsArr[1].c_str()); + pdataset->tstsize = atol(RowsArr[2].c_str()); + pdataset->totalsize = pdataset->trnsize + pdataset->valsize + pdataset->tstsize; + } + if( pdataset->totalsize<=0 || pdataset->trnsize<0 || pdataset->valsize<0 || pdataset->tstsize<0 ) + return false; + TrnFirst = 0; + TrnLast = TrnFirst + pdataset->trnsize; + ValFirst = TrnLast; + ValLast = ValFirst + pdataset->valsize; + TstFirst = ValLast; + TstLast = TstFirst + pdataset->tstsize; + + // + // columns + // + alglib::explodestring(alglib::xtrim(HeadArr[1]), ' ', &ColsArr); + if( ColsArr.size()!=1 && ColsArr.size()!=4 ) + return false; + if( ColsArr.size()==1 ) + { + pdataset->nin = atoi(ColsArr[0].c_str()); + if( pdataset->nin<=0 ) + return false; + } + if( ColsArr.size()==4 ) + { + if( alglib::strtolower(ColsArr[0])!="reg" && alglib::strtolower(ColsArr[0])!="cls" ) + return false; + if( ColsArr[2]!="=>" ) + return false; + pdataset->nin = atol(ColsArr[1].c_str()); + if( pdataset->nin<1 ) + return false; + if( alglib::strtolower(ColsArr[0])=="reg" ) + { + pdataset->nclasses = 0; + pdataset->nout = atol(ColsArr[3].c_str()); + if( pdataset->nout<1 ) + return false; + } + else + { + pdataset->nclasses = atol(ColsArr[3].c_str()); + pdataset->nout = 1; + if( pdataset->nclasses<2 ) + return false; + } + } + + // + // initialize arrays + // + pdataset->all.setlength(pdataset->totalsize, pdataset->nin+pdataset->nout); + if( pdataset->trnsize>0 ) pdataset->trn.setlength(pdataset->trnsize, pdataset->nin+pdataset->nout); + if( pdataset->valsize>0 ) pdataset->val.setlength(pdataset->valsize, pdataset->nin+pdataset->nout); + if( pdataset->tstsize>0 ) pdataset->tst.setlength(pdataset->tstsize, pdataset->nin+pdataset->nout); + + // + // read data + // + for(LinesRead=0, i++; i!=Lines.end() && LinesRead<pdataset->totalsize; i++, LinesRead++) + { + std::string sss = *i; + alglib::explodestring(alglib::xtrim(*i), ' ', &VarsArr); + if( VarsArr.size()!=pdataset->nin+pdataset->nout ) + return false; + int tmpc = alglib::round(atof(VarsArr[pdataset->nin+pdataset->nout-1].c_str())); + if( pdataset->nclasses>0 && (tmpc<0 || tmpc>=pdataset->nclasses) ) + return false; + for(j=0; j<pdataset->nin+pdataset->nout; j++) + { + pdataset->all(LinesRead,j) = atof(VarsArr[j].c_str()); + if( LinesRead>=TrnFirst && LinesRead<TrnLast ) + pdataset->trn(LinesRead-TrnFirst,j) = atof(VarsArr[j].c_str()); + if( LinesRead>=ValFirst && LinesRead<ValLast ) + pdataset->val(LinesRead-ValFirst,j) = atof(VarsArr[j].c_str()); + if( LinesRead>=TstFirst && LinesRead<TstLast ) + pdataset->tst(LinesRead-TstFirst,j) = atof(VarsArr[j].c_str()); + } + } + if( LinesRead!=pdataset->totalsize ) + return false; + return true; +}*/ + +/* +previous variant +bool alglib::opendataset(std::string file, dataset *pdataset) +{ + std::list<std::string> Lines; + std::vector<std::string> Values; + std::list<std::string>::iterator i; + int nCol, nRow, nSplitted; + int nColumns, nRows; + + // + // Read data + // + if( pdataset==NULL ) + return false; + if( !readstrings(file, &Lines, "//") ) + return false; + i = Lines.begin(); + *pdataset = dataset(); + + // + // Read columns info + // + if( i==Lines.end() ) + return false; + if( sscanf(i->c_str(), " columns = %d %d ", &pdataset->nin, &pdataset->nout)!=2 ) + return false; + if( pdataset->nin<=0 || pdataset->nout==0 || pdataset->nout==-1) + return false; + if( pdataset->nout<0 ) + { + pdataset->nclasses = -pdataset->nout; + pdataset->nout = 1; + pdataset->iscls = true; + } + else + { + pdataset->isreg = true; + } + nColumns = pdataset->nin+pdataset->nout; + i++; + + // + // Read rows info + // + if( i==Lines.end() ) + return false; + if( sscanf(i->c_str(), " rows = %d %d %d ", &pdataset->trnsize, &pdataset->valsize, &pdataset->tstsize)!=3 ) + return false; + if( (pdataset->trnsize<0) || (pdataset->valsize<0) || (pdataset->tstsize<0) ) + return false; + if( (pdataset->trnsize==0) && (pdataset->valsize==0) && (pdataset->tstsize==0) ) + return false; + nRows = pdataset->trnsize+pdataset->valsize+pdataset->tstsize; + pdataset->size = nRows; + if( Lines.size()!=nRows+2 ) + return false; + i++; + + // + // Read all cases + // + alglib::real_2d_array &arr = pdataset->all; + arr.setbounds(0, nRows-1, 0, nColumns-1); + for(nRow=0; nRow<nRows; nRow++) + { + alglib::ap_error::make_assertion(i!=Lines.end()); + explodestring(*i, '\t', &Values); + if( Values.size()!=nColumns ) + return false; + for(nCol=0; nCol<nColumns; nCol++) + { + double v; + if( sscanf(Values[nCol].c_str(), "%lg", &v)!=1 ) + return false; + if( (nCol==nColumns-1) && pdataset->iscls && ((round(v)<0) || (round(v)>=pdataset->nclasses)) ) + return false; + if( (nCol==nColumns-1) && pdataset->iscls ) + arr(nRow, nCol) = round(v); + else + arr(nRow, nCol) = v; + } + i++; + } + + // + // Split to training, validation and test sets + // + if( pdataset->trnsize>0 ) + pdataset->trn.setbounds(0, pdataset->trnsize-1, 0, nColumns-1); + if( pdataset->valsize>0 ) + pdataset->val.setbounds(0, pdataset->valsize-1, 0, nColumns-1); + if( pdataset->tstsize>0 ) + pdataset->tst.setbounds(0, pdataset->tstsize-1, 0, nColumns-1); + nSplitted=0; + for(nRow=0; nRow<=pdataset->trnsize-1; nRow++, nSplitted++) + for(nCol=0; nCol<=nColumns-1; nCol++) + pdataset->trn(nRow,nCol) = arr(nSplitted,nCol); + for(nRow=0; nRow<=pdataset->valsize-1; nRow++, nSplitted++) + for(nCol=0; nCol<=nColumns-1; nCol++) + pdataset->val(nRow,nCol) = arr(nSplitted,nCol); + for(nRow=0; nRow<=pdataset->tstsize-1; nRow++, nSplitted++) + for(nCol=0; nCol<=nColumns-1; nCol++) + pdataset->tst(nRow,nCol) = arr(nSplitted,nCol); + return true; +}*/ + +alglib::ae_int_t alglib::vlen(ae_int_t n1, ae_int_t n2) +{ + return n2-n1+1; +} + + + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTIONS CONTAINS OPTIMIZED LINEAR ALGEBRA CODE +// IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +#define alglib_simd_alignment 16 + +#define alglib_r_block 32 +#define alglib_half_r_block 16 +#define alglib_twice_r_block 64 + +#define alglib_c_block 24 +#define alglib_half_c_block 12 +#define alglib_twice_c_block 48 + + +/******************************************************************** +This subroutine calculates fast 32x32 real matrix-vector product: + + y := beta*y + alpha*A*x + +using either generic C code or native optimizations (if available) + +IMPORTANT: +* A must be stored in row-major order, + stride is alglib_r_block, + aligned on alglib_simd_alignment boundary +* X must be aligned on alglib_simd_alignment boundary +* Y may be non-aligned +********************************************************************/ +void _ialglib_mv_32(const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta) +{ + ae_int_t i, k; + const double *pa0, *pa1, *pb; + + pa0 = a; + pa1 = a+alglib_r_block; + pb = x; + for(i=0; i<16; i++) + { + double v0 = 0, v1 = 0; + for(k=0; k<4; k++) + { + v0 += pa0[0]*pb[0]; + v1 += pa1[0]*pb[0]; + v0 += pa0[1]*pb[1]; + v1 += pa1[1]*pb[1]; + v0 += pa0[2]*pb[2]; + v1 += pa1[2]*pb[2]; + v0 += pa0[3]*pb[3]; + v1 += pa1[3]*pb[3]; + v0 += pa0[4]*pb[4]; + v1 += pa1[4]*pb[4]; + v0 += pa0[5]*pb[5]; + v1 += pa1[5]*pb[5]; + v0 += pa0[6]*pb[6]; + v1 += pa1[6]*pb[6]; + v0 += pa0[7]*pb[7]; + v1 += pa1[7]*pb[7]; + pa0 += 8; + pa1 += 8; + pb += 8; + } + y[0] = beta*y[0]+alpha*v0; + y[stride] = beta*y[stride]+alpha*v1; + + /* + * now we've processed rows I and I+1, + * pa0 and pa1 are pointing to rows I+1 and I+2. + * move to I+2 and I+3. + */ + pa0 += alglib_r_block; + pa1 += alglib_r_block; + pb = x; + y+=2*stride; + } +} + + +/************************************************************************* +This function calculates MxN real matrix-vector product: + + y := beta*y + alpha*A*x + +using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32. + +If beta is zero, we do not use previous values of y (they are overwritten +by alpha*A*x without ever being read). If alpha is zero, no matrix-vector +product is calculated (only beta is updated); however, this update is not +efficient and this function should NOT be used for multiplication of +vector and scalar. + +IMPORTANT: +* 0<=M<=alglib_r_block, 0<=N<=alglib_r_block +* A must be stored in row-major order with stride equal to alglib_r_block +*************************************************************************/ +void _ialglib_rmv(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta) +{ + /* + * Handle special cases: + * - alpha is zero or n is zero + * - m is zero + */ + if( m==0 ) + return; + if( alpha==0.0 || n==0 ) + { + ae_int_t i; + if( beta==0.0 ) + { + for(i=0; i<m; i++) + { + *y = 0.0; + y += stride; + } + } + else + { + for(i=0; i<m; i++) + { + *y *= beta; + y += stride; + } + } + return; + } + + /* + * Handle general case: nonzero alpha, n and m + * + */ + if( m==32 && n==32 ) + { + /* + * 32x32, may be we have something better than general implementation + */ + _ialglib_mv_32(a, x, y, stride, alpha, beta); + } + else + { + ae_int_t i, k, m2, n8, n2, ntrail2; + const double *pa0, *pa1, *pb; + + /* + * First M/2 rows of A are processed in pairs. + * optimized code is used. + */ + m2 = m/2; + n8 = n/8; + ntrail2 = (n-8*n8)/2; + for(i=0; i<m2; i++) + { + double v0 = 0, v1 = 0; + + /* + * 'a' points to the part of the matrix which + * is not processed yet + */ + pb = x; + pa0 = a; + pa1 = a+alglib_r_block; + a += alglib_twice_r_block; + + /* + * 8 elements per iteration + */ + for(k=0; k<n8; k++) + { + v0 += pa0[0]*pb[0]; + v1 += pa1[0]*pb[0]; + v0 += pa0[1]*pb[1]; + v1 += pa1[1]*pb[1]; + v0 += pa0[2]*pb[2]; + v1 += pa1[2]*pb[2]; + v0 += pa0[3]*pb[3]; + v1 += pa1[3]*pb[3]; + v0 += pa0[4]*pb[4]; + v1 += pa1[4]*pb[4]; + v0 += pa0[5]*pb[5]; + v1 += pa1[5]*pb[5]; + v0 += pa0[6]*pb[6]; + v1 += pa1[6]*pb[6]; + v0 += pa0[7]*pb[7]; + v1 += pa1[7]*pb[7]; + pa0 += 8; + pa1 += 8; + pb += 8; + } + + /* + * 2 elements per iteration + */ + for(k=0; k<ntrail2; k++) + { + v0 += pa0[0]*pb[0]; + v1 += pa1[0]*pb[0]; + v0 += pa0[1]*pb[1]; + v1 += pa1[1]*pb[1]; + pa0 += 2; + pa1 += 2; + pb += 2; + } + + /* + * last element, if needed + */ + if( n%2!=0 ) + { + v0 += pa0[0]*pb[0]; + v1 += pa1[0]*pb[0]; + } + + /* + * final update + */ + if( beta!=0 ) + { + y[0] = beta*y[0]+alpha*v0; + y[stride] = beta*y[stride]+alpha*v1; + } + else + { + y[0] = alpha*v0; + y[stride] = alpha*v1; + } + + /* + * move to the next pair of elements + */ + y+=2*stride; + } + + + /* + * Last (odd) row is processed with less optimized code. + */ + if( m%2!=0 ) + { + double v0 = 0; + + /* + * 'a' points to the part of the matrix which + * is not processed yet + */ + pb = x; + pa0 = a; + + /* + * 2 elements per iteration + */ + n2 = n/2; + for(k=0; k<n2; k++) + { + v0 += pa0[0]*pb[0]+pa0[1]*pb[1]; + pa0 += 2; + pb += 2; + } + + /* + * last element, if needed + */ + if( n%2!=0 ) + v0 += pa0[0]*pb[0]; + + /* + * final update + */ + if( beta!=0 ) + y[0] = beta*y[0]+alpha*v0; + else + y[0] = alpha*v0; + } + } +} + + +/************************************************************************* +This function calculates MxN real matrix-vector product: + + y := beta*y + alpha*A*x + +using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32. + +If beta is zero, we do not use previous values of y (they are overwritten +by alpha*A*x without ever being read). If alpha is zero, no matrix-vector +product is calculated (only beta is updated); however, this update is not +efficient and this function should NOT be used for multiplication of +vector and scalar. + +IMPORTANT: +* 0<=M<=alglib_r_block, 0<=N<=alglib_r_block +* A must be stored in row-major order with stride equal to alglib_r_block +* y may be non-aligned +* both A and x must have same offset with respect to 16-byte boundary: + either both are aligned, or both are aligned with offset 8. Function + will crash your system if you try to call it with misaligned or + incorrectly aligned data. + +This function supports SSE2; it can be used when: +1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time) +2. ae_cpuid() result contains CPU_SSE2 (checked at run-time) + +If (1) is failed, this function will still be defined and callable, but it +will do nothing. If (2) is failed , call to this function will probably +crash your system. + +If you want to know whether it is safe to call it, you should check +results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable +and will do its work. +*************************************************************************/ +void _ialglib_rmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta) +{ +#if defined(AE_HAS_SSE2_INTRINSICS) + ae_int_t i, k, n2; + ae_int_t mb3, mtail, nhead, nb8, nb2, ntail; + const double *pa0, *pa1, *pa2, *pb; + __m128d v0, v1, v2, va0, va1, va2, vx, vtmp; + double buf3[3], buf6[6]; + double d; + + /* + * Handle special cases: + * - alpha is zero or n is zero + * - m is zero + */ + if( m==0 ) + return; + if( alpha==0.0 || n==0 ) + { + if( beta==0.0 ) + { + for(i=0; i<m; i++) + { + *y = 0.0; + y += stride; + } + } + else + { + for(i=0; i<m; i++) + { + *y *= beta; + y += stride; + } + } + return; + } + + /* + * Handle general case: nonzero alpha, n and m + * + * We divide problem as follows... + * + * Rows M are divided into: + * - mb3 blocks, each 3xN + * - mtail blocks, each 1xN + * + * Within a row, elements are divided into: + * - nhead 1x1 blocks (used to align the rest, either 0 or 1) + * - nb8 1x8 blocks, aligned to 16-byte boundary + * - nb2 1x2 blocks, aligned to 16-byte boundary + * - ntail 1x1 blocks, aligned too (altough we don't rely on it) + * + */ + n2 = n/2; + mb3 = m/3; + mtail = m%3; + nhead = ae_misalignment(a,alglib_simd_alignment)==0 ? 0 : 1; + nb8 = (n-nhead)/8; + nb2 = (n-nhead-8*nb8)/2; + ntail = n-nhead-8*nb8-2*nb2; + for(i=0; i<mb3; i++) + { + double row0, row1, row2; + row0 = 0; + row1 = 0; + row2 = 0; + pb = x; + pa0 = a; + pa1 = a+alglib_r_block; + pa2 = a+alglib_twice_r_block; + a += 3*alglib_r_block; + if( nhead==1 ) + { + vx = _mm_load_sd(pb); + v0 = _mm_load_sd(pa0); + v1 = _mm_load_sd(pa1); + v2 = _mm_load_sd(pa2); + + v0 = _mm_mul_sd(v0,vx); + v1 = _mm_mul_sd(v1,vx); + v2 = _mm_mul_sd(v2,vx); + + pa0++; + pa1++; + pa2++; + pb++; + } + else + { + v0 = _mm_setzero_pd(); + v1 = _mm_setzero_pd(); + v2 = _mm_setzero_pd(); + } + for(k=0; k<nb8; k++) + { + /* + * this code is a shuffle of simultaneous dot product. + * see below for commented unshuffled original version. + */ + vx = _mm_load_pd(pb); + va0 = _mm_load_pd(pa0); + va1 = _mm_load_pd(pa1); + va0 = _mm_mul_pd(va0,vx); + va2 = _mm_load_pd(pa2); + v0 = _mm_add_pd(va0,v0); + va1 = _mm_mul_pd(va1,vx); + va0 = _mm_load_pd(pa0+2); + v1 = _mm_add_pd(va1,v1); + va2 = _mm_mul_pd(va2,vx); + va1 = _mm_load_pd(pa1+2); + v2 = _mm_add_pd(va2,v2); + vx = _mm_load_pd(pb+2); + va0 = _mm_mul_pd(va0,vx); + va2 = _mm_load_pd(pa2+2); + v0 = _mm_add_pd(va0,v0); + va1 = _mm_mul_pd(va1,vx); + va0 = _mm_load_pd(pa0+4); + v1 = _mm_add_pd(va1,v1); + va2 = _mm_mul_pd(va2,vx); + va1 = _mm_load_pd(pa1+4); + v2 = _mm_add_pd(va2,v2); + vx = _mm_load_pd(pb+4); + va0 = _mm_mul_pd(va0,vx); + va2 = _mm_load_pd(pa2+4); + v0 = _mm_add_pd(va0,v0); + va1 = _mm_mul_pd(va1,vx); + va0 = _mm_load_pd(pa0+6); + v1 = _mm_add_pd(va1,v1); + va2 = _mm_mul_pd(va2,vx); + va1 = _mm_load_pd(pa1+6); + v2 = _mm_add_pd(va2,v2); + vx = _mm_load_pd(pb+6); + va0 = _mm_mul_pd(va0,vx); + v0 = _mm_add_pd(va0,v0); + va2 = _mm_load_pd(pa2+6); + va1 = _mm_mul_pd(va1,vx); + v1 = _mm_add_pd(va1,v1); + va2 = _mm_mul_pd(va2,vx); + v2 = _mm_add_pd(va2,v2); + + pa0 += 8; + pa1 += 8; + pa2 += 8; + pb += 8; + + /* + this is unshuffled version of code above + + vx = _mm_load_pd(pb); + va0 = _mm_load_pd(pa0); + va1 = _mm_load_pd(pa1); + va2 = _mm_load_pd(pa2); + + va0 = _mm_mul_pd(va0,vx); + va1 = _mm_mul_pd(va1,vx); + va2 = _mm_mul_pd(va2,vx); + + v0 = _mm_add_pd(va0,v0); + v1 = _mm_add_pd(va1,v1); + v2 = _mm_add_pd(va2,v2); + + vx = _mm_load_pd(pb+2); + va0 = _mm_load_pd(pa0+2); + va1 = _mm_load_pd(pa1+2); + va2 = _mm_load_pd(pa2+2); + + va0 = _mm_mul_pd(va0,vx); + va1 = _mm_mul_pd(va1,vx); + va2 = _mm_mul_pd(va2,vx); + + v0 = _mm_add_pd(va0,v0); + v1 = _mm_add_pd(va1,v1); + v2 = _mm_add_pd(va2,v2); + + vx = _mm_load_pd(pb+4); + va0 = _mm_load_pd(pa0+4); + va1 = _mm_load_pd(pa1+4); + va2 = _mm_load_pd(pa2+4); + + va0 = _mm_mul_pd(va0,vx); + va1 = _mm_mul_pd(va1,vx); + va2 = _mm_mul_pd(va2,vx); + + v0 = _mm_add_pd(va0,v0); + v1 = _mm_add_pd(va1,v1); + v2 = _mm_add_pd(va2,v2); + + vx = _mm_load_pd(pb+6); + va0 = _mm_load_pd(pa0+6); + va1 = _mm_load_pd(pa1+6); + va2 = _mm_load_pd(pa2+6); + + va0 = _mm_mul_pd(va0,vx); + va1 = _mm_mul_pd(va1,vx); + va2 = _mm_mul_pd(va2,vx); + + v0 = _mm_add_pd(va0,v0); + v1 = _mm_add_pd(va1,v1); + v2 = _mm_add_pd(va2,v2); + */ + } + for(k=0; k<nb2; k++) + { + vx = _mm_load_pd(pb); + va0 = _mm_load_pd(pa0); + va1 = _mm_load_pd(pa1); + va2 = _mm_load_pd(pa2); + + va0 = _mm_mul_pd(va0,vx); + v0 = _mm_add_pd(va0,v0); + va1 = _mm_mul_pd(va1,vx); + v1 = _mm_add_pd(va1,v1); + va2 = _mm_mul_pd(va2,vx); + v2 = _mm_add_pd(va2,v2); + + pa0 += 2; + pa1 += 2; + pa2 += 2; + pb += 2; + } + for(k=0; k<ntail; k++) + { + vx = _mm_load1_pd(pb); + va0 = _mm_load1_pd(pa0); + va1 = _mm_load1_pd(pa1); + va2 = _mm_load1_pd(pa2); + + va0 = _mm_mul_sd(va0,vx); + v0 = _mm_add_sd(v0,va0); + va1 = _mm_mul_sd(va1,vx); + v1 = _mm_add_sd(v1,va1); + va2 = _mm_mul_sd(va2,vx); + v2 = _mm_add_sd(v2,va2); + } + vtmp = _mm_add_pd(_mm_unpacklo_pd(v0,v1),_mm_unpackhi_pd(v0,v1)); + _mm_storel_pd(&row0, vtmp); + _mm_storeh_pd(&row1, vtmp); + v2 = _mm_add_sd(_mm_shuffle_pd(v2,v2,1),v2); + _mm_storel_pd(&row2, v2); + if( beta!=0 ) + { + y[0] = beta*y[0]+alpha*row0; + y[stride] = beta*y[stride]+alpha*row1; + y[2*stride] = beta*y[2*stride]+alpha*row2; + } + else + { + y[0] = alpha*row0; + y[stride] = alpha*row1; + y[2*stride] = alpha*row2; + } + y+=3*stride; + } + for(i=0; i<mtail; i++) + { + double row0; + row0 = 0; + pb = x; + pa0 = a; + a += alglib_r_block; + for(k=0; k<n2; k++) + { + row0 += pb[0]*pa0[0]+pb[1]*pa0[1]; + pa0 += 2; + pb += 2; + } + if( n%2 ) + row0 += pb[0]*pa0[0]; + if( beta!=0 ) + y[0] = beta*y[0]+alpha*row0; + else + y[0] = alpha*row0; + y+=stride; + } +#endif +} + + +/************************************************************************* +This subroutine calculates fast MxN complex matrix-vector product: + + y := beta*y + alpha*A*x + +using generic C code, where A, x, y, alpha and beta are complex. + +If beta is zero, we do not use previous values of y (they are overwritten +by alpha*A*x without ever being read). However, when alpha is zero, we +still calculate A*x and multiply it by alpha (this distinction can be +important when A or x contain infinities/NANs). + +IMPORTANT: +* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block +* A must be stored in row-major order, as sequence of double precision + pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not + in doubles). +* Y may be referenced by cy (pointer to ae_complex) or + dy (pointer to array of double precision pair) depending on what type of + output you wish. Pass pointer to Y as one of these parameters, + AND SET OTHER PARAMETER TO NULL. +* both A and x must be aligned; y may be non-aligned. +*************************************************************************/ +void _ialglib_cmv(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta) +{ + ae_int_t i, j; + const double *pa, *parow, *pb; + + parow = a; + for(i=0; i<m; i++) + { + double v0 = 0, v1 = 0; + pa = parow; + pb = x; + for(j=0; j<n; j++) + { + v0 += pa[0]*pb[0]; + v1 += pa[0]*pb[1]; + v0 -= pa[1]*pb[1]; + v1 += pa[1]*pb[0]; + + pa += 2; + pb += 2; + } + if( cy!=NULL ) + { + double tx = (beta.x*cy->x-beta.y*cy->y)+(alpha.x*v0-alpha.y*v1); + double ty = (beta.x*cy->y+beta.y*cy->x)+(alpha.x*v1+alpha.y*v0); + cy->x = tx; + cy->y = ty; + cy+=stride; + } + else + { + double tx = (beta.x*dy[0]-beta.y*dy[1])+(alpha.x*v0-alpha.y*v1); + double ty = (beta.x*dy[1]+beta.y*dy[0])+(alpha.x*v1+alpha.y*v0); + dy[0] = tx; + dy[1] = ty; + dy += 2*stride; + } + parow += 2*alglib_c_block; + } +} + + +/************************************************************************* +This subroutine calculates fast MxN complex matrix-vector product: + + y := beta*y + alpha*A*x + +using generic C code, where A, x, y, alpha and beta are complex. + +If beta is zero, we do not use previous values of y (they are overwritten +by alpha*A*x without ever being read). However, when alpha is zero, we +still calculate A*x and multiply it by alpha (this distinction can be +important when A or x contain infinities/NANs). + +IMPORTANT: +* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block +* A must be stored in row-major order, as sequence of double precision + pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not + in doubles). +* Y may be referenced by cy (pointer to ae_complex) or + dy (pointer to array of double precision pair) depending on what type of + output you wish. Pass pointer to Y as one of these parameters, + AND SET OTHER PARAMETER TO NULL. +* both A and x must be aligned; y may be non-aligned. + +This function supports SSE2; it can be used when: +1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time) +2. ae_cpuid() result contains CPU_SSE2 (checked at run-time) + +If (1) is failed, this function will still be defined and callable, but it +will do nothing. If (2) is failed , call to this function will probably +crash your system. + +If you want to know whether it is safe to call it, you should check +results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable +and will do its work. +*************************************************************************/ +void _ialglib_cmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta) +{ +#if defined(AE_HAS_SSE2_INTRINSICS) + ae_int_t i, j, m2; + const double *pa0, *pa1, *parow, *pb; + __m128d vbeta, vbetax, vbetay; + __m128d valpha, valphax, valphay; + + m2 = m/2; + parow = a; + if( cy!=NULL ) + { + dy = (double*)cy; + cy = NULL; + } + vbeta = _mm_loadh_pd(_mm_load_sd(&beta.x),&beta.y); + vbetax = _mm_unpacklo_pd(vbeta,vbeta); + vbetay = _mm_unpackhi_pd(vbeta,vbeta); + valpha = _mm_loadh_pd(_mm_load_sd(&alpha.x),&alpha.y); + valphax = _mm_unpacklo_pd(valpha,valpha); + valphay = _mm_unpackhi_pd(valpha,valpha); + for(i=0; i<m2; i++) + { + double v0 = 0, v1 = 0, v2 = 0, v3 = 0; + double tx, ty; + __m128d vx, vy, vt0, vt1, vt2, vt3, vt4, vt5, vrx, vry, vtx, vty, vbeta; + pa0 = parow; + pa1 = parow+2*alglib_c_block; + pb = x; + vx = _mm_setzero_pd(); + vy = _mm_setzero_pd(); + for(j=0; j<n; j++) + { + vt0 = _mm_load1_pd(pb); + vt1 = _mm_load1_pd(pb+1); + vt2 = _mm_load_pd(pa0); + vt3 = _mm_load_pd(pa1); + vt5 = _mm_unpacklo_pd(vt2,vt3); + vt4 = _mm_unpackhi_pd(vt2,vt3); + vt2 = vt5; + vt3 = vt4; + + vt2 = _mm_mul_pd(vt2,vt0); + vx = _mm_add_pd(vx,vt2); + vt3 = _mm_mul_pd(vt3,vt1); + vx = _mm_sub_pd(vx,vt3); + vt4 = _mm_mul_pd(vt4,vt0); + vy = _mm_add_pd(vy,vt4); + vt5 = _mm_mul_pd(vt5,vt1); + vy = _mm_add_pd(vy,vt5); + + pa0 += 2; + pa1 += 2; + pb += 2; + } + if( beta.x==0.0 && beta.y==0.0 ) + { + vrx = _mm_setzero_pd(); + vry = _mm_setzero_pd(); + } + else + { + vtx = _mm_loadh_pd(_mm_load_sd(dy+0),dy+2*stride+0); + vty = _mm_loadh_pd(_mm_load_sd(dy+1),dy+2*stride+1); + vrx = _mm_sub_pd(_mm_mul_pd(vbetax,vtx),_mm_mul_pd(vbetay,vty)); + vry = _mm_add_pd(_mm_mul_pd(vbetax,vty),_mm_mul_pd(vbetay,vtx)); + } + vtx = _mm_sub_pd(_mm_mul_pd(valphax,vx),_mm_mul_pd(valphay,vy)); + vty = _mm_add_pd(_mm_mul_pd(valphax,vy),_mm_mul_pd(valphay,vx)); + vrx = _mm_add_pd(vrx,vtx); + vry = _mm_add_pd(vry,vty); + _mm_storel_pd(dy+0, vrx); + _mm_storeh_pd(dy+2*stride+0, vrx); + _mm_storel_pd(dy+1, vry); + _mm_storeh_pd(dy+2*stride+1, vry); + dy += 4*stride; + parow += 4*alglib_c_block; + } + if( m%2 ) + { + double v0 = 0, v1 = 0, v2 = 0, v3 = 0; + double tx, ty; + pa0 = parow; + pb = x; + for(j=0; j<n; j++) + { + v0 += pa0[0]*pb[0]; + v1 += pa0[0]*pb[1]; + v0 -= pa0[1]*pb[1]; + v1 += pa0[1]*pb[0]; + + pa0 += 2; + pb += 2; + } + if( beta.x==0.0 && beta.y==0.0 ) + { + tx = 0.0; + ty = 0.0; + } + else + { + tx = beta.x*dy[0]-beta.y*dy[1]; + ty = beta.x*dy[1]+beta.y*dy[0]; + } + tx += alpha.x*v0-alpha.y*v1; + ty += alpha.x*v1+alpha.y*v0; + dy[0] = tx; + dy[1] = ty; + dy += 2*stride; + parow += 2*alglib_c_block; + } +#endif +} + +/******************************************************************** +This subroutine sets vector to zero +********************************************************************/ +void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride) +{ + ae_int_t i; + if( stride==1 ) + { + for(i=0; i<n; i++,p++) + *p = 0.0; + } + else + { + for(i=0; i<n; i++,p+=stride) + *p = 0.0; + } +} + +/******************************************************************** +This subroutine sets vector to zero +********************************************************************/ +void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride) +{ + ae_int_t i; + if( stride==1 ) + { + for(i=0; i<n; i++,p++) + { + p->x = 0.0; + p->y = 0.0; + } + } + else + { + for(i=0; i<n; i++,p+=stride) + { + p->x = 0.0; + p->y = 0.0; + } + } +} + + +/******************************************************************** +This subroutine copies unaligned real vector +********************************************************************/ +void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb) +{ + ae_int_t i, n2; + if( stridea==1 && strideb==1 ) + { + n2 = n/2; + for(i=n2; i!=0; i--, a+=2, b+=2) + { + b[0] = a[0]; + b[1] = a[1]; + } + if( n%2!=0 ) + b[0] = a[0]; + } + else + { + for(i=0; i<n; i++,a+=stridea,b+=strideb) + *b = *a; + } +} + + +/******************************************************************** +This subroutine copies unaligned complex vector +(passed as ae_complex*) + +1. strideb is stride measured in complex numbers, not doubles +2. conj may be "N" (no conj.) or "C" (conj.) +********************************************************************/ +void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj) +{ + ae_int_t i; + + /* + * more general case + */ + if( conj[0]=='N' || conj[0]=='n' ) + { + for(i=0; i<n; i++,a+=stridea,b+=2*strideb) + { + b[0] = a->x; + b[1] = a->y; + } + } + else + { + for(i=0; i<n; i++,a+=stridea,b+=2*strideb) + { + b[0] = a->x; + b[1] = -a->y; + } + } +} + + +/******************************************************************** +This subroutine copies unaligned complex vector (passed as double*) + +1. strideb is stride measured in complex numbers, not doubles +2. conj may be "N" (no conj.) or "C" (conj.) +********************************************************************/ +void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj) +{ + ae_int_t i; + + /* + * more general case + */ + if( conj[0]=='N' || conj[0]=='n' ) + { + for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb) + { + b[0] = a[0]; + b[1] = a[1]; + } + } + else + { + for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb) + { + b[0] = a[0]; + b[1] = -a[1]; + } + } +} + + +/******************************************************************** +This subroutine copies matrix from non-aligned non-contigous storage +to aligned contigous storage + +A: +* MxN +* non-aligned +* non-contigous +* may be transformed during copying (as prescribed by op) + +B: +* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used) +* aligned +* stride is alglib_r_block + +Transformation types: +* 0 - no transform +* 1 - transposition +********************************************************************/ +void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b) +{ + ae_int_t i, j, n2; + const double *psrc; + double *pdst; + if( op==0 ) + { + n2 = n/2; + for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc=a) + { + for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2) + { + pdst[0] = psrc[0]; + pdst[1] = psrc[1]; + } + if( n%2!=0 ) + pdst[0] = psrc[0]; + } + } + else + { + n2 = n/2; + for(i=0,psrc=a; i<m; i++,a+=stride,b+=1,psrc=a) + { + for(j=0,pdst=b; j<n2; j++,pdst+=alglib_twice_r_block,psrc+=2) + { + pdst[0] = psrc[0]; + pdst[alglib_r_block] = psrc[1]; + } + if( n%2!=0 ) + pdst[0] = psrc[0]; + } + } +} + + +/******************************************************************** +This subroutine copies matrix from non-aligned non-contigous storage +to aligned contigous storage + +A: +* MxN +* non-aligned +* non-contigous +* may be transformed during copying (as prescribed by op) + +B: +* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used) +* aligned +* stride is alglib_r_block + +Transformation types: +* 0 - no transform +* 1 - transposition + +This function supports SSE2; it can be used when: +1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time) +2. ae_cpuid() result contains CPU_SSE2 (checked at run-time) + +If (1) is failed, this function will still be defined and callable, but it +will do nothing. If (2) is failed , call to this function will probably +crash your system. + +If you want to know whether it is safe to call it, you should check +results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable +and will do its work. +********************************************************************/ +void _ialglib_mcopyblock_sse2(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b) +{ +#if defined(AE_HAS_SSE2_INTRINSICS) + ae_int_t i, j, nb8, mb2, ntail; + const double *psrc0, *psrc1; + double *pdst; + nb8 = n/8; + ntail = n-8*nb8; + if( op==0 ) + { + for(i=0,psrc0=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc0=a) + { + pdst=b; + for(j=0; j<nb8; j++) + { + __m128d v0, v1; + v0 = _mm_loadu_pd(psrc0); + _mm_store_pd(pdst, v0); + v1 = _mm_loadu_pd(psrc0+2); + _mm_store_pd(pdst+2, v1); + v1 = _mm_loadu_pd(psrc0+4); + _mm_store_pd(pdst+4, v1); + v1 = _mm_loadu_pd(psrc0+6); + _mm_store_pd(pdst+6, v1); + pdst+=8; + psrc0+=8; + } + for(j=0; j<ntail; j++) + pdst[j] = psrc0[j]; + } + } + else + { + const double *arow0, *arow1; + double *bcol0, *bcol1, *pdst0, *pdst1; + ae_int_t nb4, ntail, n2; + + n2 = n/2; + mb2 = m/2; + nb4 = n/4; + ntail = n-4*nb4; + + arow0 = a; + arow1 = a+stride; + bcol0 = b; + bcol1 = b+1; + for(i=0; i<mb2; i++) + { + psrc0 = arow0; + psrc1 = arow1; + pdst0 = bcol0; + pdst1 = bcol1; + for(j=0; j<nb4; j++) + { + __m128d v0, v1, v2, v3; + v0 = _mm_loadu_pd(psrc0); + v1 = _mm_loadu_pd(psrc1); + v2 = _mm_loadu_pd(psrc0+2); + v3 = _mm_loadu_pd(psrc1+2); + _mm_store_pd(pdst0, _mm_unpacklo_pd(v0,v1)); + _mm_store_pd(pdst0+alglib_r_block, _mm_unpackhi_pd(v0,v1)); + _mm_store_pd(pdst0+2*alglib_r_block, _mm_unpacklo_pd(v2,v3)); + _mm_store_pd(pdst0+3*alglib_r_block, _mm_unpackhi_pd(v2,v3)); + + pdst0 += 4*alglib_r_block; + pdst1 += 4*alglib_r_block; + psrc0 += 4; + psrc1 += 4; + } + for(j=0; j<ntail; j++) + { + pdst0[0] = psrc0[0]; + pdst1[0] = psrc1[0]; + pdst0 += alglib_r_block; + pdst1 += alglib_r_block; + psrc0 += 1; + psrc1 += 1; + } + arow0 += 2*stride; + arow1 += 2*stride; + bcol0 += 2; + bcol1 += 2; + } + if( m%2 ) + { + psrc0 = arow0; + pdst0 = bcol0; + for(j=0; j<n2; j++) + { + pdst0[0] = psrc0[0]; + pdst0[alglib_r_block] = psrc0[1]; + pdst0 += alglib_twice_r_block; + psrc0 += 2; + } + if( n%2!=0 ) + pdst0[0] = psrc0[0]; + } + } +#endif +} + + +/******************************************************************** +This subroutine copies matrix from aligned contigous storage to non- +aligned non-contigous storage + +A: +* MxN +* aligned +* contigous +* stride is alglib_r_block +* may be transformed during copying (as prescribed by op) + +B: +* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used) +* non-aligned, non-contigous + +Transformation types: +* 0 - no transform +* 1 - transposition +********************************************************************/ +void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride) +{ + ae_int_t i, j, n2; + const double *psrc; + double *pdst; + if( op==0 ) + { + n2 = n/2; + for(i=0,psrc=a; i<m; i++,a+=alglib_r_block,b+=stride,psrc=a) + { + for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2) + { + pdst[0] = psrc[0]; + pdst[1] = psrc[1]; + } + if( n%2!=0 ) + pdst[0] = psrc[0]; + } + } + else + { + n2 = n/2; + for(i=0,psrc=a; i<m; i++,a++,b+=stride,psrc=a) + { + for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=alglib_twice_r_block) + { + pdst[0] = psrc[0]; + pdst[1] = psrc[alglib_r_block]; + } + if( n%2!=0 ) + pdst[0] = psrc[0]; + } + } +} + + +/******************************************************************** +This subroutine copies matrix from non-aligned non-contigous storage +to aligned contigous storage + +A: +* MxN +* non-aligned +* non-contigous +* may be transformed during copying (as prescribed by op) +* pointer to ae_complex is passed + +B: +* 2*alglib_c_block*alglib_c_block doubles (only MxN/NxM submatrix is used) +* aligned +* stride is alglib_c_block +* pointer to double is passed + +Transformation types: +* 0 - no transform +* 1 - transposition +* 2 - conjugate transposition +* 3 - conjugate, but no transposition +********************************************************************/ +void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b) +{ + ae_int_t i, j; + const ae_complex *psrc; + double *pdst; + if( op==0 ) + { + for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++) + { + pdst[0] = psrc->x; + pdst[1] = psrc->y; + } + } + if( op==1 ) + { + for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++) + { + pdst[0] = psrc->x; + pdst[1] = psrc->y; + } + } + if( op==2 ) + { + for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++) + { + pdst[0] = psrc->x; + pdst[1] = -psrc->y; + } + } + if( op==3 ) + { + for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++) + { + pdst[0] = psrc->x; + pdst[1] = -psrc->y; + } + } +} + + +/******************************************************************** +This subroutine copies matrix from aligned contigous storage to +non-aligned non-contigous storage + +A: +* 2*alglib_c_block*alglib_c_block doubles (only MxN submatrix is used) +* aligned +* stride is alglib_c_block +* pointer to double is passed +* may be transformed during copying (as prescribed by op) + +B: +* MxN +* non-aligned +* non-contigous +* pointer to ae_complex is passed + +Transformation types: +* 0 - no transform +* 1 - transposition +* 2 - conjugate transposition +* 3 - conjugate, but no transposition +********************************************************************/ +void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride) +{ + ae_int_t i, j; + const double *psrc; + ae_complex *pdst; + if( op==0 ) + { + for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2) + { + pdst->x = psrc[0]; + pdst->y = psrc[1]; + } + } + if( op==1 ) + { + for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block) + { + pdst->x = psrc[0]; + pdst->y = psrc[1]; + } + } + if( op==2 ) + { + for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block) + { + pdst->x = psrc[0]; + pdst->y = -psrc[1]; + } + } + if( op==3 ) + { + for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a) + for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2) + { + pdst->x = psrc[0]; + pdst->y = -psrc[1]; + } + } +} + + +/******************************************************************** +Real GEMM kernel +********************************************************************/ +ae_bool _ialglib_rmatrixgemm(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + double *_a, + ae_int_t _a_stride, + ae_int_t optypea, + double *_b, + ae_int_t _b_stride, + ae_int_t optypeb, + double beta, + double *_c, + ae_int_t _c_stride) +{ + int i; + double *crow; + double __abuf[alglib_r_block+alglib_simd_alignment]; + double __b[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(__abuf,alglib_simd_alignment); + double * const b = (double * const) ae_align(__b, alglib_simd_alignment); + void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv; + void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock; + + if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block || m<=0 || n<=0 || k<=0 || alpha==0.0 ) + return ae_false; + + /* + * Check for SSE2 support + */ + if( ae_cpuid() & CPU_SSE2 ) + { + rmv = &_ialglib_rmv_sse2; + mcopyblock = &_ialglib_mcopyblock_sse2; + } + + /* + * copy b + */ + if( optypeb==0 ) + mcopyblock(k, n, _b, 1, _b_stride, b); + else + mcopyblock(n, k, _b, 0, _b_stride, b); + + /* + * multiply B by A (from the right, by rows) + * and store result in C + */ + crow = _c; + if( optypea==0 ) + { + const double *arow = _a; + for(i=0; i<m; i++) + { + _ialglib_vcopy(k, arow, 1, abuf, 1); + if( beta==0 ) + _ialglib_vzero(n, crow, 1); + rmv(n, k, b, abuf, crow, 1, alpha, beta); + crow += _c_stride; + arow += _a_stride; + } + } + else + { + const double *acol = _a; + for(i=0; i<m; i++) + { + _ialglib_vcopy(k, acol, _a_stride, abuf, 1); + if( beta==0 ) + _ialglib_vzero(n, crow, 1); + rmv(n, k, b, abuf, crow, 1, alpha, beta); + crow += _c_stride; + acol++; + } + } + return ae_true; +} + + +/******************************************************************** +Complex GEMM kernel +********************************************************************/ +ae_bool _ialglib_cmatrixgemm(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + ae_complex *_a, + ae_int_t _a_stride, + ae_int_t optypea, + ae_complex *_b, + ae_int_t _b_stride, + ae_int_t optypeb, + ae_complex beta, + ae_complex *_c, + ae_int_t _c_stride) + { + const ae_complex *arow; + ae_complex *crow; + ae_int_t i; + double _loc_abuf[2*alglib_c_block+alglib_simd_alignment]; + double _loc_b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf,alglib_simd_alignment); + double * const b = (double * const) ae_align(_loc_b, alglib_simd_alignment); + ae_int_t brows; + ae_int_t bcols; + void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv; + + if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block ) + return ae_false; + + /* + * Check for SSE2 support + */ + if( ae_cpuid() & CPU_SSE2 ) + { + cmv = &_ialglib_cmv_sse2; + } + + /* + * copy b + */ + brows = optypeb==0 ? k : n; + bcols = optypeb==0 ? n : k; + if( optypeb==0 ) + _ialglib_mcopyblock_complex(brows, bcols, _b, 1, _b_stride, b); + if( optypeb==1 ) + _ialglib_mcopyblock_complex(brows, bcols, _b, 0, _b_stride, b); + if( optypeb==2 ) + _ialglib_mcopyblock_complex(brows, bcols, _b, 3, _b_stride, b); + + /* + * multiply B by A (from the right, by rows) + * and store result in C + */ + arow = _a; + crow = _c; + for(i=0; i<m; i++) + { + if( optypea==0 ) + { + _ialglib_vcopy_complex(k, arow, 1, abuf, 1, "No conj"); + arow += _a_stride; + } + else if( optypea==1 ) + { + _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "No conj"); + arow++; + } + else + { + _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "Conj"); + arow++; + } + if( beta.x==0 && beta.y==0 ) + _ialglib_vzero_complex(n, crow, 1); + cmv(n, k, b, abuf, crow, NULL, 1, alpha, beta); + crow += _c_stride; + } + return ae_true; +} + + +/******************************************************************** +complex TRSM kernel +********************************************************************/ +ae_bool _ialglib_cmatrixrighttrsm(ae_int_t m, + ae_int_t n, + ae_complex *_a, + ae_int_t _a_stride, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_complex *_x, + ae_int_t _x_stride) +{ + /* + * local buffers + */ + double *pdiag; + ae_int_t i; + double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf, alglib_simd_alignment); + double * const xbuf = (double * const) ae_align(_loc_xbuf, alglib_simd_alignment); + double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment); + ae_bool uppera; + void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv; + + if( m>alglib_c_block || n>alglib_c_block ) + return ae_false; + + /* + * Check for SSE2 support + */ + if( ae_cpuid() & CPU_SSE2 ) + { + cmv = &_ialglib_cmv_sse2; + } + + /* + * Prepare + */ + _ialglib_mcopyblock_complex(n, n, _a, optype, _a_stride, abuf); + _ialglib_mcopyblock_complex(m, n, _x, 0, _x_stride, xbuf); + if( isunit ) + for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1)) + { + pdiag[0] = 1.0; + pdiag[1] = 0.0; + } + if( optype==0 ) + uppera = isupper; + else + uppera = !isupper; + + /* + * Solve Y*A^-1=X where A is upper or lower triangular + */ + if( uppera ) + { + for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1)) + { + ae_complex tmp_c; + ae_complex beta; + ae_complex alpha; + tmp_c.x = pdiag[0]; + tmp_c.y = pdiag[1]; + beta = ae_c_d_div(1.0, tmp_c); + alpha.x = -beta.x; + alpha.y = -beta.y; + _ialglib_vcopy_dcomplex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj"); + cmv(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); + } + _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride); + } + else + { + for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1)) + { + ae_complex tmp_c; + ae_complex beta; + ae_complex alpha; + tmp_c.x = pdiag[0]; + tmp_c.y = pdiag[1]; + beta = ae_c_d_div(1.0, tmp_c); + alpha.x = -beta.x; + alpha.y = -beta.y; + _ialglib_vcopy_dcomplex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj"); + cmv(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); + } + _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride); + } + return ae_true; +} + + +/******************************************************************** +real TRSM kernel +********************************************************************/ +ae_bool _ialglib_rmatrixrighttrsm(ae_int_t m, + ae_int_t n, + double *_a, + ae_int_t _a_stride, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + double *_x, + ae_int_t _x_stride) +{ + /* + * local buffers + */ + double *pdiag; + ae_int_t i; + double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf, alglib_simd_alignment); + double * const xbuf = (double * const) ae_align(_loc_xbuf, alglib_simd_alignment); + double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment); + ae_bool uppera; + void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv; + void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock; + + if( m>alglib_r_block || n>alglib_r_block ) + return ae_false; + + /* + * Check for SSE2 support + */ + if( ae_cpuid() & CPU_SSE2 ) + { + rmv = &_ialglib_rmv_sse2; + mcopyblock = &_ialglib_mcopyblock_sse2; + } + + /* + * Prepare + */ + mcopyblock(n, n, _a, optype, _a_stride, abuf); + mcopyblock(m, n, _x, 0, _x_stride, xbuf); + if( isunit ) + for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1) + *pdiag = 1.0; + if( optype==0 ) + uppera = isupper; + else + uppera = !isupper; + + /* + * Solve Y*A^-1=X where A is upper or lower triangular + */ + if( uppera ) + { + for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1) + { + double beta = 1.0/(*pdiag); + double alpha = -beta; + _ialglib_vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1); + rmv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta); + } + _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride); + } + else + { + for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1) + { + double beta = 1.0/(*pdiag); + double alpha = -beta; + _ialglib_vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf+i+1, 1); + rmv(m, n-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta); + } + _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride); + } + return ae_true; +} + + +/******************************************************************** +complex TRSM kernel +********************************************************************/ +ae_bool _ialglib_cmatrixlefttrsm(ae_int_t m, + ae_int_t n, + ae_complex *_a, + ae_int_t _a_stride, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_complex *_x, + ae_int_t _x_stride) +{ + /* + * local buffers + */ + double *pdiag, *arow; + ae_int_t i; + double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf, alglib_simd_alignment); + double * const xbuf = (double * const) ae_align(_loc_xbuf, alglib_simd_alignment); + double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment); + ae_bool uppera; + void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv; + + if( m>alglib_c_block || n>alglib_c_block ) + return ae_false; + + /* + * Check for SSE2 support + */ + if( ae_cpuid() & CPU_SSE2 ) + { + cmv = &_ialglib_cmv_sse2; + } + + /* + * Prepare + * Transpose X (so we may use mv, which calculates A*x, but not x*A) + */ + _ialglib_mcopyblock_complex(m, m, _a, optype, _a_stride, abuf); + _ialglib_mcopyblock_complex(m, n, _x, 1, _x_stride, xbuf); + if( isunit ) + for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1)) + { + pdiag[0] = 1.0; + pdiag[1] = 0.0; + } + if( optype==0 ) + uppera = isupper; + else + uppera = !isupper; + + /* + * Solve A^-1*Y^T=X^T where A is upper or lower triangular + */ + if( uppera ) + { + for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1)) + { + ae_complex tmp_c; + ae_complex beta; + ae_complex alpha; + tmp_c.x = pdiag[0]; + tmp_c.y = pdiag[1]; + beta = ae_c_d_div(1.0, tmp_c); + alpha.x = -beta.x; + alpha.y = -beta.y; + _ialglib_vcopy_dcomplex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj"); + cmv(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); + } + _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride); + } + else + { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block) + { + ae_complex tmp_c; + ae_complex beta; + ae_complex alpha; + tmp_c.x = pdiag[0]; + tmp_c.y = pdiag[1]; + beta = ae_c_d_div(1.0, tmp_c); + alpha.x = -beta.x; + alpha.y = -beta.y; + _ialglib_vcopy_dcomplex(i, arow, 1, tmpbuf, 1, "No conj"); + cmv(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); + } + _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride); + } + return ae_true; +} + + +/******************************************************************** +real TRSM kernel +********************************************************************/ +ae_bool _ialglib_rmatrixlefttrsm(ae_int_t m, + ae_int_t n, + double *_a, + ae_int_t _a_stride, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + double *_x, + ae_int_t _x_stride) +{ + /* + * local buffers + */ + double *pdiag, *arow; + ae_int_t i; + double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf, alglib_simd_alignment); + double * const xbuf = (double * const) ae_align(_loc_xbuf, alglib_simd_alignment); + double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment); + ae_bool uppera; + void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv; + void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock; + + if( m>alglib_r_block || n>alglib_r_block ) + return ae_false; + + /* + * Check for SSE2 support + */ + if( ae_cpuid() & CPU_SSE2 ) + { + rmv = &_ialglib_rmv_sse2; + mcopyblock = &_ialglib_mcopyblock_sse2; + } + + /* + * Prepare + * Transpose X (so we may use mv, which calculates A*x, but not x*A) + */ + mcopyblock(m, m, _a, optype, _a_stride, abuf); + mcopyblock(m, n, _x, 1, _x_stride, xbuf); + if( isunit ) + for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1) + *pdiag = 1.0; + if( optype==0 ) + uppera = isupper; + else + uppera = !isupper; + + /* + * Solve A^-1*Y^T=X^T where A is upper or lower triangular + */ + if( uppera ) + { + for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1) + { + double beta = 1.0/(*pdiag); + double alpha = -beta; + _ialglib_vcopy(m-1-i, pdiag+1, 1, tmpbuf+i+1, 1); + rmv(n, m-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta); + } + _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride); + } + else + { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block) + { + double beta = 1.0/(*pdiag); + double alpha = -beta; + _ialglib_vcopy(i, arow, 1, tmpbuf, 1); + rmv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta); + } + _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride); + } + return ae_true; +} + + +/******************************************************************** +complex SYRK kernel +********************************************************************/ +ae_bool _ialglib_cmatrixsyrk(ae_int_t n, + ae_int_t k, + double alpha, + ae_complex *_a, + ae_int_t _a_stride, + ae_int_t optypea, + double beta, + ae_complex *_c, + ae_int_t _c_stride, + ae_bool isupper) +{ + /* + * local buffers + */ + double *arow, *crow; + ae_complex c_alpha, c_beta; + ae_int_t i; + double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double _loc_cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; + double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf, alglib_simd_alignment); + double * const cbuf = (double * const) ae_align(_loc_cbuf, alglib_simd_alignment); + double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment); + + if( n>alglib_c_block || k>alglib_c_block ) + return ae_false; + if( n==0 ) + return ae_true; + + /* + * copy A and C, task is transformed to "A*A^H"-form. + * if beta==0, then C is filled by zeros (and not referenced) + * + * alpha==0 or k==0 are correctly processed (A is not referenced) + */ + c_alpha.x = alpha; + c_alpha.y = 0; + c_beta.x = beta; + c_beta.y = 0; + if( alpha==0 ) + k = 0; + if( k>0 ) + { + if( optypea==0 ) + _ialglib_mcopyblock_complex(n, k, _a, 3, _a_stride, abuf); + else + _ialglib_mcopyblock_complex(k, n, _a, 1, _a_stride, abuf); + } + _ialglib_mcopyblock_complex(n, n, _c, 0, _c_stride, cbuf); + if( beta==0 ) + { + for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block) + if( isupper ) + _ialglib_vzero(2*(n-i), crow+2*i, 1); + else + _ialglib_vzero(2*(i+1), crow, 1); + } + + + /* + * update C + */ + if( isupper ) + { + for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block) + { + _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj"); + _ialglib_cmv(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, c_alpha, c_beta); + } + } + else + { + for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block) + { + _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj"); + _ialglib_cmv(i+1, k, abuf, tmpbuf, NULL, crow, 1, c_alpha, c_beta); + } + } + + /* + * copy back + */ + _ialglib_mcopyunblock_complex(n, n, cbuf, 0, _c, _c_stride); + + return ae_true; +} + + +/******************************************************************** +real SYRK kernel +********************************************************************/ +ae_bool _ialglib_rmatrixsyrk(ae_int_t n, + ae_int_t k, + double alpha, + double *_a, + ae_int_t _a_stride, + ae_int_t optypea, + double beta, + double *_c, + ae_int_t _c_stride, + ae_bool isupper) +{ + /* + * local buffers + */ + double *arow, *crow; + ae_int_t i; + double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double _loc_cbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment]; + double * const abuf = (double * const) ae_align(_loc_abuf, alglib_simd_alignment); + double * const cbuf = (double * const) ae_align(_loc_cbuf, alglib_simd_alignment); + + if( n>alglib_r_block || k>alglib_r_block ) + return ae_false; + if( n==0 ) + return ae_true; + + /* + * copy A and C, task is transformed to "A*A^T"-form. + * if beta==0, then C is filled by zeros (and not referenced) + * + * alpha==0 or k==0 are correctly processed (A is not referenced) + */ + if( alpha==0 ) + k = 0; + if( k>0 ) + { + if( optypea==0 ) + _ialglib_mcopyblock(n, k, _a, 0, _a_stride, abuf); + else + _ialglib_mcopyblock(k, n, _a, 1, _a_stride, abuf); + } + _ialglib_mcopyblock(n, n, _c, 0, _c_stride, cbuf); + if( beta==0 ) + { + for(i=0,crow=cbuf; i<n; i++,crow+=alglib_r_block) + if( isupper ) + _ialglib_vzero(n-i, crow+i, 1); + else + _ialglib_vzero(i+1, crow, 1); + } + + + /* + * update C + */ + if( isupper ) + { + for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block) + { + _ialglib_rmv(n-i, k, arow, arow, crow+i, 1, alpha, beta); + } + } + else + { + for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block) + { + _ialglib_rmv(i+1, k, abuf, arow, crow, 1, alpha, beta); + } + } + + /* + * copy back + */ + _ialglib_mcopyunblock(n, n, cbuf, 0, _c, _c_stride); + + return ae_true; +} + + +/******************************************************************** +complex rank-1 kernel +********************************************************************/ +ae_bool _ialglib_cmatrixrank1(ae_int_t m, + ae_int_t n, + ae_complex *_a, + ae_int_t _a_stride, + ae_complex *_u, + ae_complex *_v) +{ + ae_complex *arow, *pu, *pv, *vtmp, *dst; + ae_int_t n2 = n/2; + ae_int_t i, j; + + /* + * update pairs of rows + */ + arow = _a; + pu = _u; + vtmp = _v; + for(i=0; i<m; i++, arow+=_a_stride, pu++) + { + /* + * update by two + */ + for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2) + { + double ux = pu[0].x; + double uy = pu[0].y; + double v0x = pv[0].x; + double v0y = pv[0].y; + double v1x = pv[1].x; + double v1y = pv[1].y; + dst[0].x += ux*v0x-uy*v0y; + dst[0].y += ux*v0y+uy*v0x; + dst[1].x += ux*v1x-uy*v1y; + dst[1].y += ux*v1y+uy*v1x; + } + + /* + * final update + */ + if( n%2!=0 ) + { + double ux = pu[0].x; + double uy = pu[0].y; + double vx = pv[0].x; + double vy = pv[0].y; + dst[0].x += ux*vx-uy*vy; + dst[0].y += ux*vy+uy*vx; + } + } + return ae_true; +} + + +/******************************************************************** +real rank-1 kernel +********************************************************************/ +ae_bool _ialglib_rmatrixrank1(ae_int_t m, + ae_int_t n, + double *_a, + ae_int_t _a_stride, + double *_u, + double *_v) +{ + double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1; + ae_int_t m2 = m/2; + ae_int_t n2 = n/2; + ae_int_t stride = _a_stride; + ae_int_t stride2 = 2*_a_stride; + ae_int_t i, j; + + /* + * update pairs of rows + */ + arow0 = _a; + arow1 = arow0+stride; + pu = _u; + vtmp = _v; + for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2) + { + /* + * update by two + */ + for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2) + { + dst0[0] += pu[0]*pv[0]; + dst0[1] += pu[0]*pv[1]; + dst1[0] += pu[1]*pv[0]; + dst1[1] += pu[1]*pv[1]; + } + + /* + * final update + */ + if( n%2!=0 ) + { + dst0[0] += pu[0]*pv[0]; + dst1[0] += pu[1]*pv[0]; + } + } + + /* + * update last row + */ + if( m%2!=0 ) + { + /* + * update by two + */ + for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2) + { + dst0[0] += pu[0]*pv[0]; + dst0[1] += pu[0]*pv[1]; + } + + /* + * final update + */ + if( n%2!=0 ) + dst0[0] += pu[0]*pv[0]; + } + return ae_true; +} + + +/******************************************************************** +Interface functions for efficient kernels +********************************************************************/ +ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + ae_matrix *_a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + ae_matrix *_b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + ae_matrix *_c, + ae_int_t ic, + ae_int_t jc) +{ + return _ialglib_rmatrixgemm(m, n, k, alpha, _a->ptr.pp_double[ia]+ja, _a->stride, optypea, _b->ptr.pp_double[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_double[ic]+jc, _c->stride); +} + +ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + ae_matrix *_a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + ae_matrix *_b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + ae_matrix *_c, + ae_int_t ic, + ae_int_t jc) +{ + return _ialglib_cmatrixgemm(m, n, k, alpha, _a->ptr.pp_complex[ia]+ja, _a->stride, optypea, _b->ptr.pp_complex[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_complex[ic]+jc, _c->stride); +} + +ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2) +{ + return _ialglib_cmatrixrighttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride); +} + +ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2) +{ + return _ialglib_rmatrixrighttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride); +} + +ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2) +{ + return _ialglib_cmatrixlefttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride); +} + +ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2) +{ + return _ialglib_rmatrixlefttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride); +} + +ae_bool _ialglib_i_cmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + ae_matrix *c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper) +{ + return _ialglib_cmatrixsyrk(n, k, alpha, &a->ptr.pp_complex[ia][ja], a->stride, optypea, beta, &c->ptr.pp_complex[ic][jc], c->stride, isupper); +} + +ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + ae_matrix *c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper) +{ + return _ialglib_rmatrixsyrk(n, k, alpha, &a->ptr.pp_double[ia][ja], a->stride, optypea, beta, &c->ptr.pp_double[ic][jc], c->stride, isupper); +} + +ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_vector *u, + ae_int_t uoffs, + ae_vector *v, + ae_int_t voffs) +{ + return _ialglib_cmatrixrank1(m, n, &a->ptr.pp_complex[ia][ja], a->stride, &u->ptr.p_complex[uoffs], &v->ptr.p_complex[voffs]); +} + +ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_vector *u, + ae_int_t uoffs, + ae_vector *v, + ae_int_t voffs) +{ + return _ialglib_rmatrixrank1(m, n, &a->ptr.pp_double[ia][ja], a->stride, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]); +} + + + + +/******************************************************************** +This function reads rectangular matrix A given by two column pointers +col0 and col1 and stride src_stride and moves it into contiguous row- +by-row storage given by dst. + +It can handle following special cases: +* col1==NULL in this case second column of A is filled by zeros +********************************************************************/ +void _ialglib_pack_n2( + double *col0, + double *col1, + ae_int_t n, + ae_int_t src_stride, + double *dst) +{ + ae_int_t n2, j, stride2; + + /* + * handle special case + */ + if( col1==NULL ) + { + for(j=0; j<n; j++) + { + dst[0] = *col0; + dst[1] = 0.0; + col0 += src_stride; + dst += 2; + } + return; + } + + /* + * handle general case + */ + n2 = n/2; + stride2 = src_stride*2; + for(j=0; j<n2; j++) + { + dst[0] = *col0; + dst[1] = *col1; + dst[2] = col0[src_stride]; + dst[3] = col1[src_stride]; + col0 += stride2; + col1 += stride2; + dst += 4; + } + if( n%2 ) + { + dst[0] = *col0; + dst[1] = *col1; + } +} + +/************************************************************************* +This function reads rectangular matrix A given by two column pointers col0 +and col1 and stride src_stride and moves it into contiguous row-by-row +storage given by dst. + +dst must be aligned, col0 and col1 may be non-aligned. + +It can handle following special cases: +* col1==NULL in this case second column of A is filled by zeros +* src_stride==1 efficient SSE-based code is used +* col1-col0==1 efficient SSE-based code is used + +This function supports SSE2; it can be used when: +1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time) +2. ae_cpuid() result contains CPU_SSE2 (checked at run-time) + +If (1) is failed, this function will still be defined and callable, but it +will do nothing. If (2) is failed , call to this function will probably +crash your system. + +If you want to know whether it is safe to call it, you should check +results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable +and will do its work. +*************************************************************************/ +void _ialglib_pack_n2_sse2( + double *col0, + double *col1, + ae_int_t n, + ae_int_t src_stride, + double *dst) +{ +#if defined(AE_HAS_SSE2_INTRINSICS) + ae_int_t n2, j, stride2; + + /* + * handle special case: col1==NULL + */ + if( col1==NULL ) + { + for(j=0; j<n; j++) + { + dst[0] = *col0; + dst[1] = 0.0; + col0 += src_stride; + dst += 2; + } + return; + } + + /* + * handle unit stride + */ + if( src_stride==1 ) + { + __m128d v0, v1, r0, r1; + n2 = n/2; + for(j=0; j<n2; j++) + { + v0 = _mm_loadu_pd(col0); + col0 += 2; + v1 = _mm_loadu_pd(col1); + col1 += 2; + _mm_store_pd(dst, _mm_unpacklo_pd(v0,v1)); + _mm_store_pd(dst+2,_mm_unpackhi_pd(v0,v1)); + dst += 4; + } + if( n%2 ) + { + dst[0] = *col0; + dst[1] = *col1; + } + return; + } + + /* + * handle col1-col0==1 + */ + if( col1-col0==1 ) + { + __m128d v0, v1; + n2 = n/2; + stride2 = 2*src_stride; + for(j=0; j<n2; j++) + { + v0 = _mm_loadu_pd(col0); + v1 = _mm_loadu_pd(col0+src_stride); + _mm_store_pd(dst, v0); + _mm_store_pd(dst+2,v1); + col0 += stride2; + dst += 4; + } + if( n%2 ) + { + dst[0] = col0[0]; + dst[1] = col0[1]; + } + return; + } + + /* + * handle general case + */ + n2 = n/2; + stride2 = src_stride*2; + for(j=0; j<n2; j++) + { + dst[0] = *col0; + dst[1] = *col1; + dst[2] = col0[src_stride]; + dst[3] = col1[src_stride]; + col0 += stride2; + col1 += stride2; + dst += 4; + } + if( n%2 ) + { + dst[0] = *col0; + dst[1] = *col1; + } +#endif +} + + +/******************************************************************** +This function calculates R := alpha*A'*B+beta*R where A and B are Kx2 +matrices stored in contiguous row-by-row storage, R is 2x2 matrix +stored in non-contiguous row-by-row storage. + +A and B must be aligned; R may be non-aligned. + +If beta is zero, contents of R is ignored (not multiplied by zero - +just ignored). + +However, when alpha is zero, we still calculate A'*B, which is +multiplied by zero afterwards. + +Function accepts additional parameter store_mode: +* if 0, full R is stored +* if 1, only first row of R is stored +* if 2, only first column of R is stored +* if 3, only top left element of R is stored +********************************************************************/ +void _ialglib_mm22(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode) +{ + double v00, v01, v10, v11; + ae_int_t t; + v00 = 0.0; + v01 = 0.0; + v10 = 0.0; + v11 = 0.0; + for(t=0; t<k; t++) + { + v00 += a[0]*b[0]; + v01 += a[0]*b[1]; + v10 += a[1]*b[0]; + v11 += a[1]*b[1]; + a+=2; + b+=2; + } + if( store_mode==0 ) + { + if( beta==0 ) + { + r[0] = alpha*v00; + r[1] = alpha*v01; + r[stride+0] = alpha*v10; + r[stride+1] = alpha*v11; + } + else + { + r[0] = beta*r[0] + alpha*v00; + r[1] = beta*r[1] + alpha*v01; + r[stride+0] = beta*r[stride+0] + alpha*v10; + r[stride+1] = beta*r[stride+1] + alpha*v11; + } + return; + } + if( store_mode==1 ) + { + if( beta==0 ) + { + r[0] = alpha*v00; + r[1] = alpha*v01; + } + else + { + r[0] = beta*r[0] + alpha*v00; + r[1] = beta*r[1] + alpha*v01; + } + return; + } + if( store_mode==2 ) + { + if( beta==0 ) + { + r[0] =alpha*v00; + r[stride+0] = alpha*v10; + } + else + { + r[0] = beta*r[0] + alpha*v00; + r[stride+0] = beta*r[stride+0] + alpha*v10; + } + return; + } + if( store_mode==3 ) + { + if( beta==0 ) + { + r[0] = alpha*v00; + } + else + { + r[0] = beta*r[0] + alpha*v00; + } + return; + } +} + + +/******************************************************************** +This function calculates R := alpha*A'*B+beta*R where A and B are Kx2 +matrices stored in contiguous row-by-row storage, R is 2x2 matrix +stored in non-contiguous row-by-row storage. + +A and B must be aligned; R may be non-aligned. + +If beta is zero, contents of R is ignored (not multiplied by zero - +just ignored). + +However, when alpha is zero, we still calculate A'*B, which is +multiplied by zero afterwards. + +Function accepts additional parameter store_mode: +* if 0, full R is stored +* if 1, only first row of R is stored +* if 2, only first column of R is stored +* if 3, only top left element of R is stored + +This function supports SSE2; it can be used when: +1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time) +2. ae_cpuid() result contains CPU_SSE2 (checked at run-time) + +If (1) is failed, this function will still be defined and callable, but it +will do nothing. If (2) is failed , call to this function will probably +crash your system. + +If you want to know whether it is safe to call it, you should check +results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable +and will do its work. +********************************************************************/ +void _ialglib_mm22_sse2(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode) +{ +#if defined(AE_HAS_SSE2_INTRINSICS) + /* + * We calculate product of two Kx2 matrices (result is 2x2). + * VA and VB store result as follows: + * + * [ VD[0] VE[0] ] + * A'*B = [ ] + * [ VE[1] VD[1] ] + * + */ + __m128d va, vb, vd, ve, vt, vt0, vt1, r0, r1, valpha, vbeta; + ae_int_t t, k2, k3; + + /* + * calculate product + */ + k2 = k/2; + vd = _mm_setzero_pd(); + ve = _mm_setzero_pd(); + for(t=0; t<k2; t++) + { + vb = _mm_load_pd(b); + va = _mm_load_pd(a); + vt = vb; + vb = _mm_mul_pd(va,vb); + vt = _mm_shuffle_pd(vt, vt, 1); + vd = _mm_add_pd(vb,vd); + vt = _mm_mul_pd(va,vt); + vb = _mm_load_pd(b+2); + ve = _mm_add_pd(vt,ve); + va = _mm_load_pd(a+2); + vt = vb; + vb = _mm_mul_pd(va,vb); + vt = _mm_shuffle_pd(vt, vt, 1); + vd = _mm_add_pd(vb,vd); + vt = _mm_mul_pd(va,vt); + ve = _mm_add_pd(vt,ve); + a+=4; + b+=4; + } + if( k%2 ) + { + va = _mm_load_pd(a); + vb = _mm_load_pd(b); + vt = _mm_shuffle_pd(vb, vb, 1); + vd = _mm_add_pd(_mm_mul_pd(va,vb),vd); + ve = _mm_add_pd(_mm_mul_pd(va,vt),ve); + } + + /* + * r0 is first row of alpha*A'*B, r1 is second row + */ + valpha = _mm_load1_pd(&alpha); + r0 = _mm_mul_pd(_mm_unpacklo_pd(vd,ve),valpha); + r1 = _mm_mul_pd(_mm_unpackhi_pd(ve,vd),valpha); + + /* + * store + */ + if( store_mode==0 ) + { + if( beta==0 ) + { + _mm_storeu_pd(r,r0); + _mm_storeu_pd(r+stride,r1); + } + else + { + vbeta = _mm_load1_pd(&beta); + _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r0)); + _mm_storeu_pd(r+stride,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r1)); + } + return; + } + if( store_mode==1 ) + { + if( beta==0 ) + _mm_storeu_pd(r,r0); + else + _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),_mm_load1_pd(&beta)),r0)); + return; + } + if( store_mode==2 ) + { + double buf[4]; + _mm_storeu_pd(buf,r0); + _mm_storeu_pd(buf+2,r1); + if( beta==0 ) + { + r[0] =buf[0]; + r[stride+0] = buf[2]; + } + else + { + r[0] = beta*r[0] + buf[0]; + r[stride+0] = beta*r[stride+0] + buf[2]; + } + return; + } + if( store_mode==3 ) + { + double buf[2]; + _mm_storeu_pd(buf,r0); + if( beta==0 ) + r[0] = buf[0]; + else + r[0] = beta*r[0] + buf[0]; + return; + } +#endif +} + + +/************************************************************************* +This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1 +are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix +stored in non-contiguous row-by-row storage. + +A, B0 and B1 must be aligned; R may be non-aligned. + +Note that B0 and B1 are two separate matrices stored in different +locations. + +If beta is zero, contents of R is ignored (not multiplied by zero - just +ignored). + +However, when alpha is zero , we still calculate MM product, which is +multiplied by zero afterwards. + +Unlike mm22 functions, this function does NOT support partial output of R +- we always store full 2x4 matrix. +*************************************************************************/ +void _ialglib_mm22x2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride) +{ + _ialglib_mm22(alpha, a, b0, k, beta, r, stride, 0); + _ialglib_mm22(alpha, a, b1, k, beta, r+2, stride, 0); +} + +/************************************************************************* +This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1 +are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix +stored in non-contiguous row-by-row storage. + +A, B0 and B1 must be aligned; R may be non-aligned. + +Note that B0 and B1 are two separate matrices stored in different +locations. + +If beta is zero, contents of R is ignored (not multiplied by zero - just +ignored). + +However, when alpha is zero , we still calculate MM product, which is +multiplied by zero afterwards. + +Unlike mm22 functions, this function does NOT support partial output of R +- we always store full 2x4 matrix. + +This function supports SSE2; it can be used when: +1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time) +2. ae_cpuid() result contains CPU_SSE2 (checked at run-time) + +If (1) is failed, this function will still be defined and callable, but it +will do nothing. If (2) is failed , call to this function will probably +crash your system. + +If you want to know whether it is safe to call it, you should check +results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable +and will do its work. +*************************************************************************/ +void _ialglib_mm22x2_sse2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride) +{ +#if defined(AE_HAS_SSE2_INTRINSICS) + /* + * We calculate product of two Kx2 matrices (result is 2x2). + * V0, V1, V2, V3 store result as follows: + * + * [ V0[0] V1[1] V2[0] V3[1] ] + * R = [ ] + * [ V1[0] V0[1] V3[0] V2[1] ] + * + * VA0 stores current 1x2 block of A, VA1 stores shuffle of VA0, + * VB0 and VB1 are used to store two copies of 1x2 block of B0 or B1 + * (both vars store same data - either B0 or B1). Results from multiplication + * by VA0/VA1 are stored in VB0/VB1 too. + * + */ + __m128d v0, v1, v2, v3, va0, va1, vb0, vb1; + __m128d r00, r01, r10, r11, valpha, vbeta; + ae_int_t t, k2; + + k2 = k/2; + v0 = _mm_setzero_pd(); + v1 = _mm_setzero_pd(); + v2 = _mm_setzero_pd(); + v3 = _mm_setzero_pd(); + for(t=0; t<k; t++) + { + va0 = _mm_load_pd(a); + vb0 = _mm_load_pd(b0); + va1 = _mm_load_pd(a); + + vb0 = _mm_mul_pd(va0,vb0); + vb1 = _mm_load_pd(b0); + v0 = _mm_add_pd(v0,vb0); + vb1 = _mm_mul_pd(va1,vb1); + vb0 = _mm_load_pd(b1); + v1 = _mm_add_pd(v1,vb1); + + vb0 = _mm_mul_pd(va0,vb0); + vb1 = _mm_load_pd(b1); + v2 = _mm_add_pd(v2,vb0); + vb1 = _mm_mul_pd(va1,vb1); + v3 = _mm_add_pd(v3,vb1); + + a+=2; + b0+=2; + b1+=2; + } + + /* + * shuffle V1 and V3 (conversion to more convenient storage format): + * + * [ V0[0] V1[0] V2[0] V3[0] ] + * R = [ ] + * [ V1[1] V0[1] V3[1] V2[1] ] + * + * unpack results to + * + * [ r00 r01 ] + * [ r10 r11 ] + * + */ + valpha = _mm_load1_pd(&alpha); + v1 = _mm_shuffle_pd(v1, v1, 1); + v3 = _mm_shuffle_pd(v3, v3, 1); + r00 = _mm_mul_pd(_mm_unpacklo_pd(v0,v1),valpha); + r10 = _mm_mul_pd(_mm_unpackhi_pd(v1,v0),valpha); + r01 = _mm_mul_pd(_mm_unpacklo_pd(v2,v3),valpha); + r11 = _mm_mul_pd(_mm_unpackhi_pd(v3,v2),valpha); + + /* + * store + */ + if( beta==0 ) + { + _mm_storeu_pd(r,r00); + _mm_storeu_pd(r+2,r01); + _mm_storeu_pd(r+stride,r10); + _mm_storeu_pd(r+stride+2,r11); + } + else + { + vbeta = _mm_load1_pd(&beta); + _mm_storeu_pd(r, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r00)); + _mm_storeu_pd(r+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+2),vbeta),r01)); + _mm_storeu_pd(r+stride, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r10)); + _mm_storeu_pd(r+stride+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride+2),vbeta),r11)); + } +#endif +} + +} diff --git a/contrib/lbfgs/ap.h b/contrib/lbfgs/ap.h new file mode 100755 index 0000000000..eef1d97acc --- /dev/null +++ b/contrib/lbfgs/ap.h @@ -0,0 +1,1203 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#ifndef _ap_h +#define _ap_h + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string> +#include <cstring> +#include <math.h> + +#ifdef __BORLANDC__ +#include <list.h> +#include <vector.h> +#else +#include <list> +#include <vector> +#endif + +#define AE_USE_CPP + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS DECLARATIONS FOR BASIC FUNCTIONALITY +// LIKE MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS SHARED +// BETWEEN C++ AND PURE C LIBRARIES +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +/* + * definitions + */ +#define AE_UNKNOWN 0 +#define AE_MSVC 1 +#define AE_GNUC 2 +#define AE_SUNC 3 +#define AE_INTEL 1 +#define AE_SPARC 2 + +/* + * automatically determine compiler + */ +#define AE_COMPILER AE_UNKNOWN +#ifdef __GNUC__ +#undef AE_COMPILER +#define AE_COMPILER AE_GNUC +#endif +#ifdef __SUNPRO_C +#undef AE_COMPILER +#define AE_COMPILER AE_SUNC +#endif +#ifdef _MSC_VER +#undef AE_COMPILER +#define AE_COMPILER AE_MSVC +#endif + +/* + * if we work under C++ environment, define several conditions + */ +#ifdef AE_USE_CPP +#define AE_USE_CPP_BOOL +#define AE_USE_CPP_ERROR_HANDLING +#endif + + +/* + * define ae_int32_t, ae_int64_t, ae_int_t, ae_bool, ae_complex, ae_error_type and ae_datatype + */ +#if defined(AE_HAVE_STDINT) +#include <stdint.h> +#endif + +#if defined(AE_INT32_T) +typedef AE_INT32_T ae_int32_t; +#endif +#if defined(AE_HAVE_STDINT) && !defined(AE_INT32_T) +typedef int32_t ae_int32_t; +#endif +#if !defined(AE_HAVE_STDINT) && !defined(AE_INT32_T) +#if AE_COMPILER==AE_MSVC +typedef _int32 ae_int32_t; +#endif +#if (AE_COMPILER==AE_GNUC) || (AE_COMPILER==AE_SUNC) || (AE_COMPILER==AE_UNKNOWN) +typedef int ae_int32_t; +#endif +#endif + +#if defined(AE_INT64_T) +typedef AE_INT64_T ae_int64_t; +#endif +#if defined(AE_HAVE_STDINT) && !defined(AE_INT64_T) +typedef int64_t ae_int64_t; +#endif +#if !defined(AE_HAVE_STDINT) && !defined(AE_INT64_T) +#if AE_COMPILER==AE_MSVC +typedef _int64 ae_int64_t; +#endif +#if (AE_COMPILER==AE_GNUC) || (AE_COMPILER==AE_SUNC) || (AE_COMPILER==AE_UNKNOWN) +typedef signed long long ae_int64_t; +#endif +#endif + +#if !defined(AE_INT_T) +typedef ptrdiff_t ae_int_t; +#endif + +#if !defined(AE_USE_CPP_BOOL) +#define ae_bool char +#define ae_true 1 +#define ae_false 0 +#else +#define ae_bool bool +#define ae_true true +#define ae_false false +#endif + + +/* + * SSE2 intrinsics + * + * Preprocessor directives below: + * - include headers for SSE2 intrinsics + * - define AE_HAS_SSE2_INTRINSICS definition + * + * These actions are performed when we have: + * - x86 architecture definition (AE_CPU==AE_INTEL) + * - compiler which supports intrinsics + * + * Presence of AE_HAS_SSE2_INTRINSICS does NOT mean that our CPU + * actually supports SSE2 - such things should be determined at runtime + * with ae_cpuid() call. It means that we are working under Intel and + * out compiler can issue SSE2-capable code. + * + */ +#if defined(AE_CPU) +#if AE_CPU==AE_INTEL + +#ifdef AE_USE_CPP +} // end of namespace declaration, subsequent includes must be out of namespace +#endif + +#if AE_COMPILER==AE_MSVC +#include <emmintrin.h> +#define AE_HAS_SSE2_INTRINSICS +#endif + +#if (AE_COMPILER==AE_GNUC)||(AE_COMPILER==AE_SUNC) +#include <xmmintrin.h> +#define AE_HAS_SSE2_INTRINSICS +#endif + +#ifdef AE_USE_CPP +namespace alglib_impl { // namespace declaration continued +#endif + +#endif +#endif + + +typedef struct { double x, y; } ae_complex; + +typedef enum +{ + ERR_OK = 0, + ERR_OUT_OF_MEMORY = 1, + ERR_XARRAY_TOO_LARGE = 2, + ERR_ASSERTION_FAILED = 3 +} ae_error_type; + +typedef ae_int_t ae_datatype; + +/* + * other definitions + */ +enum { OWN_CALLER=1, OWN_AE=2 }; +enum { ACT_UNCHANGED=1, ACT_SAME_LOCATION=2, ACT_NEW_LOCATION=3 }; +enum { DT_BOOL=1, DT_INT=2, DT_REAL=3, DT_COMPLEX=4 }; +enum { CPU_SSE2=1 }; + + +/************************************************************************ +x-string (zero-terminated): + owner OWN_CALLER or OWN_AE. Determines what to do on realloc(). + If vector is owned by caller, X-interface will just set + ptr to NULL before realloc(). If it is owned by X, it + will call ae_free/x_free/aligned_free family functions. + + last_action ACT_UNCHANGED, ACT_SAME_LOCATION, ACT_NEW_LOCATION + contents is either: unchanged, stored at the same location, + stored at the new location. + this field is set on return from X. + + ptr pointer to the actual data + +Members of this structure are ae_int64_t to avoid alignment problems. +************************************************************************/ +typedef struct +{ + ae_int64_t owner; + ae_int64_t last_action; + char *ptr; +} x_string; + +/************************************************************************ +x-vector: + cnt number of elements + + datatype one of the DT_XXXX values + + owner OWN_CALLER or OWN_AE. Determines what to do on realloc(). + If vector is owned by caller, X-interface will just set + ptr to NULL before realloc(). If it is owned by X, it + will call ae_free/x_free/aligned_free family functions. + + last_action ACT_UNCHANGED, ACT_SAME_LOCATION, ACT_NEW_LOCATION + contents is either: unchanged, stored at the same location, + stored at the new location. + this field is set on return from X interface and may be + used by caller as hint when deciding what to do with data + (if it was ACT_UNCHANGED or ACT_SAME_LOCATION, no array + reallocation or copying is required). + + ptr pointer to the actual data + +Members of this structure are ae_int64_t to avoid alignment problems. +************************************************************************/ +typedef struct +{ + ae_int64_t cnt; + ae_int64_t datatype; + ae_int64_t owner; + ae_int64_t last_action; + void *ptr; +} x_vector; + + +/************************************************************************ +x-matrix: + rows number of rows. may be zero only when cols is zero too. + + cols number of columns. may be zero only when rows is zero too. + + stride stride, i.e. distance between first elements of rows (in bytes) + + datatype one of the DT_XXXX values + + owner OWN_CALLER or OWN_AE. Determines what to do on realloc(). + If vector is owned by caller, X-interface will just set + ptr to NULL before realloc(). If it is owned by X, it + will call ae_free/x_free/aligned_free family functions. + + last_action ACT_UNCHANGED, ACT_SAME_LOCATION, ACT_NEW_LOCATION + contents is either: unchanged, stored at the same location, + stored at the new location. + this field is set on return from X interface and may be + used by caller as hint when deciding what to do with data + (if it was ACT_UNCHANGED or ACT_SAME_LOCATION, no array + reallocation or copying is required). + + ptr pointer to the actual data, stored rowwise + +Members of this structure are ae_int64_t to avoid alignment problems. +************************************************************************/ +typedef struct +{ + ae_int64_t rows; + ae_int64_t cols; + ae_int64_t stride; + ae_int64_t datatype; + ae_int64_t owner; + ae_int64_t last_action; + void *ptr; +} x_matrix; + + +/************************************************************************ +dynamic block which may be automatically deallocated during stack unwinding + +p_next next block in the stack unwinding list. + NULL means that this block is not in the list +deallocator deallocator function which should be used to deallocate block. + NULL for "special" blocks (frame/stack boundaries) +ptr pointer which should be passed to the deallocator. + may be null (for zero-size block), DYN_BOTTOM or DYN_FRAME + for "special" blocks (frame/stack boundaries). + +************************************************************************/ +typedef struct ae_dyn_block +{ + struct ae_dyn_block * volatile p_next; + /* void *deallocator; */ + void (*deallocator)(void*); + void * volatile ptr; +} ae_dyn_block; + +/************************************************************************ +frame marker +************************************************************************/ +typedef struct ae_frame +{ + ae_dyn_block db_marker; +} ae_frame; + +typedef struct +{ + ae_int_t endianness; + double v_nan; + double v_posinf; + double v_neginf; + + ae_dyn_block * volatile p_top_block; + ae_dyn_block last_block; +#ifndef AE_USE_CPP_ERROR_HANDLING + jmp_buf * volatile break_jump; +#endif + ae_error_type volatile last_error; + const char* volatile error_msg; +} ae_state; + +typedef void(*ae_deallocator)(void*); + +typedef struct ae_vector +{ + ae_int_t cnt; + ae_datatype datatype; + ae_dyn_block data; + union + { + void *p_ptr; + ae_bool *p_bool; + ae_int_t *p_int; + double *p_double; + ae_complex *p_complex; + } ptr; +} ae_vector; + +typedef struct ae_matrix +{ + ae_int_t rows; + ae_int_t cols; + ae_int_t stride; + ae_datatype datatype; + ae_dyn_block data; + union + { + void *p_ptr; + void **pp_void; + ae_bool **pp_bool; + ae_int_t **pp_int; + double **pp_double; + ae_complex **pp_complex; + } ptr; +} ae_matrix; + +ae_int_t ae_misalignment(const void *ptr, size_t alignment); +void* ae_align(void *ptr, size_t alignment); +void* aligned_malloc(size_t size, size_t alignment); +void aligned_free(void *block); + +void* ae_malloc(size_t size, ae_state *state); +void ae_free(void *p); +ae_int_t ae_sizeof(ae_datatype datatype); + +void ae_state_init(ae_state *state); +void ae_state_clear(ae_state *state); +#ifndef AE_USE_CPP_ERROR_HANDLING +void ae_state_set_break_jump(ae_state *state, jmp_buf *buf); +#endif +void ae_break(ae_state *state, ae_error_type error_type, const char *msg); + +void ae_frame_make(ae_state *state, ae_frame *tmp); +void ae_frame_leave(ae_state *state); + +void ae_db_attach(ae_dyn_block *block, ae_state *state); +ae_bool ae_db_malloc(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic); +ae_bool ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state); +void ae_db_free(ae_dyn_block *block); +void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2); + +ae_bool ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic); +ae_bool ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic); +void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic); +ae_bool ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state); +void ae_vector_clear(ae_vector *dst); +void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2); + +ae_bool ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic); +ae_bool ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic); +void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic); +ae_bool ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state); +void ae_matrix_clear(ae_matrix *dst); +void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2); + +void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state); +void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state); +void ae_x_attach_to_vector(x_vector *dst, ae_vector *src); +void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src); + +void x_vector_clear(x_vector *dst); + +ae_bool x_is_symmetric(x_matrix *a); +ae_bool x_is_hermitian(x_matrix *a); +ae_bool x_force_symmetric(x_matrix *a); +ae_bool x_force_hermitian(x_matrix *a); +ae_bool ae_is_symmetric(ae_matrix *a); +ae_bool ae_is_hermitian(ae_matrix *a); +ae_bool ae_force_symmetric(ae_matrix *a); +ae_bool ae_force_hermitian(ae_matrix *a); + +/************************************************************************ +Service functions +************************************************************************/ +void ae_assert(ae_bool cond, const char *msg, ae_state *state); +ae_int_t ae_cpuid(); + +/************************************************************************ +Real math functions: +* IEEE-compliant floating point comparisons +* standard functions +************************************************************************/ +ae_bool ae_fp_eq(double v1, double v2); +ae_bool ae_fp_neq(double v1, double v2); +ae_bool ae_fp_less(double v1, double v2); +ae_bool ae_fp_less_eq(double v1, double v2); +ae_bool ae_fp_greater(double v1, double v2); +ae_bool ae_fp_greater_eq(double v1, double v2); + +ae_bool ae_isfinite_stateless(double x, ae_int_t endianness); +ae_bool ae_isnan_stateless(double x, ae_int_t endianness); +ae_bool ae_isinf_stateless(double x, ae_int_t endianness); +ae_bool ae_isposinf_stateless(double x, ae_int_t endianness); +ae_bool ae_isneginf_stateless(double x, ae_int_t endianness); + +ae_int_t ae_get_endianness(); + +ae_bool ae_isfinite(double x,ae_state *state); +ae_bool ae_isnan(double x, ae_state *state); +ae_bool ae_isinf(double x, ae_state *state); +ae_bool ae_isposinf(double x,ae_state *state); +ae_bool ae_isneginf(double x,ae_state *state); + +double ae_fabs(double x, ae_state *state); +ae_int_t ae_iabs(ae_int_t x, ae_state *state); +double ae_sqr(double x, ae_state *state); +double ae_sqrt(double x, ae_state *state); + +ae_int_t ae_sign(double x, ae_state *state); +ae_int_t ae_round(double x, ae_state *state); +ae_int_t ae_trunc(double x, ae_state *state); +ae_int_t ae_ifloor(double x, ae_state *state); +ae_int_t ae_iceil(double x, ae_state *state); + +ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state); +ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state); +double ae_maxreal(double m1, double m2, ae_state *state); +double ae_minreal(double m1, double m2, ae_state *state); +double ae_randomreal(ae_state *state); +ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state); + +double ae_sin(double x, ae_state *state); +double ae_cos(double x, ae_state *state); +double ae_tan(double x, ae_state *state); +double ae_sinh(double x, ae_state *state); +double ae_cosh(double x, ae_state *state); +double ae_tanh(double x, ae_state *state); +double ae_asin(double x, ae_state *state); +double ae_acos(double x, ae_state *state); +double ae_atan(double x, ae_state *state); +double ae_atan2(double x, double y, ae_state *state); + +double ae_log(double x, ae_state *state); +double ae_pow(double x, double y, ae_state *state); +double ae_exp(double x, ae_state *state); + +/************************************************************************ +Complex math functions: +* basic arithmetic operations +* standard functions +************************************************************************/ +ae_complex ae_complex_from_d(double v); + +ae_complex ae_c_neg(ae_complex lhs); +ae_bool ae_c_eq(ae_complex lhs, ae_complex rhs); +ae_bool ae_c_neq(ae_complex lhs, ae_complex rhs); +ae_complex ae_c_add(ae_complex lhs, ae_complex rhs); +ae_complex ae_c_mul(ae_complex lhs, ae_complex rhs); +ae_complex ae_c_sub(ae_complex lhs, ae_complex rhs); +ae_complex ae_c_div(ae_complex lhs, ae_complex rhs); +ae_bool ae_c_eq_d(ae_complex lhs, double rhs); +ae_bool ae_c_neq_d(ae_complex lhs, double rhs); +ae_complex ae_c_add_d(ae_complex lhs, double rhs); +ae_complex ae_c_mul_d(ae_complex lhs, double rhs); +ae_complex ae_c_sub_d(ae_complex lhs, double rhs); +ae_complex ae_c_d_sub(double lhs, ae_complex rhs); +ae_complex ae_c_div_d(ae_complex lhs, double rhs); +ae_complex ae_c_d_div(double lhs, ae_complex rhs); + +ae_complex ae_c_conj(ae_complex lhs, ae_state *state); +ae_complex ae_c_sqr(ae_complex lhs, ae_state *state); +double ae_c_abs(ae_complex z, ae_state *state); + +/************************************************************************ +Complex BLAS operations +************************************************************************/ +ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n); +void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha); +void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha); +void ae_v_cadd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void ae_v_caddd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha); +void ae_v_caddc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha); +void ae_v_csub(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha); +void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha); +void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha); +void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha); + +/************************************************************************ +Real BLAS operations +************************************************************************/ +double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n); +void ae_v_move(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n); +void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n); +void ae_v_moved(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha); +void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n); +void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha); +void ae_v_sub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n); +void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha); +void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha); + +/************************************************************************ +Other functions +************************************************************************/ +ae_int_t ae_v_len(ae_int_t a, ae_int_t b); + +/* +extern const double ae_machineepsilon; +extern const double ae_maxrealnumber; +extern const double ae_minrealnumber; +extern const double ae_pi; +*/ +#define ae_machineepsilon 5E-16 +#define ae_maxrealnumber 1E300 +#define ae_minrealnumber 1E-300 +#define ae_pi 3.1415926535897932384626433832795 + + +/************************************************************************ +RComm functions +************************************************************************/ +typedef struct rcommstate +{ + int stage; + ae_vector ia; + ae_vector ba; + ae_vector ra; + ae_vector ca; +} rcommstate; +ae_bool _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic); +void _rcommstate_clear(rcommstate* p); + +#ifdef AE_USE_ALLOC_COUNTER +extern ae_int64_t _alloc_counter; +#endif + + +/************************************************************************ +debug functions (must be turned on by preprocessor definitions: +* tickcount(), which is wrapper around GetTickCount() +************************************************************************/ +#ifdef AE_DEBUG4WINDOWS +#include <windows.h> +#include <stdio.h> +#define tickcount(s) GetTickCount() +#define flushconsole(s) fflush(stdout) +#endif + + + +} + + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS DECLARATIONS FOR C++ RELATED FUNCTIONALITY +// +///////////////////////////////////////////////////////////////////////// + +namespace alglib +{ + +typedef alglib_impl::ae_int_t ae_int_t; + +/******************************************************************** +Class forwards +********************************************************************/ +class complex; + +ae_int_t vlen(ae_int_t n1, ae_int_t n2); + +/******************************************************************** +Exception class. +********************************************************************/ +class ap_error +{ +public: + std::string msg; + + ap_error(); + ap_error(const char *s); + static void make_assertion(bool bClause); + static void make_assertion(bool bClause, const char *msg); +private: +}; + +/******************************************************************** +Complex number with double precision. +********************************************************************/ +class complex +{ +public: + complex(); + complex(const double &_x); + complex(const double &_x, const double &_y); + complex(const complex &z); + + complex& operator= (const double& v); + complex& operator+=(const double& v); + complex& operator-=(const double& v); + complex& operator*=(const double& v); + complex& operator/=(const double& v); + + complex& operator= (const complex& z); + complex& operator+=(const complex& z); + complex& operator-=(const complex& z); + complex& operator*=(const complex& z); + complex& operator/=(const complex& z); + + alglib_impl::ae_complex* c_ptr(); + const alglib_impl::ae_complex* c_ptr() const; + + std::string tostring(int dps) const; + + double x, y; +}; + +const alglib::complex operator/(const alglib::complex& lhs, const alglib::complex& rhs); +const bool operator==(const alglib::complex& lhs, const alglib::complex& rhs); +const bool operator!=(const alglib::complex& lhs, const alglib::complex& rhs); +const alglib::complex operator+(const alglib::complex& lhs); +const alglib::complex operator-(const alglib::complex& lhs); +const alglib::complex operator+(const alglib::complex& lhs, const alglib::complex& rhs); +const alglib::complex operator+(const alglib::complex& lhs, const double& rhs); +const alglib::complex operator+(const double& lhs, const alglib::complex& rhs); +const alglib::complex operator-(const alglib::complex& lhs, const alglib::complex& rhs); +const alglib::complex operator-(const alglib::complex& lhs, const double& rhs); +const alglib::complex operator-(const double& lhs, const alglib::complex& rhs); +const alglib::complex operator*(const alglib::complex& lhs, const alglib::complex& rhs); +const alglib::complex operator*(const alglib::complex& lhs, const double& rhs); +const alglib::complex operator*(const double& lhs, const alglib::complex& rhs); +const alglib::complex operator/(const alglib::complex& lhs, const alglib::complex& rhs); +const alglib::complex operator/(const double& lhs, const alglib::complex& rhs); +const alglib::complex operator/(const alglib::complex& lhs, const double& rhs); +double abscomplex(const alglib::complex &z); +alglib::complex conj(const alglib::complex &z); +alglib::complex csqr(const alglib::complex &z); + +/******************************************************************** +Level 1 BLAS functions + +NOTES: +* destination and source should NOT overlap +* stride is assumed to be positive, but it is not + assert'ed within function +* conj_src parameter specifies whether complex source is conjugated + before processing or not. Pass string which starts with 'N' or 'n' + ("No conj", for example) to use unmodified parameter. All other + values will result in conjugation of input, but it is recommended + to use "Conj" in such cases. +********************************************************************/ +double vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n); +double vdotproduct(const double *v1, const double *v2, ae_int_t N); + +alglib::complex vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n); +alglib::complex vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N); + +void vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n); +void vmove(double *vdst, const double* vsrc, ae_int_t N); + +void vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N); + +void vmoveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n); +void vmoveneg(double *vdst, const double *vsrc, ae_int_t N); + +void vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N); + +void vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha); +void vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha); + +void vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha); +void vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha); + +void vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha); +void vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha); + +void vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n); +void vadd(double *vdst, const double *vsrc, ae_int_t N); + +void vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N); + +void vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha); +void vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha); + +void vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha); +void vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha); + +void vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha); +void vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha); + +void vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n); +void vsub(double *vdst, const double *vsrc, ae_int_t N); + +void vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n); +void vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N); + +void vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha); +void vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha); + +void vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha); +void vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha); + +void vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha); +void vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha); + +void vmul(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha); +void vmul(double *vdst, ae_int_t N, double alpha); + +void vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha); +void vmul(alglib::complex *vdst, ae_int_t N, double alpha); + +void vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha); +void vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha); + + + +/******************************************************************** +string conversion functions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +********************************************************************/ + +/******************************************************************** +1- and 2-dimensional arrays +********************************************************************/ +class ae_vector_wrapper +{ +public: + ae_vector_wrapper(); + virtual ~ae_vector_wrapper(); + ae_vector_wrapper(const ae_vector_wrapper &rhs); + const ae_vector_wrapper& operator=(const ae_vector_wrapper &rhs); + + void setlength(ae_int_t iLen); + ae_int_t length() const; + + void attach_to(alglib_impl::ae_vector *ptr); + void allocate_own(ae_int_t size, alglib_impl::ae_datatype datatype); + const alglib_impl::ae_vector* c_ptr() const; + alglib_impl::ae_vector* c_ptr(); +protected: + alglib_impl::ae_vector *p_vec; + alglib_impl::ae_vector vec; +}; + +class boolean_1d_array : public ae_vector_wrapper +{ +public: + boolean_1d_array(); + boolean_1d_array(const char *s); + boolean_1d_array(alglib_impl::ae_vector *p); + virtual ~boolean_1d_array() ; + + const ae_bool& operator()(ae_int_t i) const; + ae_bool& operator()(ae_int_t i); + + const ae_bool& operator[](ae_int_t i) const; + ae_bool& operator[](ae_int_t i); + + void setcontent(ae_int_t iLen, const bool *pContent ); + ae_bool* getcontent(); + const ae_bool* getcontent() const; + + std::string tostring() const; +}; + +class integer_1d_array : public ae_vector_wrapper +{ +public: + integer_1d_array(); + integer_1d_array(alglib_impl::ae_vector *p); + integer_1d_array(const char *s); + virtual ~integer_1d_array(); + + const ae_int_t& operator()(ae_int_t i) const; + ae_int_t& operator()(ae_int_t i); + + const ae_int_t& operator[](ae_int_t i) const; + ae_int_t& operator[](ae_int_t i); + + void setcontent(ae_int_t iLen, const ae_int_t *pContent ); + + ae_int_t* getcontent(); + const ae_int_t* getcontent() const; + + std::string tostring() const; +}; + +class real_1d_array : public ae_vector_wrapper +{ +public: + real_1d_array(); + real_1d_array(alglib_impl::ae_vector *p); + real_1d_array(const char *s); + virtual ~real_1d_array(); + + const double& operator()(ae_int_t i) const; + double& operator()(ae_int_t i); + + const double& operator[](ae_int_t i) const; + double& operator[](ae_int_t i); + + void setcontent(ae_int_t iLen, const double *pContent ); + double* getcontent(); + const double* getcontent() const; + + std::string tostring(int dps) const; +}; + +class complex_1d_array : public ae_vector_wrapper +{ +public: + complex_1d_array(); + complex_1d_array(alglib_impl::ae_vector *p); + complex_1d_array(const char *s); + virtual ~complex_1d_array(); + + const alglib::complex& operator()(ae_int_t i) const; + alglib::complex& operator()(ae_int_t i); + + const alglib::complex& operator[](ae_int_t i) const; + alglib::complex& operator[](ae_int_t i); + + void setcontent(ae_int_t iLen, const alglib::complex *pContent ); + alglib::complex* getcontent(); + const alglib::complex* getcontent() const; + + std::string tostring(int dps) const; +}; + +class ae_matrix_wrapper +{ +public: + ae_matrix_wrapper(); + virtual ~ae_matrix_wrapper(); + ae_matrix_wrapper(const ae_matrix_wrapper &rhs); + const ae_matrix_wrapper& operator=(const ae_matrix_wrapper &rhs); + + void setlength(ae_int_t rows, ae_int_t cols); + ae_int_t rows() const; + ae_int_t cols() const; + bool isempty() const; + ae_int_t getstride() const; + + void attach_to(alglib_impl::ae_matrix *ptr); + void allocate_own(ae_int_t rows, ae_int_t cols, alglib_impl::ae_datatype datatype); + const alglib_impl::ae_matrix* c_ptr() const; + alglib_impl::ae_matrix* c_ptr(); +protected: + alglib_impl::ae_matrix *p_mat; + alglib_impl::ae_matrix mat; +}; + +class boolean_2d_array : public ae_matrix_wrapper +{ +public: + boolean_2d_array(); + boolean_2d_array(alglib_impl::ae_matrix *p); + boolean_2d_array(const char *s); + virtual ~boolean_2d_array(); + + const ae_bool& operator()(ae_int_t i, ae_int_t j) const; + ae_bool& operator()(ae_int_t i, ae_int_t j); + + const ae_bool* operator[](ae_int_t i) const; + ae_bool* operator[](ae_int_t i); + + void setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent ); + + std::string tostring() const ; +}; + +class integer_2d_array : public ae_matrix_wrapper +{ +public: + integer_2d_array(); + integer_2d_array(alglib_impl::ae_matrix *p); + integer_2d_array(const char *s); + virtual ~integer_2d_array(); + + const ae_int_t& operator()(ae_int_t i, ae_int_t j) const; + ae_int_t& operator()(ae_int_t i, ae_int_t j); + + const ae_int_t* operator[](ae_int_t i) const; + ae_int_t* operator[](ae_int_t i); + + void setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent ); + + std::string tostring() const; +}; + +class real_2d_array : public ae_matrix_wrapper +{ +public: + real_2d_array(); + real_2d_array(alglib_impl::ae_matrix *p); + real_2d_array(const char *s); + virtual ~real_2d_array(); + + const double& operator()(ae_int_t i, ae_int_t j) const; + double& operator()(ae_int_t i, ae_int_t j); + + const double* operator[](ae_int_t i) const; + double* operator[](ae_int_t i); + + void setcontent(ae_int_t irows, ae_int_t icols, const double *pContent ); + + std::string tostring(int dps) const; +}; + +class complex_2d_array : public ae_matrix_wrapper +{ +public: + complex_2d_array(); + complex_2d_array(alglib_impl::ae_matrix *p); + complex_2d_array(const char *s); + virtual ~complex_2d_array(); + + const alglib::complex& operator()(ae_int_t i, ae_int_t j) const; + alglib::complex& operator()(ae_int_t i, ae_int_t j); + + const alglib::complex* operator[](ae_int_t i) const; + alglib::complex* operator[](ae_int_t i); + + void setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent ); + + std::string tostring(int dps) const; +}; + + +/******************************************************************** +dataset information. + +can store regression dataset, classification dataset, or non-labeled +task: +* nout==0 means non-labeled task (clustering, for example) +* nout>0 && nclasses==0 means regression task +* nout>0 && nclasses>0 means classification task +********************************************************************/ +/*class dataset +{ +public: + dataset():nin(0), nout(0), nclasses(0), trnsize(0), valsize(0), tstsize(0), totalsize(0){}; + + int nin, nout, nclasses; + + int trnsize; + int valsize; + int tstsize; + int totalsize; + + alglib::real_2d_array trn; + alglib::real_2d_array val; + alglib::real_2d_array tst; + alglib::real_2d_array all; +}; + +bool opendataset(std::string file, dataset *pdataset); + +// +// internal functions +// +std::string strtolower(const std::string &s); +bool readstrings(std::string file, std::list<std::string> *pOutput); +bool readstrings(std::string file, std::list<std::string> *pOutput, std::string comment); +void explodestring(std::string s, char sep, std::vector<std::string> *pOutput); +std::string xtrim(std::string s);*/ + +/******************************************************************** +Constants and functions introduced for compatibility with AlgoPascal +********************************************************************/ +extern const double machineepsilon; +extern const double maxrealnumber; +extern const double minrealnumber; +extern const double fp_nan; +extern const double fp_posinf; +extern const double fp_neginf; +extern const ae_int_t endianness; + +int sign2(double x); +double randomreal(); +int randominteger(int maxv); +int round(double x); +int trunc(double x); +int ifloor(double x); +int iceil(double x); +double pi(); +double sqr(double x); +int maxint(int m1, int m2); +int minint(int m1, int m2); +double maxreal(double m1, double m2); +double minreal(double m1, double m2); + +bool fp_eq(double v1, double v2); +bool fp_neq(double v1, double v2); +bool fp_less(double v1, double v2); +bool fp_less_eq(double v1, double v2); +bool fp_greater(double v1, double v2); +bool fp_greater_eq(double v1, double v2); + +bool fp_isnan(double x); +bool fp_isposinf(double x); +bool fp_isneginf(double x); +bool fp_isinf(double x); +bool fp_isfinite(double x); + + +}//namespace alglib + + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTIONS CONTAINS DECLARATIONS FOR OPTIMIZED LINEAR ALGEBRA CODES +// IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +#define ALGLIB_INTERCEPTS_ABLAS + +void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride); +void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride); +void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb); +void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj); +void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj); +void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b); +void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride); +void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b); +void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride); + +ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + ae_matrix *b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + ae_matrix *c, + ae_int_t ic, + ae_int_t jc); +ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + ae_matrix *b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + ae_matrix *c, + ae_int_t ic, + ae_int_t jc); +ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2); +ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2); +ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2); +ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + ae_matrix *x, + ae_int_t i2, + ae_int_t j2); +ae_bool _ialglib_i_cmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + ae_matrix *c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper); +ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n, + ae_int_t k, + double alpha, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + ae_matrix *c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper); +ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_vector *u, + ae_int_t uoffs, + ae_vector *v, + ae_int_t voffs); +ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m, + ae_int_t n, + ae_matrix *a, + ae_int_t ia, + ae_int_t ja, + ae_vector *u, + ae_int_t uoffs, + ae_vector *v, + ae_int_t voffs); + +} + +#endif + diff --git a/contrib/lbfgs/linalg.cpp b/contrib/lbfgs/linalg.cpp new file mode 100755 index 0000000000..11998ea124 --- /dev/null +++ b/contrib/lbfgs/linalg.cpp @@ -0,0 +1,30199 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#include "stdafx.h" +#include "linalg.h" + +// disable some irrelevant warnings +#if (AE_COMPILER==AE_MSVC) +#pragma warning(disable:4100) +#pragma warning(disable:4127) +#pragma warning(disable:4702) +#pragma warning(disable:4996) +#endif +using namespace std; + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + + +/************************************************************************* +Cache-oblivous complex "copy-and-transpose" + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + A - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void cmatrixtranspose(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Cache-oblivous real "copy-and-transpose" + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + A - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void rmatrixtranspose(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Copy + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + B - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void cmatrixcopy(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Copy + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + B - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void rmatrixcopy(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Rank-1 correction: A := A + u*v' + +INPUT PARAMETERS: + M - number of rows + N - number of columns + A - target matrix, MxN submatrix is updated + IA - submatrix offset (row index) + JA - submatrix offset (column index) + U - vector #1 + IU - subvector offset + V - vector #2 + IV - subvector offset +*************************************************************************/ +void cmatrixrank1(const ae_int_t m, const ae_int_t n, complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_1d_array &u, const ae_int_t iu, complex_1d_array &v, const ae_int_t iv) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Rank-1 correction: A := A + u*v' + +INPUT PARAMETERS: + M - number of rows + N - number of columns + A - target matrix, MxN submatrix is updated + IA - submatrix offset (row index) + JA - submatrix offset (column index) + U - vector #1 + IU - subvector offset + V - vector #2 + IV - subvector offset +*************************************************************************/ +void rmatrixrank1(const ae_int_t m, const ae_int_t n, real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_1d_array &u, const ae_int_t iu, real_1d_array &v, const ae_int_t iv) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Matrix-vector product: y := op(A)*x + +INPUT PARAMETERS: + M - number of rows of op(A) + M>=0 + N - number of columns of op(A) + N>=0 + A - target matrix + IA - submatrix offset (row index) + JA - submatrix offset (column index) + OpA - operation type: + * OpA=0 => op(A) = A + * OpA=1 => op(A) = A^T + * OpA=2 => op(A) = A^H + X - input vector + IX - subvector offset + IY - subvector offset + +OUTPUT PARAMETERS: + Y - vector which stores result + +if M=0, then subroutine does nothing. +if N=0, Y is filled by zeros. + + + -- ALGLIB routine -- + + 28.01.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixmv(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const complex_1d_array &x, const ae_int_t ix, complex_1d_array &y, const ae_int_t iy) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Matrix-vector product: y := op(A)*x + +INPUT PARAMETERS: + M - number of rows of op(A) + N - number of columns of op(A) + A - target matrix + IA - submatrix offset (row index) + JA - submatrix offset (column index) + OpA - operation type: + * OpA=0 => op(A) = A + * OpA=1 => op(A) = A^T + X - input vector + IX - subvector offset + IY - subvector offset + +OUTPUT PARAMETERS: + Y - vector which stores result + +if M=0, then subroutine does nothing. +if N=0, Y is filled by zeros. + + + -- ALGLIB routine -- + + 28.01.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixmv(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, real_1d_array &y, const ae_int_t iy) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine calculates X*op(A^-1) where: +* X is MxN general matrix +* A is NxN upper/lower triangular/unitriangular matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Multiplication result replaces X. +Cache-oblivious algorithm is used. + +INPUT PARAMETERS + N - matrix size, N>=0 + M - matrix size, N>=0 + A - matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1] + I1 - submatrix offset + J1 - submatrix offset + IsUpper - whether matrix is upper triangular + IsUnit - whether matrix is unitriangular + OpType - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + C - matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1] + I2 - submatrix offset + J2 - submatrix offset + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine calculates op(A^-1)*X where: +* X is MxN general matrix +* A is MxM upper/lower triangular/unitriangular matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Multiplication result replaces X. +Cache-oblivious algorithm is used. + +INPUT PARAMETERS + N - matrix size, N>=0 + M - matrix size, N>=0 + A - matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1] + I1 - submatrix offset + J1 - submatrix offset + IsUpper - whether matrix is upper triangular + IsUnit - whether matrix is unitriangular + OpType - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + C - matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1] + I2 - submatrix offset + J2 - submatrix offset + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Same as CMatrixRightTRSM, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Same as CMatrixLeftTRSM, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine calculates C=alpha*A*A^H+beta*C or C=alpha*A^H*A+beta*C +where: +* C is NxN Hermitian matrix given by its upper/lower triangle +* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise + +Additional info: +* cache-oblivious algorithm is used. +* multiplication result replaces C. If Beta=0, C elements are not used in + calculations (not multiplied by zero - just not referenced) +* if Alpha=0, A is not used (not multiplied by zero - just not referenced) +* if both Beta and Alpha are zero, C is filled by zeros. + +INPUT PARAMETERS + N - matrix size, N>=0 + K - matrix size, K>=0 + Alpha - coefficient + A - matrix + IA - submatrix offset + JA - submatrix offset + OpTypeA - multiplication type: + * 0 - A*A^H is calculated + * 2 - A^H*A is calculated + Beta - coefficient + C - matrix + IC - submatrix offset + JC - submatrix offset + IsUpper - whether C is upper triangular or lower triangular + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Same as CMatrixSYRK, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where: +* C is MxN general matrix +* op1(A) is MxK matrix +* op2(B) is KxN matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Additional info: +* cache-oblivious algorithm is used. +* multiplication result replaces C. If Beta=0, C elements are not used in + calculations (not multiplied by zero - just not referenced) +* if Alpha=0, A is not used (not multiplied by zero - just not referenced) +* if both Beta and Alpha are zero, C is filled by zeros. + +INPUT PARAMETERS + N - matrix size, N>0 + M - matrix size, N>0 + K - matrix size, K>0 + Alpha - coefficient + A - matrix + IA - submatrix offset + JA - submatrix offset + OpTypeA - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + B - matrix + IB - submatrix offset + JB - submatrix offset + OpTypeB - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + Beta - coefficient + C - matrix + IC - submatrix offset + JC - submatrix offset + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const alglib::complex alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const alglib::complex beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixgemm(m, n, k, *alpha.c_ptr(), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, *beta.c_ptr(), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Same as CMatrixGEMM, but for real numbers. +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixgemm(m, n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +QR decomposition of a rectangular matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and R in compact form (see below). + Tau - array of scalar factors which are used to form + matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)]. + +Matrix A is represented as A = QR, where Q is an orthogonal matrix of size +MxM, R - upper triangular (or upper trapezoid) matrix of size M x N. + +The elements of matrix R are located on and above the main diagonal of +matrix A. The elements which are located in Tau array and below the main +diagonal of matrix A are used to form matrix Q as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(0)*H(2)*...*H(k-1), + +where k = min(m,n), and each H(i) is in the form + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - real vector, +so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i). + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqr(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +LQ decomposition of a rectangular matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices L and Q in compact form (see below) + Tau - array of scalar factors which are used to form + matrix Q. Array whose index ranges within [0..Min(M,N)-1]. + +Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size +MxM, L - lower triangular (or lower trapezoid) matrix of size M x N. + +The elements of matrix L are located on and below the main diagonal of +matrix A. The elements which are located in Tau array and above the main +diagonal of matrix A are used to form matrix Q as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(k-1)*H(k-2)*...*H(1)*H(0), + +where k = min(m,n), and each H(i) is of the form + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0, +v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1). + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlq(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +QR decomposition of a rectangular complex matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and R in compact form + Tau - array of scalar factors which are used to form matrix Q. Array + whose indexes range within [0.. Min(M,N)-1] + +Matrix A is represented as A = QR, where Q is an orthogonal matrix of size +MxM, R - upper triangular (or upper trapezoid) matrix of size MxN. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void cmatrixqr(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +LQ decomposition of a rectangular complex matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and L in compact form + Tau - array of scalar factors which are used to form matrix Q. Array + whose indexes range within [0.. Min(M,N)-1] + +Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size +MxM, L - lower triangular (or lower trapezoid) matrix of size MxN. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void cmatrixlq(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Partial unpacking of matrix Q from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of RMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of the RMatrixQR subroutine. + QColumns - required number of columns of matrix Q. M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array whose indexes range within [0..M-1, 0..QColumns-1]. + If QColumns=0, the array remains unchanged. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqrunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qcolumns, real_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking of matrix R from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of RMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + R - matrix R, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqrunpackr(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &r) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Partial unpacking of matrix Q from the LQ decomposition of a matrix A + +Input parameters: + A - matrices L and Q in compact form. + Output of RMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of the RMatrixLQ subroutine. + QRows - required number of rows in matrix Q. N>=QRows>=0. + +Output parameters: + Q - first QRows rows of matrix Q. Array whose indexes range + within [0..QRows-1, 0..N-1]. If QRows=0, the array remains + unchanged. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlqunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qrows, real_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking of matrix L from the LQ decomposition of a matrix A + +Input parameters: + A - matrices Q and L in compact form. + Output of RMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + L - matrix L, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlqunpackl(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &l) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Partial unpacking of matrix Q from QR decomposition of a complex matrix A. + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixQR subroutine . + M - number of rows in matrix A. M>=0. + N - number of columns in matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of CMatrixQR subroutine . + QColumns - required number of columns in matrix Q. M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array whose index ranges within [0..M-1, 0..QColumns-1]. + If QColumns=0, array isn't changed. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixqrunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qcolumns, complex_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking of matrix R from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + R - matrix R, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixqrunpackr(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &r) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Partial unpacking of matrix Q from LQ decomposition of a complex matrix A. + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixLQ subroutine . + M - number of rows in matrix A. M>=0. + N - number of columns in matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of CMatrixLQ subroutine . + QRows - required number of rows in matrix Q. N>=QColumns>=0. + +Output parameters: + Q - first QRows rows of matrix Q. + Array whose index ranges within [0..QRows-1, 0..N-1]. + If QRows=0, array isn't changed. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlqunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qrows, complex_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking of matrix L from the LQ decomposition of a matrix A + +Input parameters: + A - matrices Q and L in compact form. + Output of CMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + L - matrix L, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlqunpackl(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &l) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Reduction of a rectangular matrix to bidiagonal form + +The algorithm reduces the rectangular matrix A to bidiagonal form by +orthogonal transformations P and Q: A = Q*B*P. + +Input parameters: + A - source matrix. array[0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q, B, P in compact form (see below). + TauQ - scalar factors which are used to form matrix Q. + TauP - scalar factors which are used to form matrix P. + +The main diagonal and one of the secondary diagonals of matrix A are +replaced with bidiagonal matrix B. Other elements contain elementary +reflections which form MxM matrix Q and NxN matrix P, respectively. + +If M>=N, B is the upper bidiagonal MxN matrix and is stored in the +corresponding elements of matrix A. Matrix Q is represented as a +product of elementary reflections Q = H(0)*H(1)*...*H(n-1), where +H(i) = 1-tau*v*v'. Here tau is a scalar which is stored in TauQ[i], and +vector v has the following structure: v(0:i-1)=0, v(i)=1, v(i+1:m-1) is +stored in elements A(i+1:m-1,i). Matrix P is as follows: P = +G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i], +u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1). + +If M<N, B is the lower bidiagonal MxN matrix and is stored in the +corresponding elements of matrix A. Q = H(0)*H(1)*...*H(m-2), where +H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1) +is stored in elements A(i+2:m-1,i). P = G(0)*G(1)*...*G(m-1), +G(i) = 1-tau*u*u', tau is stored in TauP, u(0:i-1)=0, u(i)=1, u(i+1:n-1) +is stored in A(i,i+1:n-1). + +EXAMPLE: + +m=6, n=5 (m > n): m=5, n=6 (m < n): + +( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +( v1 v2 v3 v4 v5 ) + +Here vi and ui are vectors which form H(i) and G(i), and d and e - +are the diagonal and off-diagonal elements of matrix B. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +void rmatrixbd(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tauq, real_1d_array &taup) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixbd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking matrix Q which reduces a matrix to bidiagonal form. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUQ - scalar factors which are used to form Q. + Output of ToBidiagonal subroutine. + QColumns - required number of columns in matrix Q. + M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array[0..M-1, 0..QColumns-1] + If QColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, const ae_int_t qcolumns, real_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixbdunpackq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Multiplication by matrix Q which reduces matrix A to bidiagonal form. + +The algorithm allows pre- or post-multiply by Q or Q'. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUQ - scalar factors which are used to form Q. + Output of ToBidiagonal subroutine. + Z - multiplied matrix. + array[0..ZRows-1,0..ZColumns-1] + ZRows - number of rows in matrix Z. If FromTheRight=False, + ZRows=M, otherwise ZRows can be arbitrary. + ZColumns - number of columns in matrix Z. If FromTheRight=True, + ZColumns=M, otherwise ZColumns can be arbitrary. + FromTheRight - pre- or post-multiply. + DoTranspose - multiply by Q or Q'. + +Output parameters: + Z - product of Z and Q. + Array[0..ZRows-1,0..ZColumns-1] + If ZRows=0 or ZColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdmultiplybyq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixbdmultiplybyq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking matrix P which reduces matrix A to bidiagonal form. +The subroutine returns transposed matrix P. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUP - scalar factors which are used to form P. + Output of ToBidiagonal subroutine. + PTRows - required number of rows of matrix P^T. N >= PTRows >= 0. + +Output parameters: + PT - first PTRows columns of matrix P^T + Array[0..PTRows-1, 0..N-1] + If PTRows=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackpt(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, const ae_int_t ptrows, real_2d_array &pt) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixbdunpackpt(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), ptrows, const_cast<alglib_impl::ae_matrix*>(pt.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Multiplication by matrix P which reduces matrix A to bidiagonal form. + +The algorithm allows pre- or post-multiply by P or P'. + +Input parameters: + QP - matrices Q and P in compact form. + Output of RMatrixBD subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUP - scalar factors which are used to form P. + Output of RMatrixBD subroutine. + Z - multiplied matrix. + Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. + ZRows - number of rows in matrix Z. If FromTheRight=False, + ZRows=N, otherwise ZRows can be arbitrary. + ZColumns - number of columns in matrix Z. If FromTheRight=True, + ZColumns=N, otherwise ZColumns can be arbitrary. + FromTheRight - pre- or post-multiply. + DoTranspose - multiply by P or P'. + +Output parameters: + Z - product of Z and P. + Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. + If ZRows=0 or ZColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdmultiplybyp(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixbdmultiplybyp(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking of the main and secondary diagonals of bidiagonal decomposition +of matrix A. + +Input parameters: + B - output of RMatrixBD subroutine. + M - number of rows in matrix B. + N - number of columns in matrix B. + +Output parameters: + IsUpper - True, if the matrix is upper bidiagonal. + otherwise IsUpper is False. + D - the main diagonal. + Array whose index ranges within [0..Min(M,N)-1]. + E - the secondary diagonal (upper or lower, depending on + the value of IsUpper). + Array index ranges within [0..Min(M,N)-1], the last + element is not used. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackdiagonals(const real_2d_array &b, const ae_int_t m, const ae_int_t n, bool &isupper, real_1d_array &d, real_1d_array &e) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixbdunpackdiagonals(const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), m, n, &isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Reduction of a square matrix to upper Hessenberg form: Q'*A*Q = H, +where Q is an orthogonal matrix, H - Hessenberg matrix. + +Input parameters: + A - matrix A with elements [0..N-1, 0..N-1] + N - size of matrix A. + +Output parameters: + A - matrices Q and P in compact form (see below). + Tau - array of scalar factors which are used to form matrix Q. + Array whose index ranges within [0..N-2] + +Matrix H is located on the main diagonal, on the lower secondary diagonal +and above the main diagonal of matrix A. The elements which are used to +form matrix Q are situated in array Tau and below the lower secondary +diagonal of matrix A as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(0)*H(2)*...*H(n-2), + +where each H(i) is given by + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - is a real vector, +so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void rmatrixhessenberg(real_2d_array &a, const ae_int_t n, real_1d_array &tau) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixhessenberg(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking matrix Q which reduces matrix A to upper Hessenberg form + +Input parameters: + A - output of RMatrixHessenberg subroutine. + N - size of matrix A. + Tau - scalar factors which are used to form Q. + Output of RMatrixHessenberg subroutine. + +Output parameters: + Q - matrix Q. + Array whose indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixhessenbergunpackq(const real_2d_array &a, const ae_int_t n, const real_1d_array &tau, real_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixhessenbergunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form) + +Input parameters: + A - output of RMatrixHessenberg subroutine. + N - size of matrix A. + +Output parameters: + H - matrix H. Array whose indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixhessenbergunpackh(const real_2d_array &a, const ae_int_t n, real_2d_array &h) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixhessenbergunpackh(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Reduction of a symmetric matrix which is given by its higher or lower +triangular part to a tridiagonal matrix using orthogonal similarity +transformation: Q'*A*Q=T. + +Input parameters: + A - matrix to be transformed + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. If IsUpper = True, then matrix A is given + by its upper triangle, and the lower triangle is not used + and not modified by the algorithm, and vice versa + if IsUpper = False. + +Output parameters: + A - matrices T and Q in compact form (see lower) + Tau - array of factors which are forming matrices H(i) + array with elements [0..N-2]. + D - main diagonal of symmetric matrix T. + array with elements [0..N-1]. + E - secondary diagonal of symmetric matrix T. + array with elements [0..N-2]. + + + If IsUpper=True, the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-2) . . . H(2) H(0). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in + A(0:i-1,i+1), and tau in TAU(i). + + If IsUpper=False, the matrix Q is represented as a product of elementary + reflectors + + Q = H(0) H(2) . . . H(n-2). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v1 v2 v3 ) ( d ) + ( d e v2 v3 ) ( e d ) + ( d e v3 ) ( v0 e d ) + ( d e ) ( v0 v1 e d ) + ( d ) ( v0 v1 v2 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void smatrixtd(real_2d_array &a, const ae_int_t n, const bool isupper, real_1d_array &tau, real_1d_array &d, real_1d_array &e) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::smatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking matrix Q which reduces symmetric matrix to a tridiagonal +form. + +Input parameters: + A - the result of a SMatrixTD subroutine + N - size of matrix A. + IsUpper - storage format (a parameter of SMatrixTD subroutine) + Tau - the result of a SMatrixTD subroutine + +Output parameters: + Q - transformation matrix. + array with elements [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void smatrixtdunpackq(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &tau, real_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::smatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Reduction of a Hermitian matrix which is given by its higher or lower +triangular part to a real tridiagonal matrix using unitary similarity +transformation: Q'*A*Q = T. + +Input parameters: + A - matrix to be transformed + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. If IsUpper = True, then matrix A is given + by its upper triangle, and the lower triangle is not used + and not modified by the algorithm, and vice versa + if IsUpper = False. + +Output parameters: + A - matrices T and Q in compact form (see lower) + Tau - array of factors which are forming matrices H(i) + array with elements [0..N-2]. + D - main diagonal of real symmetric matrix T. + array with elements [0..N-1]. + E - secondary diagonal of real symmetric matrix T. + array with elements [0..N-2]. + + + If IsUpper=True, the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-2) . . . H(2) H(0). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in + A(0:i-1,i+1), and tau in TAU(i). + + If IsUpper=False, the matrix Q is represented as a product of elementary + reflectors + + Q = H(0) H(2) . . . H(n-2). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v1 v2 v3 ) ( d ) + ( d e v2 v3 ) ( e d ) + ( d e v3 ) ( v0 e d ) + ( d e ) ( v0 v1 e d ) + ( d ) ( v0 v1 v2 e d ) + +where d and e denote diagonal and off-diagonal elements of T, and vi +denotes an element of the vector defining H(i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void hmatrixtd(complex_2d_array &a, const ae_int_t n, const bool isupper, complex_1d_array &tau, real_1d_array &d, real_1d_array &e) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hmatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Unpacking matrix Q which reduces a Hermitian matrix to a real tridiagonal +form. + +Input parameters: + A - the result of a HMatrixTD subroutine + N - size of matrix A. + IsUpper - storage format (a parameter of HMatrixTD subroutine) + Tau - the result of a HMatrixTD subroutine + +Output parameters: + Q - transformation matrix. + array with elements [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void hmatrixtdunpackq(const complex_2d_array &a, const ae_int_t n, const bool isupper, const complex_1d_array &tau, complex_2d_array &q) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hmatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a symmetric matrix + +The algorithm finds eigen pairs of a symmetric matrix by reducing it to +tridiagonal form and using the QL/QR algorithm. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpper - storage format. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +bool smatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine for finding the eigenvalues (and eigenvectors) of a symmetric +matrix in a given half open interval (A, B] by using a bisection and +inverse iteration + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. Array [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + B1, B2 - half open interval (B1, B2] to search eigenvalues in. + +Output parameters: + M - number of eigenvalues found in a given half-interval (M>=0). + W - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..M-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if successful. M contains the number of eigenvalues in the given + half-interval (could be equal to 0), W contains the eigenvalues, + Z contains the eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned, + M is equal to 0. + + -- ALGLIB -- + Copyright 07.01.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixevdr(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine for finding the eigenvalues and eigenvectors of a symmetric +matrix with given indexes by using bisection and inverse iteration methods. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + +Output parameters: + W - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..I2-I1]. + In that case, the eigenvectors are stored in the matrix columns. + +Result: + True, if successful. W contains the eigenvalues, Z contains the + eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned. + + -- ALGLIB -- + Copyright 07.01.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixevdi(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a Hermitian matrix + +The algorithm finds eigen pairs of a Hermitian matrix by reducing it to +real tridiagonal form and using the QL/QR algorithm. + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + +Note: + eigenvectors of Hermitian matrix are defined up to multiplication by + a complex number L, such that |L|=1. + + -- ALGLIB -- + Copyright 2005, 23 March 2007 by Bochkanov Sergey +*************************************************************************/ +bool hmatrixevd(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, complex_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::hmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine for finding the eigenvalues (and eigenvectors) of a Hermitian +matrix in a given half-interval (A, B] by using a bisection and inverse +iteration + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. Array whose indexes range within + [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + B1, B2 - half-interval (B1, B2] to search eigenvalues in. + +Output parameters: + M - number of eigenvalues found in a given half-interval, M>=0 + W - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..M-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if successful. M contains the number of eigenvalues in the given + half-interval (could be equal to 0), W contains the eigenvalues, + Z contains the eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration + subroutine wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned, M is + equal to 0. + +Note: + eigen vectors of Hermitian matrix are defined up to multiplication by + a complex number L, such as |L|=1. + + -- ALGLIB -- + Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. +*************************************************************************/ +bool hmatrixevdr(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, complex_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::hmatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine for finding the eigenvalues and eigenvectors of a Hermitian +matrix with given indexes by using bisection and inverse iteration methods + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + +Output parameters: + W - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..I2-I1]. + In that case, the eigenvectors are stored in the matrix + columns. + +Result: + True, if successful. W contains the eigenvalues, Z contains the + eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration + subroutine wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned. + +Note: + eigen vectors of Hermitian matrix are defined up to multiplication by + a complex number L, such as |L|=1. + + -- ALGLIB -- + Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. +*************************************************************************/ +bool hmatrixevdi(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, complex_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::hmatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix + +The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by +using an QL/QR algorithm with implicit shifts. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix + are multiplied by the square matrix Z. It is used if the + tridiagonal matrix is obtained by the similarity + transformation of a symmetric matrix; + * 2, the eigenvectors of a tridiagonal matrix replace the + square matrix Z; + * 3, matrix Z contains the first row of the eigenvectors + matrix. + Z - if ZNeeded=1, Z contains the square matrix by which the + eigenvectors are multiplied. + Array whose indexes range within [0..N-1, 0..N-1]. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the product of a given matrix (from the left) + and the eigenvectors matrix (from the right); + * 2, Z contains the eigenvectors. + * 3, Z contains the first row of the eigenvectors matrix. + If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1]. + In that case, the eigenvectors are stored in the matrix columns. + If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1]. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +bool smatrixtdevd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixtdevd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a +given half-interval (A, B] by using bisection and inverse iteration. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix, N>=0. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix are multiplied + by the square matrix Z. It is used if the tridiagonal + matrix is obtained by the similarity transformation + of a symmetric matrix. + * 2, the eigenvectors of a tridiagonal matrix replace matrix Z. + A, B - half-interval (A, B] to search eigenvalues in. + Z - if ZNeeded is equal to: + * 0, Z isn't used and remains unchanged; + * 1, Z contains the square matrix (array whose indexes range + within [0..N-1, 0..N-1]) which reduces the given symmetric + matrix to tridiagonal form; + * 2, Z isn't used (but changed on the exit). + +Output parameters: + D - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + M - number of eigenvalues found in the given half-interval (M>=0). + Z - if ZNeeded is equal to: + * 0, doesn't contain any information; + * 1, contains the product of a given NxN matrix Z (from the + left) and NxM matrix of the eigenvectors found (from the + right). Array whose indexes range within [0..N-1, 0..M-1]. + * 2, contains the matrix of the eigenvectors found. + Array whose indexes range within [0..N-1, 0..M-1]. + +Result: + + True, if successful. In that case, M contains the number of eigenvalues + in the given half-interval (could be equal to 0), D contains the eigenvalues, + Z contains the eigenvectors (if needed). + It should be noted that the subroutine changes the size of arrays D and Z. + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. In that case, + the eigenvalues and eigenvectors are not returned, M is equal to 0. + + -- ALGLIB -- + Copyright 31.03.2008 by Bochkanov Sergey +*************************************************************************/ +bool smatrixtdevdr(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const double a, const double b, ae_int_t &m, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixtdevdr(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, a, b, &m, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine for finding tridiagonal matrix eigenvalues/vectors with given +indexes (in ascending order) by using the bisection and inverse iteraion. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix. N>=0. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix are multiplied + by the square matrix Z. It is used if the + tridiagonal matrix is obtained by the similarity transformation + of a symmetric matrix. + * 2, the eigenvectors of a tridiagonal matrix replace + matrix Z. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + Z - if ZNeeded is equal to: + * 0, Z isn't used and remains unchanged; + * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1]) + which reduces the given symmetric matrix to tridiagonal form; + * 2, Z isn't used (but changed on the exit). + +Output parameters: + D - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, doesn't contain any information; + * 1, contains the product of a given NxN matrix Z (from the left) and + Nx(I2-I1) matrix of the eigenvectors found (from the right). + Array whose indexes range within [0..N-1, 0..I2-I1]. + * 2, contains the matrix of the eigenvalues found. + Array whose indexes range within [0..N-1, 0..I2-I1]. + + +Result: + + True, if successful. In that case, D contains the eigenvalues, + Z contains the eigenvectors (if needed). + It should be noted that the subroutine changes the size of arrays D and Z. + + False, if the bisection method subroutine wasn't able to find the eigenvalues + in the given interval or if the inverse iteration subroutine wasn't able + to find all the corresponding eigenvectors. In that case, the eigenvalues + and eigenvectors are not returned. + + -- ALGLIB -- + Copyright 25.12.2005 by Bochkanov Sergey +*************************************************************************/ +bool smatrixtdevdi(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const ae_int_t i1, const ae_int_t i2, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixtdevdi(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, i1, i2, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Finding eigenvalues and eigenvectors of a general matrix + +The algorithm finds eigenvalues and eigenvectors of a general matrix by +using the QR algorithm with multiple shifts. The algorithm can find +eigenvalues and both left and right eigenvectors. + +The right eigenvector is a vector x such that A*x = w*x, and the left +eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex +conjugate transposition of vector y). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + VNeeded - flag controlling whether eigenvectors are needed or not. + If VNeeded is equal to: + * 0, eigenvectors are not returned; + * 1, right eigenvectors are returned; + * 2, left eigenvectors are returned; + * 3, both left and right eigenvectors are returned. + +Output parameters: + WR - real parts of eigenvalues. + Array whose index ranges within [0..N-1]. + WR - imaginary parts of eigenvalues. + Array whose index ranges within [0..N-1]. + VL, VR - arrays of left and right eigenvectors (if they are needed). + If WI[i]=0, the respective eigenvalue is a real number, + and it corresponds to the column number I of matrices VL/VR. + If WI[i]>0, we have a pair of complex conjugate numbers with + positive and negative imaginary parts: + the first eigenvalue WR[i] + sqrt(-1)*WI[i]; + the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1]; + WI[i]>0 + WI[i+1] = -WI[i] < 0 + In that case, the eigenvector corresponding to the first + eigenvalue is located in i and i+1 columns of matrices + VL/VR (the column number i contains the real part, and the + column number i+1 contains the imaginary part), and the vector + corresponding to the second eigenvalue is a complex conjugate to + the first vector. + Arrays whose indexes range within [0..N-1, 0..N-1]. + +Result: + True, if the algorithm has converged. + False, if the algorithm has not converged. + +Note 1: + Some users may ask the following question: what if WI[N-1]>0? + WI[N] must contain an eigenvalue which is complex conjugate to the + N-th eigenvalue, but the array has only size N? + The answer is as follows: such a situation cannot occur because the + algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is + strictly less than N-1. + +Note 2: + The algorithm performance depends on the value of the internal parameter + NS of the InternalSchurDecomposition subroutine which defines the number + of shifts in the QR algorithm (similarly to the block width in block-matrix + algorithms of linear algebra). If you require maximum performance + on your machine, it is recommended to adjust this parameter manually. + + +See also the InternalTREVC subroutine. + +The algorithm is based on the LAPACK 3.0 library. +*************************************************************************/ +bool rmatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t vneeded, real_1d_array &wr, real_1d_array &wi, real_2d_array &vl, real_2d_array &vr) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::rmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, vneeded, const_cast<alglib_impl::ae_vector*>(wr.c_ptr()), const_cast<alglib_impl::ae_vector*>(wi.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vl.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vr.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of a random uniformly distributed (Haar) orthogonal matrix + +INPUT PARAMETERS: + N - matrix size, N>=1 + +OUTPUT PARAMETERS: + A - orthogonal NxN matrix, array[0..N-1,0..N-1] + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonal(const ae_int_t n, real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of random NxN matrix with given condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of a random Haar distributed orthogonal complex matrix + +INPUT PARAMETERS: + N - matrix size, N>=1 + +OUTPUT PARAMETERS: + A - orthogonal NxN matrix, array[0..N-1,0..N-1] + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonal(const ae_int_t n, complex_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of random NxN complex matrix with given condition number C and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of random NxN symmetric matrix with given condition number and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void smatrixrndcond(const ae_int_t n, const double c, real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::smatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of random NxN symmetric positive definite matrix with given +condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random SPD matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::spdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of random NxN Hermitian matrix with given condition number and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Generation of random NxN Hermitian positive definite matrix with given +condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random HPD matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hpdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonalfromtheright(real_2d_array &a, const ae_int_t m, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - Q*A, where Q is random MxM orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonalfromtheleft(real_2d_array &a, const ae_int_t m, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Multiplication of MxN complex matrix by NxN random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonalfromtheright(complex_2d_array &a, const ae_int_t m, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Multiplication of MxN complex matrix by MxM random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - Q*A, where Q is random MxM orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonalfromtheleft(complex_2d_array &a, const ae_int_t m, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Symmetric multiplication of NxN matrix by random Haar distributed +orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..N-1, 0..N-1] + N - matrix size + +OUTPUT PARAMETERS: + A - Q'*A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void smatrixrndmultiply(real_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::smatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Hermitian multiplication of NxN matrix by random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..N-1, 0..N-1] + N - matrix size + +OUTPUT PARAMETERS: + A - Q^H*A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hmatrixrndmultiply(complex_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hmatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +LU decomposition of a general real matrix with row pivoting + +A is represented as A = P*L*U, where: +* L is lower unitriangular matrix +* U is upper triangular matrix +* P = P0*P1*...*PK, K=min(M,N)-1, + Pi - permutation matrix for I and Pivots[I] + +This is cache-oblivous implementation of LU decomposition. +It is optimized for square matrices. As for rectangular matrices: +* best case - M>>N +* worst case - N>>M, small M, large N, matrix does not fit in CPU cache + +INPUT PARAMETERS: + A - array[0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + + +OUTPUT PARAMETERS: + A - matrices L and U in compact form: + * L is stored under main diagonal + * U is stored on and above main diagonal + Pivots - permutation matrix in compact form. + array[0..Min(M-1,N-1)]. + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlu(real_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +LU decomposition of a general complex matrix with row pivoting + +A is represented as A = P*L*U, where: +* L is lower unitriangular matrix +* U is upper triangular matrix +* P = P0*P1*...*PK, K=min(M,N)-1, + Pi - permutation matrix for I and Pivots[I] + +This is cache-oblivous implementation of LU decomposition. It is optimized +for square matrices. As for rectangular matrices: +* best case - M>>N +* worst case - N>>M, small M, large N, matrix does not fit in CPU cache + +INPUT PARAMETERS: + A - array[0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + + +OUTPUT PARAMETERS: + A - matrices L and U in compact form: + * L is stored under main diagonal + * U is stored on and above main diagonal + Pivots - permutation matrix in compact form. + array[0..Min(M-1,N-1)]. + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlu(complex_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Cache-oblivious Cholesky decomposition + +The algorithm computes Cholesky decomposition of a Hermitian positive- +definite matrix. The result of an algorithm is a representation of A as +A=U'*U or A=L*L' (here X' detones conj(X^T)). + +INPUT PARAMETERS: + A - upper or lower triangle of a factorized matrix. + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - if IsUpper=True, then A contains an upper triangle of + a symmetric matrix, otherwise A contains a lower one. + +OUTPUT PARAMETERS: + A - the result of factorization. If IsUpper=True, then + the upper triangle contains matrix U, so that A = U'*U, + and the elements below the main diagonal are not modified. + Similarly, if IsUpper = False. + +RESULT: + If the matrix is positive-definite, the function returns True. + Otherwise, the function returns False. Contents of A is not determined + in such case. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +bool hpdmatrixcholesky(complex_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::hpdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Cache-oblivious Cholesky decomposition + +The algorithm computes Cholesky decomposition of a symmetric positive- +definite matrix. The result of an algorithm is a representation of A as +A=U^T*U or A=L*L^T + +INPUT PARAMETERS: + A - upper or lower triangle of a factorized matrix. + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - if IsUpper=True, then A contains an upper triangle of + a symmetric matrix, otherwise A contains a lower one. + +OUTPUT PARAMETERS: + A - the result of factorization. If IsUpper=True, then + the upper triangle contains matrix U, so that A = U^T*U, + and the elements below the main diagonal are not modified. + Similarly, if IsUpper = False. + +RESULT: + If the matrix is positive-definite, the function returns True. + Otherwise, the function returns False. Contents of A is not determined + in such case. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +bool spdmatrixcholesky(real_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::spdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of a matrix condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixrcond1(const real_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixrcondinf(const real_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Condition number estimate of a symmetric positive definite matrix. + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm of condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + A - symmetric positive definite matrix which is given by its + upper or lower triangle depending on the value of + IsUpper. Array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + +Result: + 1/LowerBound(cond(A)), if matrix A is positive definite, + -1, if matrix A is not positive definite, and its condition number + could not be found by this algorithm. + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double spdmatrixrcond(const real_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::spdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix: estimate of a condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array[0..N-1, 0..N-1]. + N - size of A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixtrrcond1(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix: estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixtrrcondinf(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Condition number estimate of a Hermitian positive definite matrix. + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm of condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + A - Hermitian positive definite matrix which is given by its + upper or lower triangle depending on the value of + IsUpper. Array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + +Result: + 1/LowerBound(cond(A)), if matrix A is positive definite, + -1, if matrix A is not positive definite, and its condition number + could not be found by this algorithm. + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double hpdmatrixrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::hpdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of a matrix condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixrcond1(const complex_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::cmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixrcondinf(const complex_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::cmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the RMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixlurcond1(const real_2d_array &lua, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition +(infinity norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the RMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixlurcondinf(const real_2d_array &lua, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Condition number estimate of a symmetric positive definite matrix given by +Cholesky decomposition. + +The algorithm calculates a lower bound of the condition number. In this +case, the algorithm does not return a lower bound of the condition number, +but an inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + CD - Cholesky decomposition of matrix A, + output of SMatrixCholesky subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double spdmatrixcholeskyrcond(const real_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::spdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Condition number estimate of a Hermitian positive definite matrix given by +Cholesky decomposition. + +The algorithm calculates a lower bound of the condition number. In this +case, the algorithm does not return a lower bound of the condition number, +but an inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + CD - Cholesky decomposition of matrix A, + output of SMatrixCholesky subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double hpdmatrixcholeskyrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::hpdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the CMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixlurcond1(const complex_2d_array &lua, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::cmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition +(infinity norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the CMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixlurcondinf(const complex_2d_array &lua, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::cmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix: estimate of a condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array[0..N-1, 0..N-1]. + N - size of A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixtrrcond1(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::cmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix: estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixtrrcondinf(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::cmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Matrix inverse report: +* R1 reciprocal of condition number in 1-norm +* RInf reciprocal of condition number in inf-norm +*************************************************************************/ +_matinvreport_owner::_matinvreport_owner() +{ + p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_matinvreport_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_matinvreport_owner::_matinvreport_owner(const _matinvreport_owner &rhs) +{ + p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_matinvreport_owner& _matinvreport_owner::operator=(const _matinvreport_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_matinvreport_clear(p_struct); + if( !alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_matinvreport_owner::~_matinvreport_owner() +{ + alglib_impl::_matinvreport_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::matinvreport* _matinvreport_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::matinvreport* _matinvreport_owner::c_ptr() const +{ + return const_cast<alglib_impl::matinvreport*>(p_struct); +} +matinvreport::matinvreport() : _matinvreport_owner() ,r1(p_struct->r1),rinf(p_struct->rinf) +{ +} + +matinvreport::matinvreport(const matinvreport &rhs):_matinvreport_owner(rhs) ,r1(p_struct->r1),rinf(p_struct->rinf) +{ +} + +matinvreport& matinvreport::operator=(const matinvreport &rhs) +{ + if( this==&rhs ) + return *this; + _matinvreport_owner::operator=(rhs); + return *this; +} + +matinvreport::~matinvreport() +{ +} + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of RMatrixLU subroutine). + Pivots - table of permutations + (the output of RMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code: + * -3 A is singular, or VERY close to singular. + it is filled by zeros in such cases. + * 1 task is solved (but matrix A may be ill-conditioned, + check R1/RInf parameters for condition numbers). + Rep - solver report, see below for more info + A - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + +SOLVER REPORT + +Subroutine sets following fields of the Rep structure: +* R1 reciprocal of condition number: 1/cond(A), 1-norm. +* RInf reciprocal of condition number: 1/cond(A), inf-norm. + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of RMatrixLU subroutine). + Pivots - table of permutations + (the output of RMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code: + * -3 A is singular, or VERY close to singular. + it is filled by zeros in such cases. + * 1 task is solved (but matrix A may be ill-conditioned, + check R1/RInf parameters for condition numbers). + Rep - solver report, see below for more info + A - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + +SOLVER REPORT + +Subroutine sets following fields of the Rep structure: +* R1 reciprocal of condition number: 1/cond(A), 1-norm. +* RInf reciprocal of condition number: 1/cond(A), inf-norm. + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length())) + throw ap_error("Error while calling 'rmatrixluinverse': looks like one of arguments has wrong size"); + n = a.cols(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + +Result: + True, if the matrix is not singular. + False, if the matrix is singular. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinverse(real_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + +Result: + True, if the matrix is not singular. + False, if the matrix is singular. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'rmatrixinverse': looks like one of arguments has wrong size"); + n = a.cols(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of CMatrixLU subroutine). + Pivots - table of permutations + (the output of CMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of CMatrixLU subroutine). + Pivots - table of permutations + (the output of CMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length())) + throw ap_error("Error while calling 'cmatrixluinverse': looks like one of arguments has wrong size"); + n = a.cols(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void cmatrixinverse(complex_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void cmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'cmatrixinverse': looks like one of arguments has wrong size"); + n = a.cols(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a symmetric positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of SPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixcholeskyinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a symmetric positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of SPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixcholeskyinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isupper; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'spdmatrixcholeskyinverse': looks like one of arguments has wrong size"); + n = a.cols(); + isupper = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a symmetric positive definite matrix. + +Given an upper or lower triangle of a symmetric positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a symmetric positive definite matrix. + +Given an upper or lower triangle of a symmetric positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isupper; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'spdmatrixinverse': looks like one of arguments has wrong size"); + if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) ) + throw ap_error("'a' parameter is not symmetric matrix"); + n = a.cols(); + isupper = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + if( !alglib_impl::ae_force_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) ) + throw ap_error("Internal error while forcing symmetricity of 'a' parameter"); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a Hermitian positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of HPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixcholeskyinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a Hermitian positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of HPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixcholeskyinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isupper; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'hpdmatrixcholeskyinverse': looks like one of arguments has wrong size"); + n = a.cols(); + isupper = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a Hermitian positive definite matrix. + +Given an upper or lower triangle of a Hermitian positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inversion of a Hermitian positive definite matrix. + +Given an upper or lower triangle of a Hermitian positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isupper; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'hpdmatrixinverse': looks like one of arguments has wrong size"); + if( !alglib_impl::ae_is_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) ) + throw ap_error("'a' parameter is not Hermitian matrix"); + n = a.cols(); + isupper = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + if( !alglib_impl::ae_force_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) ) + throw ap_error("Internal error while forcing Hermitian properties of 'a' parameter"); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix inverse (real) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixtrinverse(real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix inverse (real) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixtrinverse(real_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isunit; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'rmatrixtrinverse': looks like one of arguments has wrong size"); + n = a.cols(); + isunit = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix inverse (complex) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void cmatrixtrinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Triangular matrix inverse (complex) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void cmatrixtrinverse(complex_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isunit; + if( (a.cols()!=a.rows())) + throw ap_error("Error while calling 'cmatrixtrinverse': looks like one of arguments has wrong size"); + n = a.cols(); + isunit = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Singular value decomposition of a bidiagonal matrix (extended algorithm) + +The algorithm performs the singular value decomposition of a bidiagonal +matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and P - +orthogonal matrices, S - diagonal matrix with non-negative elements on the +main diagonal, in descending order. + +The algorithm finds singular values. In addition, the algorithm can +calculate matrices Q and P (more precisely, not the matrices, but their +product with given matrices U and VT - U*Q and (P^T)*VT)). Of course, +matrices U and VT can be of any type, including identity. Furthermore, the +algorithm can calculate Q'*C (this product is calculated more effectively +than U*Q, because this calculation operates with rows instead of matrix +columns). + +The feature of the algorithm is its ability to find all singular values +including those which are arbitrarily close to 0 with relative accuracy +close to machine precision. If the parameter IsFractionalAccuracyRequired +is set to True, all singular values will have high relative accuracy close +to machine precision. If the parameter is set to False, only the biggest +singular value will have relative accuracy close to machine precision. +The absolute error of other singular values is equal to the absolute error +of the biggest singular value. + +Input parameters: + D - main diagonal of matrix B. + Array whose index ranges within [0..N-1]. + E - superdiagonal (or subdiagonal) of matrix B. + Array whose index ranges within [0..N-2]. + N - size of matrix B. + IsUpper - True, if the matrix is upper bidiagonal. + IsFractionalAccuracyRequired - + accuracy to search singular values with. + U - matrix to be multiplied by Q. + Array whose indexes range within [0..NRU-1, 0..N-1]. + The matrix can be bigger, in that case only the submatrix + [0..NRU-1, 0..N-1] will be multiplied by Q. + NRU - number of rows in matrix U. + C - matrix to be multiplied by Q'. + Array whose indexes range within [0..N-1, 0..NCC-1]. + The matrix can be bigger, in that case only the submatrix + [0..N-1, 0..NCC-1] will be multiplied by Q'. + NCC - number of columns in matrix C. + VT - matrix to be multiplied by P^T. + Array whose indexes range within [0..N-1, 0..NCVT-1]. + The matrix can be bigger, in that case only the submatrix + [0..N-1, 0..NCVT-1] will be multiplied by P^T. + NCVT - number of columns in matrix VT. + +Output parameters: + D - singular values of matrix B in descending order. + U - if NRU>0, contains matrix U*Q. + VT - if NCVT>0, contains matrix (P^T)*VT. + C - if NCC>0, contains matrix Q'*C. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + +Additional information: + The type of convergence is controlled by the internal parameter TOL. + If the parameter is greater than 0, the singular values will have + relative accuracy TOL. If TOL<0, the singular values will have + absolute accuracy ABS(TOL)*norm(B). + By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon, + where Epsilon is the machine precision. It is not recommended to use + TOL less than 10*Epsilon since this will considerably slow down the + algorithm and may not lead to error decreasing. +History: + * 31 March, 2007. + changed MAXITR from 6 to 12. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999. +*************************************************************************/ +bool rmatrixbdsvd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const bool isupper, const bool isfractionalaccuracyrequired, real_2d_array &u, const ae_int_t nru, real_2d_array &c, const ae_int_t ncc, real_2d_array &vt, const ae_int_t ncvt) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::rmatrixbdsvd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, isupper, isfractionalaccuracyrequired, const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), nru, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ncc, const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), ncvt, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Singular value decomposition of a rectangular matrix. + +The algorithm calculates the singular value decomposition of a matrix of +size MxN: A = U * S * V^T + +The algorithm finds the singular values and, optionally, matrices U and V^T. +The algorithm can find both first min(M,N) columns of matrix U and rows of +matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM +and NxN respectively). + +Take into account that the subroutine does not return matrix V but V^T. + +Input parameters: + A - matrix to be decomposed. + Array whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + UNeeded - 0, 1 or 2. See the description of the parameter U. + VTNeeded - 0, 1 or 2. See the description of the parameter VT. + AdditionalMemory - + If the parameter: + * equals 0, the algorithm doesn’t use additional + memory (lower requirements, lower performance). + * equals 1, the algorithm uses additional + memory of size min(M,N)*min(M,N) of real numbers. + It often speeds up the algorithm. + * equals 2, the algorithm uses additional + memory of size M*min(M,N) of real numbers. + It allows to get a maximum performance. + The recommended value of the parameter is 2. + +Output parameters: + W - contains singular values in descending order. + U - if UNeeded=0, U isn't changed, the left singular vectors + are not calculated. + if Uneeded=1, U contains left singular vectors (first + min(M,N) columns of matrix U). Array whose indexes range + within [0..M-1, 0..Min(M,N)-1]. + if UNeeded=2, U contains matrix U wholly. Array whose + indexes range within [0..M-1, 0..M-1]. + VT - if VTNeeded=0, VT isn’t changed, the right singular vectors + are not calculated. + if VTNeeded=1, VT contains right singular vectors (first + min(M,N) rows of matrix V^T). Array whose indexes range + within [0..min(M,N)-1, 0..N-1]. + if VTNeeded=2, VT contains matrix V^T wholly. Array whose + indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +bool rmatrixsvd(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const ae_int_t uneeded, const ae_int_t vtneeded, const ae_int_t additionalmemory, real_1d_array &w, real_2d_array &u, real_2d_array &vt) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::rmatrixsvd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, uneeded, vtneeded, additionalmemory, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length())) + throw ap_error("Error while calling 'rmatrixludet': looks like one of arguments has wrong size"); + n = a.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixdet(const real_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixdet(const real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.rows()!=a.cols())) + throw ap_error("Error while calling 'rmatrixdet': looks like one of arguments has wrong size"); + n = a.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<alglib::complex*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length())) + throw ap_error("Error while calling 'cmatrixludet': looks like one of arguments has wrong size"); + n = a.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<alglib::complex*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +alglib::complex cmatrixdet(const complex_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<alglib::complex*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +alglib::complex cmatrixdet(const complex_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.rows()!=a.cols())) + throw ap_error("Error while calling 'cmatrixdet': looks like one of arguments has wrong size"); + n = a.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<alglib::complex*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the matrix given by the Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition, + output of SMatrixCholesky subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +As the determinant is equal to the product of squares of diagonal elements, +it’s not necessary to specify which triangle - lower or upper - the matrix +is stored in. + +Result: + matrix determinant. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixcholeskydet(const real_2d_array &a, const ae_int_t n) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the matrix given by the Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition, + output of SMatrixCholesky subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +As the determinant is equal to the product of squares of diagonal elements, +it’s not necessary to specify which triangle - lower or upper - the matrix +is stored in. + +Result: + matrix determinant. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixcholeskydet(const real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (a.rows()!=a.cols())) + throw ap_error("Error while calling 'spdmatrixcholeskydet': looks like one of arguments has wrong size"); + n = a.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the symmetric positive definite matrix. + +Input parameters: + A - matrix. Array with elements [0..N-1, 0..N-1]. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + IsUpper - (optional) storage type: + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Result: + determinant of matrix A. + If matrix A is not positive definite, exception is thrown. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixdet(const real_2d_array &a, const ae_int_t n, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Determinant calculation of the symmetric positive definite matrix. + +Input parameters: + A - matrix. Array with elements [0..N-1, 0..N-1]. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + IsUpper - (optional) storage type: + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Result: + determinant of matrix A. + If matrix A is not positive definite, exception is thrown. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixdet(const real_2d_array &a) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + bool isupper; + if( (a.rows()!=a.cols())) + throw ap_error("Error while calling 'spdmatrixdet': looks like one of arguments has wrong size"); + if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) ) + throw ap_error("'a' parameter is not symmetric matrix"); + n = a.rows(); + isupper = false; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<double*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Algorithm for solving the following generalized symmetric positive-definite +eigenproblem: + A*x = lambda*B*x (1) or + A*B*x = lambda*x (2) or + B*A*x = lambda*x (3). +where A is a symmetric matrix, B - symmetric positive-definite matrix. +The problem is solved by reducing it to an ordinary symmetric eigenvalue +problem. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrices A and B. + IsUpperA - storage format of matrix A. + B - symmetric positive-definite matrix which is given by + its upper or lower triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperB - storage format of matrix B. + ZNeeded - if ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + ProblemType - if ProblemType is equal to: + * 1, the following problem is solved: A*x = lambda*B*x; + * 2, the following problem is solved: A*B*x = lambda*x; + * 3, the following problem is solved: B*A*x = lambda*x. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in matrix columns. It should + be noted that the eigenvectors in such problems do not + form an orthogonal system. + +Result: + True, if the problem was solved successfully. + False, if the error occurred during the Cholesky decomposition of matrix + B (the matrix isn’t positive-definite) or during the work of the iterative + algorithm for solving the symmetric eigenproblem. + +See also the GeneralizedSymmetricDefiniteEVDReduce subroutine. + + -- ALGLIB -- + Copyright 1.28.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixgevd(const real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t zneeded, const ae_int_t problemtype, real_1d_array &d, real_2d_array &z) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixgevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, zneeded, problemtype, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Algorithm for reduction of the following generalized symmetric positive- +definite eigenvalue problem: + A*x = lambda*B*x (1) or + A*B*x = lambda*x (2) or + B*A*x = lambda*x (3) +to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and +the given problems are the same, and the eigenvectors of the given problem +could be obtained by multiplying the obtained eigenvectors by the +transformation matrix x = R*y). + +Here A is a symmetric matrix, B - symmetric positive-definite matrix. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrices A and B. + IsUpperA - storage format of matrix A. + B - symmetric positive-definite matrix which is given by + its upper or lower triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperB - storage format of matrix B. + ProblemType - if ProblemType is equal to: + * 1, the following problem is solved: A*x = lambda*B*x; + * 2, the following problem is solved: A*B*x = lambda*x; + * 3, the following problem is solved: B*A*x = lambda*x. + +Output parameters: + A - symmetric matrix which is given by its upper or lower + triangle depending on IsUpperA. Contains matrix C. + Array whose indexes range within [0..N-1, 0..N-1]. + R - upper triangular or low triangular transformation matrix + which is used to obtain the eigenvectors of a given problem + as the product of eigenvectors of C (from the right) and + matrix R (from the left). If the matrix is upper + triangular, the elements below the main diagonal + are equal to 0 (and vice versa). Thus, we can perform + the multiplication without taking into account the + internal structure (which is an easier though less + effective way). + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperR - type of matrix R (upper or lower triangular). + +Result: + True, if the problem was reduced successfully. + False, if the error occurred during the Cholesky decomposition of + matrix B (the matrix is not positive-definite). + + -- ALGLIB -- + Copyright 1.28.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixgevdreduce(real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t problemtype, real_2d_array &r, bool &isupperr) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::smatrixgevdreduce(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, problemtype, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &isupperr, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a number to an element +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdRow - row where the element to be updated is stored. + UpdColumn - column where the element to be updated is stored. + UpdVal - a number to be added to the element. + + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdatesimple(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const ae_int_t updcolumn, const double updval) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixinvupdatesimple(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, updcolumn, updval, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a vector to a row +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdRow - the row of A whose vector V was added. + 0 <= Row <= N-1 + V - the vector to be added to a row. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdaterow(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const real_1d_array &v) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixinvupdaterow(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a vector to a column +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdColumn - the column of A whose vector U was added. + 0 <= UpdColumn <= N-1 + U - the vector to be added to a column. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdatecolumn(real_2d_array &inva, const ae_int_t n, const ae_int_t updcolumn, const real_1d_array &u) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixinvupdatecolumn(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updcolumn, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm computes the inverse of matrix A+u*v’ by using the given matrix +A^-1 and the vectors u and v. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + U - the vector modifying the matrix. + Array whose index ranges within [0..N-1]. + V - the vector modifying the matrix. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of matrix A + u*v'. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdateuv(real_2d_array &inva, const ae_int_t n, const real_1d_array &u, const real_1d_array &v) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::rmatrixinvupdateuv(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Subroutine performing the Schur decomposition of a general matrix by using +the QR algorithm with multiple shifts. + +The source matrix A is represented as S'*A*S = T, where S is an orthogonal +matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of +sizes 1x1 and 2x2 on the main diagonal). + +Input parameters: + A - matrix to be decomposed. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of A, N>=0. + + +Output parameters: + A - contains matrix T. + Array whose indexes range within [0..N-1, 0..N-1]. + S - contains Schur vectors. + Array whose indexes range within [0..N-1, 0..N-1]. + +Note 1: + The block structure of matrix T can be easily recognized: since all + the elements below the blocks are zeros, the elements a[i+1,i] which + are equal to 0 show the block border. + +Note 2: + The algorithm performance depends on the value of the internal parameter + NS of the InternalSchurDecomposition subroutine which defines the number + of shifts in the QR algorithm (similarly to the block width in block-matrix + algorithms in linear algebra). If you require maximum performance on + your machine, it is recommended to adjust this parameter manually. + +Result: + True, + if the algorithm has converged and parameters A and S contain the result. + False, + if the algorithm has not converged. + +Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library). +*************************************************************************/ +bool rmatrixschur(real_2d_array &a, const ae_int_t n, real_2d_array &s) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::rmatrixschur(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(s.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +static void ablas_ablasinternalsplitlength(ae_int_t n, + ae_int_t nb, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state); +static void ablas_cmatrixrighttrsm2(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +static void ablas_cmatrixlefttrsm2(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +static void ablas_rmatrixrighttrsm2(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +static void ablas_rmatrixlefttrsm2(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +static void ablas_cmatrixsyrk2(ae_int_t n, + ae_int_t k, + double alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state); +static void ablas_rmatrixsyrk2(ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state); +static void ablas_cmatrixgemmk(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state); +static void ablas_rmatrixgemmk(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state); + + +static void ortfac_rmatrixqrbasecase(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* work, + /* Real */ ae_vector* t, + /* Real */ ae_vector* tau, + ae_state *_state); +static void ortfac_rmatrixlqbasecase(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* work, + /* Real */ ae_vector* t, + /* Real */ ae_vector* tau, + ae_state *_state); +static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* work, + /* Complex */ ae_vector* t, + /* Complex */ ae_vector* tau, + ae_state *_state); +static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* work, + /* Complex */ ae_vector* t, + /* Complex */ ae_vector* tau, + ae_state *_state); +static void ortfac_rmatrixblockreflector(/* Real */ ae_matrix* a, + /* Real */ ae_vector* tau, + ae_bool columnwisea, + ae_int_t lengtha, + ae_int_t blocksize, + /* Real */ ae_matrix* t, + /* Real */ ae_vector* work, + ae_state *_state); +static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a, + /* Complex */ ae_vector* tau, + ae_bool columnwisea, + ae_int_t lengtha, + ae_int_t blocksize, + /* Complex */ ae_matrix* t, + /* Complex */ ae_vector* work, + ae_state *_state); + + +static ae_bool evd_tridiagonalevd(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + /* Real */ ae_matrix* z, + ae_state *_state); +static void evd_tdevde2(double a, + double b, + double c, + double* rt1, + double* rt2, + ae_state *_state); +static void evd_tdevdev2(double a, + double b, + double c, + double* rt1, + double* rt2, + double* cs1, + double* sn1, + ae_state *_state); +static double evd_tdevdpythag(double a, double b, ae_state *_state); +static double evd_tdevdextsign(double a, double b, ae_state *_state); +static ae_bool evd_internalbisectioneigenvalues(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t irange, + ae_int_t iorder, + double vl, + double vu, + ae_int_t il, + ae_int_t iu, + double abstol, + /* Real */ ae_vector* w, + ae_int_t* m, + ae_int_t* nsplit, + /* Integer */ ae_vector* iblock, + /* Integer */ ae_vector* isplit, + ae_int_t* errorcode, + ae_state *_state); +static void evd_internaldstein(ae_int_t n, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t m, + /* Real */ ae_vector* w, + /* Integer */ ae_vector* iblock, + /* Integer */ ae_vector* isplit, + /* Real */ ae_matrix* z, + /* Integer */ ae_vector* ifail, + ae_int_t* info, + ae_state *_state); +static void evd_tdininternaldlagtf(ae_int_t n, + /* Real */ ae_vector* a, + double lambdav, + /* Real */ ae_vector* b, + /* Real */ ae_vector* c, + double tol, + /* Real */ ae_vector* d, + /* Integer */ ae_vector* iin, + ae_int_t* info, + ae_state *_state); +static void evd_tdininternaldlagts(ae_int_t n, + /* Real */ ae_vector* a, + /* Real */ ae_vector* b, + /* Real */ ae_vector* c, + /* Real */ ae_vector* d, + /* Integer */ ae_vector* iin, + /* Real */ ae_vector* y, + double* tol, + ae_int_t* info, + ae_state *_state); +static void evd_internaldlaebz(ae_int_t ijob, + ae_int_t nitmax, + ae_int_t n, + ae_int_t mmax, + ae_int_t minp, + double abstol, + double reltol, + double pivmin, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + /* Real */ ae_vector* e2, + /* Integer */ ae_vector* nval, + /* Real */ ae_matrix* ab, + /* Real */ ae_vector* c, + ae_int_t* mout, + /* Integer */ ae_matrix* nab, + /* Real */ ae_vector* work, + /* Integer */ ae_vector* iwork, + ae_int_t* info, + ae_state *_state); +static void evd_internaltrevc(/* Real */ ae_matrix* t, + ae_int_t n, + ae_int_t side, + ae_int_t howmny, + /* Boolean */ ae_vector* vselect, + /* Real */ ae_matrix* vl, + /* Real */ ae_matrix* vr, + ae_int_t* m, + ae_int_t* info, + ae_state *_state); +static void evd_internalhsevdlaln2(ae_bool ltrans, + ae_int_t na, + ae_int_t nw, + double smin, + double ca, + /* Real */ ae_matrix* a, + double d1, + double d2, + /* Real */ ae_matrix* b, + double wr, + double wi, + /* Boolean */ ae_vector* rswap4, + /* Boolean */ ae_vector* zswap4, + /* Integer */ ae_matrix* ipivot44, + /* Real */ ae_vector* civ4, + /* Real */ ae_vector* crv4, + /* Real */ ae_matrix* x, + double* scl, + double* xnorm, + ae_int_t* info, + ae_state *_state); +static void evd_internalhsevdladiv(double a, + double b, + double c, + double d, + double* p, + double* q, + ae_state *_state); +static ae_bool evd_nonsymmetricevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t vneeded, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + /* Real */ ae_matrix* vl, + /* Real */ ae_matrix* vr, + ae_state *_state); +static void evd_toupperhessenberg(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state); +static void evd_unpackqfromupperhessenberg(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + /* Real */ ae_matrix* q, + ae_state *_state); + + + + +static void trfac_cmatrixluprec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state); +static void trfac_rmatrixluprec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state); +static void trfac_cmatrixplurec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state); +static void trfac_rmatrixplurec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state); +static void trfac_cmatrixlup2(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state); +static void trfac_rmatrixlup2(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state); +static void trfac_cmatrixplu2(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state); +static void trfac_rmatrixplu2(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state); +static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tmp, + ae_state *_state); +static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tmp, + ae_state *_state); +static ae_bool trfac_spdmatrixcholesky2(/* Real */ ae_matrix* aaa, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tmp, + ae_state *_state); + + +static void rcond_rmatrixrcondtrinternal(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_bool onenorm, + double anorm, + double* rc, + ae_state *_state); +static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_bool onenorm, + double anorm, + double* rc, + ae_state *_state); +static void rcond_spdmatrixrcondcholeskyinternal(/* Real */ ae_matrix* cha, + ae_int_t n, + ae_bool isupper, + ae_bool isnormprovided, + double anorm, + double* rc, + ae_state *_state); +static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha, + ae_int_t n, + ae_bool isupper, + ae_bool isnormprovided, + double anorm, + double* rc, + ae_state *_state); +static void rcond_rmatrixrcondluinternal(/* Real */ ae_matrix* lua, + ae_int_t n, + ae_bool onenorm, + ae_bool isanormprovided, + double anorm, + double* rc, + ae_state *_state); +static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua, + ae_int_t n, + ae_bool onenorm, + ae_bool isanormprovided, + double anorm, + double* rc, + ae_state *_state); +static void rcond_rmatrixestimatenorm(ae_int_t n, + /* Real */ ae_vector* v, + /* Real */ ae_vector* x, + /* Integer */ ae_vector* isgn, + double* est, + ae_int_t* kase, + ae_state *_state); +static void rcond_cmatrixestimatenorm(ae_int_t n, + /* Complex */ ae_vector* v, + /* Complex */ ae_vector* x, + double* est, + ae_int_t* kase, + /* Integer */ ae_vector* isave, + /* Real */ ae_vector* rsave, + ae_state *_state); +static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x, + ae_int_t n, + ae_state *_state); +static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x, + ae_int_t n, + ae_state *_state); +static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave, + /* Real */ ae_vector* rsave, + ae_int_t* i, + ae_int_t* iter, + ae_int_t* j, + ae_int_t* jlast, + ae_int_t* jump, + double* absxi, + double* altsgn, + double* estold, + double* temp, + ae_state *_state); +static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave, + /* Real */ ae_vector* rsave, + ae_int_t* i, + ae_int_t* iter, + ae_int_t* j, + ae_int_t* jlast, + ae_int_t* jump, + double* absxi, + double* altsgn, + double* estold, + double* temp, + ae_state *_state); + + +static void matinv_rmatrixtrinverserec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + /* Real */ ae_vector* tmp, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + /* Complex */ ae_vector* tmp, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +static void matinv_rmatrixluinverserec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + /* Real */ ae_vector* work, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + /* Complex */ ae_vector* work, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +static void matinv_spdmatrixcholeskyinverserec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tmp, + ae_state *_state); +static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tmp, + ae_state *_state); + + +static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_bool isupper, + ae_bool isfractionalaccuracyrequired, + /* Real */ ae_matrix* u, + ae_int_t ustart, + ae_int_t nru, + /* Real */ ae_matrix* c, + ae_int_t cstart, + ae_int_t ncc, + /* Real */ ae_matrix* vt, + ae_int_t vstart, + ae_int_t ncvt, + ae_state *_state); +static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state); +static void bdsvd_svd2x2(double f, + double g, + double h, + double* ssmin, + double* ssmax, + ae_state *_state); +static void bdsvd_svdv2x2(double f, + double g, + double h, + double* ssmin, + double* ssmax, + double* snr, + double* csr, + double* snl, + double* csl, + ae_state *_state); + + + + + + + + + + + + + + + + + +/************************************************************************* +Splits matrix length in two parts, left part should match ABLAS block size + +INPUT PARAMETERS + A - real matrix, is passed to ensure that we didn't split + complex matrix using real splitting subroutine. + matrix itself is not changed. + N - length, N>0 + +OUTPUT PARAMETERS + N1 - length + N2 - length + +N1+N2=N, N1>=N2, N2 may be zero + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void ablassplitlength(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state) +{ + + *n1 = 0; + *n2 = 0; + + if( n>ablasblocksize(a, _state) ) + { + ablas_ablasinternalsplitlength(n, ablasblocksize(a, _state), n1, n2, _state); + } + else + { + ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state); + } +} + + +/************************************************************************* +Complex ABLASSplitLength + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void ablascomplexsplitlength(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state) +{ + + *n1 = 0; + *n2 = 0; + + if( n>ablascomplexblocksize(a, _state) ) + { + ablas_ablasinternalsplitlength(n, ablascomplexblocksize(a, _state), n1, n2, _state); + } + else + { + ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state); + } +} + + +/************************************************************************* +Returns block size - subdivision size where cache-oblivious soubroutines +switch to the optimized kernel. + +INPUT PARAMETERS + A - real matrix, is passed to ensure that we didn't split + complex matrix using real splitting subroutine. + matrix itself is not changed. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +ae_int_t ablasblocksize(/* Real */ ae_matrix* a, ae_state *_state) +{ + ae_int_t result; + + + result = 32; + return result; +} + + +/************************************************************************* +Block size for complex subroutines. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +ae_int_t ablascomplexblocksize(/* Complex */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t result; + + + result = 24; + return result; +} + + +/************************************************************************* +Microblock size + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +ae_int_t ablasmicroblocksize(ae_state *_state) +{ + ae_int_t result; + + + result = 8; + return result; +} + + +/************************************************************************* +Cache-oblivous complex "copy-and-transpose" + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + A - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void cmatrixtranspose(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state) +{ + ae_int_t i; + ae_int_t s1; + ae_int_t s2; + + + if( m<=2*ablascomplexblocksize(a, _state)&&n<=2*ablascomplexblocksize(a, _state) ) + { + + /* + * base case + */ + for(i=0; i<=m-1; i++) + { + ae_v_cmove(&b->ptr.pp_complex[ib][jb+i], b->stride, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(ib,ib+n-1)); + } + } + else + { + + /* + * Cache-oblivious recursion + */ + if( m>n ) + { + ablascomplexsplitlength(a, m, &s1, &s2, _state); + cmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state); + cmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state); + } + else + { + ablascomplexsplitlength(a, n, &s1, &s2, _state); + cmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state); + cmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state); + } + } +} + + +/************************************************************************* +Cache-oblivous real "copy-and-transpose" + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + A - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void rmatrixtranspose(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state) +{ + ae_int_t i; + ae_int_t s1; + ae_int_t s2; + + + if( m<=2*ablasblocksize(a, _state)&&n<=2*ablasblocksize(a, _state) ) + { + + /* + * base case + */ + for(i=0; i<=m-1; i++) + { + ae_v_move(&b->ptr.pp_double[ib][jb+i], b->stride, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(ib,ib+n-1)); + } + } + else + { + + /* + * Cache-oblivious recursion + */ + if( m>n ) + { + ablassplitlength(a, m, &s1, &s2, _state); + rmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state); + rmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state); + } + else + { + ablassplitlength(a, n, &s1, &s2, _state); + rmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state); + rmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state); + } + } +} + + +/************************************************************************* +Copy + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + B - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void cmatrixcopy(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state) +{ + ae_int_t i; + + + for(i=0; i<=m-1; i++) + { + ae_v_cmove(&b->ptr.pp_complex[ib+i][jb], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(jb,jb+n-1)); + } +} + + +/************************************************************************* +Copy + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + B - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void rmatrixcopy(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state) +{ + ae_int_t i; + + + for(i=0; i<=m-1; i++) + { + ae_v_move(&b->ptr.pp_double[ib+i][jb], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(jb,jb+n-1)); + } +} + + +/************************************************************************* +Rank-1 correction: A := A + u*v' + +INPUT PARAMETERS: + M - number of rows + N - number of columns + A - target matrix, MxN submatrix is updated + IA - submatrix offset (row index) + JA - submatrix offset (column index) + U - vector #1 + IU - subvector offset + V - vector #2 + IV - subvector offset +*************************************************************************/ +void cmatrixrank1(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_vector* u, + ae_int_t iu, + /* Complex */ ae_vector* v, + ae_int_t iv, + ae_state *_state) +{ + ae_int_t i; + ae_complex s; + + + if( m==0||n==0 ) + { + return; + } + if( cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) ) + { + return; + } + for(i=0; i<=m-1; i++) + { + s = u->ptr.p_complex[iu+i]; + ae_v_caddc(&a->ptr.pp_complex[ia+i][ja], 1, &v->ptr.p_complex[iv], 1, "N", ae_v_len(ja,ja+n-1), s); + } +} + + +/************************************************************************* +Rank-1 correction: A := A + u*v' + +INPUT PARAMETERS: + M - number of rows + N - number of columns + A - target matrix, MxN submatrix is updated + IA - submatrix offset (row index) + JA - submatrix offset (column index) + U - vector #1 + IU - subvector offset + V - vector #2 + IV - subvector offset +*************************************************************************/ +void rmatrixrank1(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_vector* u, + ae_int_t iu, + /* Real */ ae_vector* v, + ae_int_t iv, + ae_state *_state) +{ + ae_int_t i; + double s; + + + if( m==0||n==0 ) + { + return; + } + if( rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) ) + { + return; + } + for(i=0; i<=m-1; i++) + { + s = u->ptr.p_double[iu+i]; + ae_v_addd(&a->ptr.pp_double[ia+i][ja], 1, &v->ptr.p_double[iv], 1, ae_v_len(ja,ja+n-1), s); + } +} + + +/************************************************************************* +Matrix-vector product: y := op(A)*x + +INPUT PARAMETERS: + M - number of rows of op(A) + M>=0 + N - number of columns of op(A) + N>=0 + A - target matrix + IA - submatrix offset (row index) + JA - submatrix offset (column index) + OpA - operation type: + * OpA=0 => op(A) = A + * OpA=1 => op(A) = A^T + * OpA=2 => op(A) = A^H + X - input vector + IX - subvector offset + IY - subvector offset + +OUTPUT PARAMETERS: + Y - vector which stores result + +if M=0, then subroutine does nothing. +if N=0, Y is filled by zeros. + + + -- ALGLIB routine -- + + 28.01.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixmv(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Complex */ ae_vector* x, + ae_int_t ix, + /* Complex */ ae_vector* y, + ae_int_t iy, + ae_state *_state) +{ + ae_int_t i; + ae_complex v; + + + if( m==0 ) + { + return; + } + if( n==0 ) + { + for(i=0; i<=m-1; i++) + { + y->ptr.p_complex[iy+i] = ae_complex_from_d(0); + } + return; + } + if( cmatrixmvf(m, n, a, ia, ja, opa, x, ix, y, iy, _state) ) + { + return; + } + if( opa==0 ) + { + + /* + * y = A*x + */ + for(i=0; i<=m-1; i++) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &x->ptr.p_complex[ix], 1, "N", ae_v_len(ja,ja+n-1)); + y->ptr.p_complex[iy+i] = v; + } + return; + } + if( opa==1 ) + { + + /* + * y = A^T*x + */ + for(i=0; i<=m-1; i++) + { + y->ptr.p_complex[iy+i] = ae_complex_from_d(0); + } + for(i=0; i<=n-1; i++) + { + v = x->ptr.p_complex[ix+i]; + ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(iy,iy+m-1), v); + } + return; + } + if( opa==2 ) + { + + /* + * y = A^H*x + */ + for(i=0; i<=m-1; i++) + { + y->ptr.p_complex[iy+i] = ae_complex_from_d(0); + } + for(i=0; i<=n-1; i++) + { + v = x->ptr.p_complex[ix+i]; + ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "Conj", ae_v_len(iy,iy+m-1), v); + } + return; + } +} + + +/************************************************************************* +Matrix-vector product: y := op(A)*x + +INPUT PARAMETERS: + M - number of rows of op(A) + N - number of columns of op(A) + A - target matrix + IA - submatrix offset (row index) + JA - submatrix offset (column index) + OpA - operation type: + * OpA=0 => op(A) = A + * OpA=1 => op(A) = A^T + X - input vector + IX - subvector offset + IY - subvector offset + +OUTPUT PARAMETERS: + Y - vector which stores result + +if M=0, then subroutine does nothing. +if N=0, Y is filled by zeros. + + + -- ALGLIB routine -- + + 28.01.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixmv(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Real */ ae_vector* x, + ae_int_t ix, + /* Real */ ae_vector* y, + ae_int_t iy, + ae_state *_state) +{ + ae_int_t i; + double v; + + + if( m==0 ) + { + return; + } + if( n==0 ) + { + for(i=0; i<=m-1; i++) + { + y->ptr.p_double[iy+i] = 0; + } + return; + } + if( rmatrixmvf(m, n, a, ia, ja, opa, x, ix, y, iy, _state) ) + { + return; + } + if( opa==0 ) + { + + /* + * y = A*x + */ + for(i=0; i<=m-1; i++) + { + v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &x->ptr.p_double[ix], 1, ae_v_len(ja,ja+n-1)); + y->ptr.p_double[iy+i] = v; + } + return; + } + if( opa==1 ) + { + + /* + * y = A^T*x + */ + for(i=0; i<=m-1; i++) + { + y->ptr.p_double[iy+i] = 0; + } + for(i=0; i<=n-1; i++) + { + v = x->ptr.p_double[ix+i]; + ae_v_addd(&y->ptr.p_double[iy], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(iy,iy+m-1), v); + } + return; + } +} + + +/************************************************************************* +This subroutine calculates X*op(A^-1) where: +* X is MxN general matrix +* A is NxN upper/lower triangular/unitriangular matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Multiplication result replaces X. +Cache-oblivious algorithm is used. + +INPUT PARAMETERS + N - matrix size, N>=0 + M - matrix size, N>=0 + A - matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1] + I1 - submatrix offset + J1 - submatrix offset + IsUpper - whether matrix is upper triangular + IsUnit - whether matrix is unitriangular + OpType - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + C - matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1] + I2 - submatrix offset + J2 - submatrix offset + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrighttrsm(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablascomplexblocksize(a, _state); + if( m<=bs&&n<=bs ) + { + ablas_cmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( m>=n ) + { + + /* + * Split X: X*A = (X1 X2)^T*A + */ + ablascomplexsplitlength(a, m, &s1, &s2, _state); + cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state); + } + else + { + + /* + * Split A: + * (A1 A12) + * X*op(A) = X*op( ) + * ( A2) + * + * Different variants depending on + * IsUpper/OpType combinations + */ + ablascomplexsplitlength(a, n, &s1, &s2, _state); + if( isupper&&optype==0 ) + { + + /* + * (A1 A12)-1 + * X*A^-1 = (X1 X2)*( ) + * ( A2) + */ + cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1, j1+s1, 0, ae_complex_from_d(1.0), x, i2, j2+s1, _state); + cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + return; + } + if( isupper&&optype!=0 ) + { + + /* + * (A1' )-1 + * X*A^-1 = (X1 X2)*( ) + * (A12' A2') + */ + cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1, j1+s1, optype, ae_complex_from_d(1.0), x, i2, j2, _state); + cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( !isupper&&optype==0 ) + { + + /* + * (A1 )-1 + * X*A^-1 = (X1 X2)*( ) + * (A21 A2) + */ + cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1+s1, j1, 0, ae_complex_from_d(1.0), x, i2, j2, _state); + cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( !isupper&&optype!=0 ) + { + + /* + * (A1' A21')-1 + * X*A^-1 = (X1 X2)*( ) + * ( A2') + */ + cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1+s1, j1, optype, ae_complex_from_d(1.0), x, i2, j2+s1, _state); + cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + return; + } + } +} + + +/************************************************************************* +This subroutine calculates op(A^-1)*X where: +* X is MxN general matrix +* A is MxM upper/lower triangular/unitriangular matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Multiplication result replaces X. +Cache-oblivious algorithm is used. + +INPUT PARAMETERS + N - matrix size, N>=0 + M - matrix size, N>=0 + A - matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1] + I1 - submatrix offset + J1 - submatrix offset + IsUpper - whether matrix is upper triangular + IsUnit - whether matrix is unitriangular + OpType - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + C - matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1] + I2 - submatrix offset + J2 - submatrix offset + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlefttrsm(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablascomplexblocksize(a, _state); + if( m<=bs&&n<=bs ) + { + ablas_cmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( n>=m ) + { + + /* + * Split X: op(A)^-1*X = op(A)^-1*(X1 X2) + */ + ablascomplexsplitlength(x, n, &s1, &s2, _state); + cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state); + } + else + { + + /* + * Split A + */ + ablascomplexsplitlength(a, m, &s1, &s2, _state); + if( isupper&&optype==0 ) + { + + /* + * (A1 A12)-1 ( X1 ) + * A^-1*X* = ( ) *( ) + * ( A2) ( X2 ) + */ + cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1, j1+s1, 0, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state); + cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( isupper&&optype!=0 ) + { + + /* + * (A1' )-1 ( X1 ) + * A^-1*X = ( ) *( ) + * (A12' A2') ( X2 ) + */ + cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1, j1+s1, optype, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state); + cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + return; + } + if( !isupper&&optype==0 ) + { + + /* + * (A1 )-1 ( X1 ) + * A^-1*X = ( ) *( ) + * (A21 A2) ( X2 ) + */ + cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1+s1, j1, 0, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state); + cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + return; + } + if( !isupper&&optype!=0 ) + { + + /* + * (A1' A21')-1 ( X1 ) + * A^-1*X = ( ) *( ) + * ( A2') ( X2 ) + */ + cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1+s1, j1, optype, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state); + cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + } +} + + +/************************************************************************* +Same as CMatrixRightTRSM, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrighttrsm(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablasblocksize(a, _state); + if( m<=bs&&n<=bs ) + { + ablas_rmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( m>=n ) + { + + /* + * Split X: X*A = (X1 X2)^T*A + */ + ablassplitlength(a, m, &s1, &s2, _state); + rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state); + } + else + { + + /* + * Split A: + * (A1 A12) + * X*op(A) = X*op( ) + * ( A2) + * + * Different variants depending on + * IsUpper/OpType combinations + */ + ablassplitlength(a, n, &s1, &s2, _state); + if( isupper&&optype==0 ) + { + + /* + * (A1 A12)-1 + * X*A^-1 = (X1 X2)*( ) + * ( A2) + */ + rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1, j1+s1, 0, 1.0, x, i2, j2+s1, _state); + rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + return; + } + if( isupper&&optype!=0 ) + { + + /* + * (A1' )-1 + * X*A^-1 = (X1 X2)*( ) + * (A12' A2') + */ + rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1, j1+s1, optype, 1.0, x, i2, j2, _state); + rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( !isupper&&optype==0 ) + { + + /* + * (A1 )-1 + * X*A^-1 = (X1 X2)*( ) + * (A21 A2) + */ + rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1+s1, j1, 0, 1.0, x, i2, j2, _state); + rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( !isupper&&optype!=0 ) + { + + /* + * (A1' A21')-1 + * X*A^-1 = (X1 X2)*( ) + * ( A2') + */ + rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1+s1, j1, optype, 1.0, x, i2, j2+s1, _state); + rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state); + return; + } + } +} + + +/************************************************************************* +Same as CMatrixLeftTRSM, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlefttrsm(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablasblocksize(a, _state); + if( m<=bs&&n<=bs ) + { + ablas_rmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( n>=m ) + { + + /* + * Split X: op(A)^-1*X = op(A)^-1*(X1 X2) + */ + ablassplitlength(x, n, &s1, &s2, _state); + rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state); + } + else + { + + /* + * Split A + */ + ablassplitlength(a, m, &s1, &s2, _state); + if( isupper&&optype==0 ) + { + + /* + * (A1 A12)-1 ( X1 ) + * A^-1*X* = ( ) *( ) + * ( A2) ( X2 ) + */ + rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + rmatrixgemm(s1, n, s2, -1.0, a, i1, j1+s1, 0, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state); + rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + if( isupper&&optype!=0 ) + { + + /* + * (A1' )-1 ( X1 ) + * A^-1*X = ( ) *( ) + * (A12' A2') ( X2 ) + */ + rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + rmatrixgemm(s2, n, s1, -1.0, a, i1, j1+s1, optype, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state); + rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + return; + } + if( !isupper&&optype==0 ) + { + + /* + * (A1 )-1 ( X1 ) + * A^-1*X = ( ) *( ) + * (A21 A2) ( X2 ) + */ + rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + rmatrixgemm(s2, n, s1, -1.0, a, i1+s1, j1, 0, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state); + rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + return; + } + if( !isupper&&optype!=0 ) + { + + /* + * (A1' A21')-1 ( X1 ) + * A^-1*X = ( ) *( ) + * ( A2') ( X2 ) + */ + rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state); + rmatrixgemm(s1, n, s2, -1.0, a, i1+s1, j1, optype, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state); + rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state); + return; + } + } +} + + +/************************************************************************* +This subroutine calculates C=alpha*A*A^H+beta*C or C=alpha*A^H*A+beta*C +where: +* C is NxN Hermitian matrix given by its upper/lower triangle +* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise + +Additional info: +* cache-oblivious algorithm is used. +* multiplication result replaces C. If Beta=0, C elements are not used in + calculations (not multiplied by zero - just not referenced) +* if Alpha=0, A is not used (not multiplied by zero - just not referenced) +* if both Beta and Alpha are zero, C is filled by zeros. + +INPUT PARAMETERS + N - matrix size, N>=0 + K - matrix size, K>=0 + Alpha - coefficient + A - matrix + IA - submatrix offset + JA - submatrix offset + OpTypeA - multiplication type: + * 0 - A*A^H is calculated + * 2 - A^H*A is calculated + Beta - coefficient + C - matrix + IC - submatrix offset + JC - submatrix offset + IsUpper - whether C is upper triangular or lower triangular + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixsyrk(ae_int_t n, + ae_int_t k, + double alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablascomplexblocksize(a, _state); + if( n<=bs&&k<=bs ) + { + ablas_cmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + return; + } + if( k>=n ) + { + + /* + * Split K + */ + ablascomplexsplitlength(a, k, &s1, &s2, _state); + if( optypea==0 ) + { + cmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + cmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state); + } + else + { + cmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + cmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state); + } + } + else + { + + /* + * Split N + */ + ablascomplexsplitlength(a, n, &s1, &s2, _state); + if( optypea==0&&isupper ) + { + cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 0, a, ia+s1, ja, 2, ae_complex_from_d(beta), c, ic, jc+s1, _state); + cmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + if( optypea==0&&!isupper ) + { + cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia+s1, ja, 0, a, ia, ja, 2, ae_complex_from_d(beta), c, ic+s1, jc, _state); + cmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + if( optypea!=0&&isupper ) + { + cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 2, a, ia, ja+s1, 0, ae_complex_from_d(beta), c, ic, jc+s1, _state); + cmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + if( optypea!=0&&!isupper ) + { + cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia, ja+s1, 2, a, ia, ja, 0, ae_complex_from_d(beta), c, ic+s1, jc, _state); + cmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + } +} + + +/************************************************************************* +Same as CMatrixSYRK, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixsyrk(ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablasblocksize(a, _state); + if( n<=bs&&k<=bs ) + { + ablas_rmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + return; + } + if( k>=n ) + { + + /* + * Split K + */ + ablassplitlength(a, k, &s1, &s2, _state); + if( optypea==0 ) + { + rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + rmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state); + } + else + { + rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + rmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state); + } + } + else + { + + /* + * Split N + */ + ablassplitlength(a, n, &s1, &s2, _state); + if( optypea==0&&isupper ) + { + rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 0, a, ia+s1, ja, 1, beta, c, ic, jc+s1, _state); + rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + if( optypea==0&&!isupper ) + { + rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + rmatrixgemm(s2, s1, k, alpha, a, ia+s1, ja, 0, a, ia, ja, 1, beta, c, ic+s1, jc, _state); + rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + if( optypea!=0&&isupper ) + { + rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 1, a, ia, ja+s1, 0, beta, c, ic, jc+s1, _state); + rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + if( optypea!=0&&!isupper ) + { + rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state); + rmatrixgemm(s2, s1, k, alpha, a, ia, ja+s1, 1, a, ia, ja, 0, beta, c, ic+s1, jc, _state); + rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state); + return; + } + } +} + + +/************************************************************************* +This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where: +* C is MxN general matrix +* op1(A) is MxK matrix +* op2(B) is KxN matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Additional info: +* cache-oblivious algorithm is used. +* multiplication result replaces C. If Beta=0, C elements are not used in + calculations (not multiplied by zero - just not referenced) +* if Alpha=0, A is not used (not multiplied by zero - just not referenced) +* if both Beta and Alpha are zero, C is filled by zeros. + +INPUT PARAMETERS + N - matrix size, N>0 + M - matrix size, N>0 + K - matrix size, K>0 + Alpha - coefficient + A - matrix + IA - submatrix offset + JA - submatrix offset + OpTypeA - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + B - matrix + IB - submatrix offset + JB - submatrix offset + OpTypeB - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + Beta - coefficient + C - matrix + IC - submatrix offset + JC - submatrix offset + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixgemm(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablascomplexblocksize(a, _state); + if( (m<=bs&&n<=bs)&&k<=bs ) + { + ablas_cmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + return; + } + if( m>=n&&m>=k ) + { + + /* + * A*B = (A1 A2)^T*B + */ + ablascomplexsplitlength(a, m, &s1, &s2, _state); + cmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + if( optypea==0 ) + { + cmatrixgemm(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state); + } + else + { + cmatrixgemm(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state); + } + return; + } + if( n>=m&&n>=k ) + { + + /* + * A*B = A*(B1 B2) + */ + ablascomplexsplitlength(a, n, &s1, &s2, _state); + if( optypeb==0 ) + { + cmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + cmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state); + } + else + { + cmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + cmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state); + } + return; + } + if( k>=m&&k>=n ) + { + + /* + * A*B = (A1 A2)*(B1 B2)^T + */ + ablascomplexsplitlength(a, k, &s1, &s2, _state); + if( optypea==0&&optypeb==0 ) + { + cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + cmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state); + } + if( optypea==0&&optypeb!=0 ) + { + cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + cmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state); + } + if( optypea!=0&&optypeb==0 ) + { + cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + cmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state); + } + if( optypea!=0&&optypeb!=0 ) + { + cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + cmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state); + } + return; + } +} + + +/************************************************************************* +Same as CMatrixGEMM, but for real numbers. +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixgemm(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state) +{ + ae_int_t s1; + ae_int_t s2; + ae_int_t bs; + + + bs = ablasblocksize(a, _state); + if( (m<=bs&&n<=bs)&&k<=bs ) + { + ablas_rmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + return; + } + if( m>=n&&m>=k ) + { + + /* + * A*B = (A1 A2)^T*B + */ + ablassplitlength(a, m, &s1, &s2, _state); + if( optypea==0 ) + { + rmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state); + } + else + { + rmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state); + } + return; + } + if( n>=m&&n>=k ) + { + + /* + * A*B = A*(B1 B2) + */ + ablassplitlength(a, n, &s1, &s2, _state); + if( optypeb==0 ) + { + rmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state); + } + else + { + rmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state); + } + return; + } + if( k>=m&&k>=n ) + { + + /* + * A*B = (A1 A2)*(B1 B2)^T + */ + ablassplitlength(a, k, &s1, &s2, _state); + if( optypea==0&&optypeb==0 ) + { + rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state); + } + if( optypea==0&&optypeb!=0 ) + { + rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state); + } + if( optypea!=0&&optypeb==0 ) + { + rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state); + } + if( optypea!=0&&optypeb!=0 ) + { + rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state); + rmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state); + } + return; + } +} + + +/************************************************************************* +Complex ABLASSplitLength + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +static void ablas_ablasinternalsplitlength(ae_int_t n, + ae_int_t nb, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state) +{ + ae_int_t r; + + *n1 = 0; + *n2 = 0; + + if( n<=nb ) + { + + /* + * Block size, no further splitting + */ + *n1 = n; + *n2 = 0; + } + else + { + + /* + * Greater than block size + */ + if( n%nb!=0 ) + { + + /* + * Split remainder + */ + *n2 = n%nb; + *n1 = n-(*n2); + } + else + { + + /* + * Split on block boundaries + */ + *n2 = n/2; + *n1 = n-(*n2); + if( *n1%nb==0 ) + { + return; + } + r = nb-*n1%nb; + *n1 = *n1+r; + *n2 = *n2-r; + } + } +} + + +/************************************************************************* +Level 2 variant of CMatrixRightTRSM +*************************************************************************/ +static void ablas_cmatrixrighttrsm2(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_complex vc; + ae_complex vd; + + + + /* + * Special case + */ + if( n*m==0 ) + { + return; + } + + /* + * Try to call fast TRSM + */ + if( cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) ) + { + return; + } + + /* + * General case + */ + if( isupper ) + { + + /* + * Upper triangular matrix + */ + if( optype==0 ) + { + + /* + * X*A^(-1) + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = a->ptr.pp_complex[i1+j][j1+j]; + } + x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd); + if( j<n-1 ) + { + vc = x->ptr.pp_complex[i2+i][j2+j]; + ae_v_csubc(&x->ptr.pp_complex[i2+i][j2+j+1], 1, &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1), vc); + } + } + } + return; + } + if( optype==1 ) + { + + /* + * X*A^(-T) + */ + for(i=0; i<=m-1; i++) + { + for(j=n-1; j>=0; j--) + { + vc = ae_complex_from_d(0); + vd = ae_complex_from_d(1); + if( j<n-1 ) + { + vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1)); + } + if( !isunit ) + { + vd = a->ptr.pp_complex[i1+j][j1+j]; + } + x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd); + } + } + return; + } + if( optype==2 ) + { + + /* + * X*A^(-H) + */ + for(i=0; i<=m-1; i++) + { + for(j=n-1; j>=0; j--) + { + vc = ae_complex_from_d(0); + vd = ae_complex_from_d(1); + if( j<n-1 ) + { + vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "Conj", ae_v_len(j2+j+1,j2+n-1)); + } + if( !isunit ) + { + vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state); + } + x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd); + } + } + return; + } + } + else + { + + /* + * Lower triangular matrix + */ + if( optype==0 ) + { + + /* + * X*A^(-1) + */ + for(i=0; i<=m-1; i++) + { + for(j=n-1; j>=0; j--) + { + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = a->ptr.pp_complex[i1+j][j1+j]; + } + x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd); + if( j>0 ) + { + vc = x->ptr.pp_complex[i2+i][j2+j]; + ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1), vc); + } + } + } + return; + } + if( optype==1 ) + { + + /* + * X*A^(-T) + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + vc = ae_complex_from_d(0); + vd = ae_complex_from_d(1); + if( j>0 ) + { + vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1)); + } + if( !isunit ) + { + vd = a->ptr.pp_complex[i1+j][j1+j]; + } + x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd); + } + } + return; + } + if( optype==2 ) + { + + /* + * X*A^(-H) + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + vc = ae_complex_from_d(0); + vd = ae_complex_from_d(1); + if( j>0 ) + { + vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "Conj", ae_v_len(j2,j2+j-1)); + } + if( !isunit ) + { + vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state); + } + x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd); + } + } + return; + } + } +} + + +/************************************************************************* +Level-2 subroutine +*************************************************************************/ +static void ablas_cmatrixlefttrsm2(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_complex vc; + ae_complex vd; + + + + /* + * Special case + */ + if( n*m==0 ) + { + return; + } + + /* + * Try to call fast TRSM + */ + if( cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) ) + { + return; + } + + /* + * General case + */ + if( isupper ) + { + + /* + * Upper triangular matrix + */ + if( optype==0 ) + { + + /* + * A^(-1)*X + */ + for(i=m-1; i>=0; i--) + { + for(j=i+1; j<=m-1; j++) + { + vc = a->ptr.pp_complex[i1+i][j1+j]; + ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc); + } + if( !isunit ) + { + vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]); + ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + } + } + return; + } + if( optype==1 ) + { + + /* + * A^(-T)*X + */ + for(i=0; i<=m-1; i++) + { + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]); + } + ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + for(j=i+1; j<=m-1; j++) + { + vc = a->ptr.pp_complex[i1+i][j1+j]; + ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc); + } + } + return; + } + if( optype==2 ) + { + + /* + * A^(-H)*X + */ + for(i=0; i<=m-1; i++) + { + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state)); + } + ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + for(j=i+1; j<=m-1; j++) + { + vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state); + ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc); + } + } + return; + } + } + else + { + + /* + * Lower triangular matrix + */ + if( optype==0 ) + { + + /* + * A^(-1)*X + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=i-1; j++) + { + vc = a->ptr.pp_complex[i1+i][j1+j]; + ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc); + } + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = ae_c_d_div(1,a->ptr.pp_complex[i1+j][j1+j]); + } + ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + } + return; + } + if( optype==1 ) + { + + /* + * A^(-T)*X + */ + for(i=m-1; i>=0; i--) + { + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]); + } + ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + for(j=i-1; j>=0; j--) + { + vc = a->ptr.pp_complex[i1+i][j1+j]; + ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc); + } + } + return; + } + if( optype==2 ) + { + + /* + * A^(-H)*X + */ + for(i=m-1; i>=0; i--) + { + if( isunit ) + { + vd = ae_complex_from_d(1); + } + else + { + vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state)); + } + ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + for(j=i-1; j>=0; j--) + { + vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state); + ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc); + } + } + return; + } + } +} + + +/************************************************************************* +Level 2 subroutine + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +static void ablas_rmatrixrighttrsm2(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double vr; + double vd; + + + + /* + * Special case + */ + if( n*m==0 ) + { + return; + } + + /* + * Try to use "fast" code + */ + if( rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) ) + { + return; + } + + /* + * General case + */ + if( isupper ) + { + + /* + * Upper triangular matrix + */ + if( optype==0 ) + { + + /* + * X*A^(-1) + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( isunit ) + { + vd = 1; + } + else + { + vd = a->ptr.pp_double[i1+j][j1+j]; + } + x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd; + if( j<n-1 ) + { + vr = x->ptr.pp_double[i2+i][j2+j]; + ae_v_subd(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1), vr); + } + } + } + return; + } + if( optype==1 ) + { + + /* + * X*A^(-T) + */ + for(i=0; i<=m-1; i++) + { + for(j=n-1; j>=0; j--) + { + vr = 0; + vd = 1; + if( j<n-1 ) + { + vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1)); + } + if( !isunit ) + { + vd = a->ptr.pp_double[i1+j][j1+j]; + } + x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd; + } + } + return; + } + } + else + { + + /* + * Lower triangular matrix + */ + if( optype==0 ) + { + + /* + * X*A^(-1) + */ + for(i=0; i<=m-1; i++) + { + for(j=n-1; j>=0; j--) + { + if( isunit ) + { + vd = 1; + } + else + { + vd = a->ptr.pp_double[i1+j][j1+j]; + } + x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd; + if( j>0 ) + { + vr = x->ptr.pp_double[i2+i][j2+j]; + ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1), vr); + } + } + } + return; + } + if( optype==1 ) + { + + /* + * X*A^(-T) + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + vr = 0; + vd = 1; + if( j>0 ) + { + vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1)); + } + if( !isunit ) + { + vd = a->ptr.pp_double[i1+j][j1+j]; + } + x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd; + } + } + return; + } + } +} + + +/************************************************************************* +Level 2 subroutine +*************************************************************************/ +static void ablas_rmatrixlefttrsm2(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double vr; + double vd; + + + + /* + * Special case + */ + if( n*m==0 ) + { + return; + } + + /* + * Try fast code + */ + if( rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) ) + { + return; + } + + /* + * General case + */ + if( isupper ) + { + + /* + * Upper triangular matrix + */ + if( optype==0 ) + { + + /* + * A^(-1)*X + */ + for(i=m-1; i>=0; i--) + { + for(j=i+1; j<=m-1; j++) + { + vr = a->ptr.pp_double[i1+i][j1+j]; + ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr); + } + if( !isunit ) + { + vd = 1/a->ptr.pp_double[i1+i][j1+i]; + ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + } + } + return; + } + if( optype==1 ) + { + + /* + * A^(-T)*X + */ + for(i=0; i<=m-1; i++) + { + if( isunit ) + { + vd = 1; + } + else + { + vd = 1/a->ptr.pp_double[i1+i][j1+i]; + } + ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + for(j=i+1; j<=m-1; j++) + { + vr = a->ptr.pp_double[i1+i][j1+j]; + ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr); + } + } + return; + } + } + else + { + + /* + * Lower triangular matrix + */ + if( optype==0 ) + { + + /* + * A^(-1)*X + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=i-1; j++) + { + vr = a->ptr.pp_double[i1+i][j1+j]; + ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr); + } + if( isunit ) + { + vd = 1; + } + else + { + vd = 1/a->ptr.pp_double[i1+j][j1+j]; + } + ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + } + return; + } + if( optype==1 ) + { + + /* + * A^(-T)*X + */ + for(i=m-1; i>=0; i--) + { + if( isunit ) + { + vd = 1; + } + else + { + vd = 1/a->ptr.pp_double[i1+i][j1+i]; + } + ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd); + for(j=i-1; j>=0; j--) + { + vr = a->ptr.pp_double[i1+i][j1+j]; + ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr); + } + } + return; + } + } +} + + +/************************************************************************* +Level 2 subroutine +*************************************************************************/ +static void ablas_cmatrixsyrk2(ae_int_t n, + ae_int_t k, + double alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t j1; + ae_int_t j2; + ae_complex v; + + + + /* + * Fast exit (nothing to be done) + */ + if( (ae_fp_eq(alpha,0)||k==0)&&ae_fp_eq(beta,1) ) + { + return; + } + + /* + * Try to call fast SYRK + */ + if( cmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) ) + { + return; + } + + /* + * SYRK + */ + if( optypea==0 ) + { + + /* + * C=alpha*A*A^H+beta*C + */ + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + for(j=j1; j<=j2; j++) + { + if( ae_fp_neq(alpha,0)&&k>0 ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &a->ptr.pp_complex[ia+j][ja], 1, "Conj", ae_v_len(ja,ja+k-1)); + } + else + { + v = ae_complex_from_d(0); + } + if( ae_fp_eq(beta,0) ) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul_d(v,alpha); + } + else + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul_d(c->ptr.pp_complex[ic+i][jc+j],beta),ae_c_mul_d(v,alpha)); + } + } + } + return; + } + else + { + + /* + * C=alpha*A^H*A+beta*C + */ + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + if( ae_fp_eq(beta,0) ) + { + for(j=j1; j<=j2; j++) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0); + } + } + else + { + ae_v_cmuld(&c->ptr.pp_complex[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta); + } + } + for(i=0; i<=k-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( isupper ) + { + j1 = j; + j2 = n-1; + } + else + { + j1 = 0; + j2 = j; + } + v = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[ia+i][ja+j], _state),alpha); + ae_v_caddc(&c->ptr.pp_complex[ic+j][jc+j1], 1, &a->ptr.pp_complex[ia+i][ja+j1], 1, "N", ae_v_len(jc+j1,jc+j2), v); + } + } + return; + } +} + + +/************************************************************************* +Level 2 subrotuine +*************************************************************************/ +static void ablas_rmatrixsyrk2(ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t j1; + ae_int_t j2; + double v; + + + + /* + * Fast exit (nothing to be done) + */ + if( (ae_fp_eq(alpha,0)||k==0)&&ae_fp_eq(beta,1) ) + { + return; + } + + /* + * Try to call fast SYRK + */ + if( rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) ) + { + return; + } + + /* + * SYRK + */ + if( optypea==0 ) + { + + /* + * C=alpha*A*A^H+beta*C + */ + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + for(j=j1; j<=j2; j++) + { + if( ae_fp_neq(alpha,0)&&k>0 ) + { + v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &a->ptr.pp_double[ia+j][ja], 1, ae_v_len(ja,ja+k-1)); + } + else + { + v = 0; + } + if( ae_fp_eq(beta,0) ) + { + c->ptr.pp_double[ic+i][jc+j] = alpha*v; + } + else + { + c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v; + } + } + } + return; + } + else + { + + /* + * C=alpha*A^H*A+beta*C + */ + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + if( ae_fp_eq(beta,0) ) + { + for(j=j1; j<=j2; j++) + { + c->ptr.pp_double[ic+i][jc+j] = 0; + } + } + else + { + ae_v_muld(&c->ptr.pp_double[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta); + } + } + for(i=0; i<=k-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( isupper ) + { + j1 = j; + j2 = n-1; + } + else + { + j1 = 0; + j2 = j; + } + v = alpha*a->ptr.pp_double[ia+i][ja+j]; + ae_v_addd(&c->ptr.pp_double[ic+j][jc+j1], 1, &a->ptr.pp_double[ia+i][ja+j1], 1, ae_v_len(jc+j1,jc+j2), v); + } + } + return; + } +} + + +/************************************************************************* +GEMM kernel + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +static void ablas_cmatrixgemmk(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_complex v; + + + + /* + * Special case + */ + if( m*n==0 ) + { + return; + } + + /* + * Try optimized code + */ + if( cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) ) + { + return; + } + + /* + * Another special case + */ + if( k==0 ) + { + if( ae_c_neq_d(beta,0) ) + { + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]); + } + } + } + else + { + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0); + } + } + } + return; + } + + /* + * General case + */ + if( optypea==0&&optypeb!=0 ) + { + + /* + * A*B' + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( k==0||ae_c_eq_d(alpha,0) ) + { + v = ae_complex_from_d(0); + } + else + { + if( optypeb==1 ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(ja,ja+k-1)); + } + else + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &b->ptr.pp_complex[ib+j][jb], 1, "Conj", ae_v_len(ja,ja+k-1)); + } + } + if( ae_c_eq_d(beta,0) ) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(alpha,v); + } + else + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]),ae_c_mul(alpha,v)); + } + } + } + return; + } + if( optypea==0&&optypeb==0 ) + { + + /* + * A*B + */ + for(i=0; i<=m-1; i++) + { + if( ae_c_neq_d(beta,0) ) + { + ae_v_cmulc(&c->ptr.pp_complex[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta); + } + else + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0); + } + } + if( ae_c_neq_d(alpha,0) ) + { + for(j=0; j<=k-1; j++) + { + v = ae_c_mul(alpha,a->ptr.pp_complex[ia+i][ja+j]); + ae_v_caddc(&c->ptr.pp_complex[ic+i][jc], 1, &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(jc,jc+n-1), v); + } + } + } + return; + } + if( optypea!=0&&optypeb!=0 ) + { + + /* + * A'*B' + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( ae_c_eq_d(alpha,0) ) + { + v = ae_complex_from_d(0); + } + else + { + if( optypea==1 ) + { + if( optypeb==1 ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "N", &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(ia,ia+k-1)); + } + else + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "N", &b->ptr.pp_complex[ib+j][jb], 1, "Conj", ae_v_len(ia,ia+k-1)); + } + } + else + { + if( optypeb==1 ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "Conj", &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(ia,ia+k-1)); + } + else + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "Conj", &b->ptr.pp_complex[ib+j][jb], 1, "Conj", ae_v_len(ia,ia+k-1)); + } + } + } + if( ae_c_eq_d(beta,0) ) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(alpha,v); + } + else + { + c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]),ae_c_mul(alpha,v)); + } + } + } + return; + } + if( optypea!=0&&optypeb==0 ) + { + + /* + * A'*B + */ + if( ae_c_eq_d(beta,0) ) + { + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0); + } + } + } + else + { + for(i=0; i<=m-1; i++) + { + ae_v_cmulc(&c->ptr.pp_complex[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta); + } + } + if( ae_c_neq_d(alpha,0) ) + { + for(j=0; j<=k-1; j++) + { + for(i=0; i<=m-1; i++) + { + if( optypea==1 ) + { + v = ae_c_mul(alpha,a->ptr.pp_complex[ia+j][ja+i]); + } + else + { + v = ae_c_mul(alpha,ae_c_conj(a->ptr.pp_complex[ia+j][ja+i], _state)); + } + ae_v_caddc(&c->ptr.pp_complex[ic+i][jc], 1, &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(jc,jc+n-1), v); + } + } + } + return; + } +} + + +/************************************************************************* +GEMM kernel + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +static void ablas_rmatrixgemmk(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double v; + + + + /* + * if matrix size is zero + */ + if( m*n==0 ) + { + return; + } + + /* + * Try optimized code + */ + if( rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) ) + { + return; + } + + /* + * if K=0, then C=Beta*C + */ + if( k==0 ) + { + if( ae_fp_neq(beta,1) ) + { + if( ae_fp_neq(beta,0) ) + { + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]; + } + } + } + else + { + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_double[ic+i][jc+j] = 0; + } + } + } + } + return; + } + + /* + * General case + */ + if( optypea==0&&optypeb!=0 ) + { + + /* + * A*B' + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( k==0||ae_fp_eq(alpha,0) ) + { + v = 0; + } + else + { + v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(ja,ja+k-1)); + } + if( ae_fp_eq(beta,0) ) + { + c->ptr.pp_double[ic+i][jc+j] = alpha*v; + } + else + { + c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v; + } + } + } + return; + } + if( optypea==0&&optypeb==0 ) + { + + /* + * A*B + */ + for(i=0; i<=m-1; i++) + { + if( ae_fp_neq(beta,0) ) + { + ae_v_muld(&c->ptr.pp_double[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta); + } + else + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_double[ic+i][jc+j] = 0; + } + } + if( ae_fp_neq(alpha,0) ) + { + for(j=0; j<=k-1; j++) + { + v = alpha*a->ptr.pp_double[ia+i][ja+j]; + ae_v_addd(&c->ptr.pp_double[ic+i][jc], 1, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(jc,jc+n-1), v); + } + } + } + return; + } + if( optypea!=0&&optypeb!=0 ) + { + + /* + * A'*B' + */ + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( ae_fp_eq(alpha,0) ) + { + v = 0; + } + else + { + v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+i], a->stride, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(ia,ia+k-1)); + } + if( ae_fp_eq(beta,0) ) + { + c->ptr.pp_double[ic+i][jc+j] = alpha*v; + } + else + { + c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v; + } + } + } + return; + } + if( optypea!=0&&optypeb==0 ) + { + + /* + * A'*B + */ + if( ae_fp_eq(beta,0) ) + { + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + c->ptr.pp_double[ic+i][jc+j] = 0; + } + } + } + else + { + for(i=0; i<=m-1; i++) + { + ae_v_muld(&c->ptr.pp_double[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta); + } + } + if( ae_fp_neq(alpha,0) ) + { + for(j=0; j<=k-1; j++) + { + for(i=0; i<=m-1; i++) + { + v = alpha*a->ptr.pp_double[ia+j][ja+i]; + ae_v_addd(&c->ptr.pp_double[ic+i][jc], 1, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(jc,jc+n-1), v); + } + } + } + return; + } +} + + + + +/************************************************************************* +QR decomposition of a rectangular matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and R in compact form (see below). + Tau - array of scalar factors which are used to form + matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)]. + +Matrix A is represented as A = QR, where Q is an orthogonal matrix of size +MxM, R - upper triangular (or upper trapezoid) matrix of size M x N. + +The elements of matrix R are located on and above the main diagonal of +matrix A. The elements which are located in Tau array and below the main +diagonal of matrix A are used to form matrix Q as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(0)*H(2)*...*H(k-1), + +where k = min(m,n), and each H(i) is in the form + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - real vector, +so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i). + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqr(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t rowscount; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true); + + if( m<=0||n<=0 ) + { + ae_frame_leave(_state); + return; + } + minmn = ae_minint(m, n, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(tau, minmn, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, m, ablasblocksize(a, _state), _state); + ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, 2*ablasblocksize(a, _state), n, _state); + + /* + * Blocked code + */ + blockstart = 0; + while(blockstart!=minmn) + { + + /* + * Determine block size + */ + blocksize = minmn-blockstart; + if( blocksize>ablasblocksize(a, _state) ) + { + blocksize = ablasblocksize(a, _state); + } + rowscount = m-blockstart; + + /* + * QR decomposition of submatrix. + * Matrix is copied to temporary storage to solve + * some TLB issues arising from non-contiguous memory + * access pattern. + */ + rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ortfac_rmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state); + rmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state); + ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1)); + + /* + * Update the rest, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( blockstart+blocksize<=n-1 ) + { + if( n-blockstart-blocksize>=2*ablasblocksize(a, _state)||rowscount>=4*ablasblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q'. + * + * Q = E + Y*T*Y' = E + TmpA*TmpT*TmpA' + * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA' + */ + rmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, 1.0, &tmpa, 0, 0, 1, a, blockstart, blockstart+blocksize, 0, 0.0, &tmpr, 0, 0, _state); + rmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, 1.0, &tmpt, 0, 0, 1, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state); + rmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, a, blockstart, blockstart+blocksize, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=0; i<=blocksize-1; i++) + { + ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i)); + t.ptr.p_double[1] = 1; + applyreflectionfromtheleft(a, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state); + } + } + } + + /* + * Advance + */ + blockstart = blockstart+blocksize; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +LQ decomposition of a rectangular matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices L and Q in compact form (see below) + Tau - array of scalar factors which are used to form + matrix Q. Array whose index ranges within [0..Min(M,N)-1]. + +Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size +MxM, L - lower triangular (or lower trapezoid) matrix of size M x N. + +The elements of matrix L are located on and below the main diagonal of +matrix A. The elements which are located in Tau array and above the main +diagonal of matrix A are used to form matrix Q as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(k-1)*H(k-2)*...*H(1)*H(0), + +where k = min(m,n), and each H(i) is of the form + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0, +v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1). + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlq(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t columnscount; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true); + + if( m<=0||n<=0 ) + { + ae_frame_leave(_state); + return; + } + minmn = ae_minint(m, n, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(tau, minmn, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, ablasblocksize(a, _state), n, _state); + ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, m, 2*ablasblocksize(a, _state), _state); + + /* + * Blocked code + */ + blockstart = 0; + while(blockstart!=minmn) + { + + /* + * Determine block size + */ + blocksize = minmn-blockstart; + if( blocksize>ablasblocksize(a, _state) ) + { + blocksize = ablasblocksize(a, _state); + } + columnscount = n-blockstart; + + /* + * LQ decomposition of submatrix. + * Matrix is copied to temporary storage to solve + * some TLB issues arising from non-contiguous memory + * access pattern. + */ + rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ortfac_rmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state); + rmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state); + ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1)); + + /* + * Update the rest, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( blockstart+blocksize<=m-1 ) + { + if( m-blockstart-blocksize>=2*ablasblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q. + * + * Q = E + Y*T*Y' = E + TmpA'*TmpT*TmpA + */ + rmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, 1.0, a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state); + rmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, 0.0, &tmpr, 0, blocksize, _state); + rmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, a, blockstart+blocksize, blockstart, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=0; i<=blocksize-1; i++) + { + ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i)); + t.ptr.p_double[1] = 1; + applyreflectionfromtheright(a, taubuf.ptr.p_double[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state); + } + } + } + + /* + * Advance + */ + blockstart = blockstart+blocksize; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +QR decomposition of a rectangular complex matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and R in compact form + Tau - array of scalar factors which are used to form matrix Q. Array + whose indexes range within [0.. Min(M,N)-1] + +Matrix A is represented as A = QR, where Q is an orthogonal matrix of size +MxM, R - upper triangular (or upper trapezoid) matrix of size MxN. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void cmatrixqr(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t rowscount; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true); + + if( m<=0||n<=0 ) + { + ae_frame_leave(_state); + return; + } + minmn = ae_minint(m, n, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(tau, minmn, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, m, ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, 2*ablascomplexblocksize(a, _state), n, _state); + + /* + * Blocked code + */ + blockstart = 0; + while(blockstart!=minmn) + { + + /* + * Determine block size + */ + blocksize = minmn-blockstart; + if( blocksize>ablascomplexblocksize(a, _state) ) + { + blocksize = ablascomplexblocksize(a, _state); + } + rowscount = m-blockstart; + + /* + * QR decomposition of submatrix. + * Matrix is copied to temporary storage to solve + * some TLB issues arising from non-contiguous memory + * access pattern. + */ + cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ortfac_cmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state); + cmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state); + ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1)); + + /* + * Update the rest, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( blockstart+blocksize<=n-1 ) + { + if( n-blockstart-blocksize>=2*ablascomplexblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q'. + * + * Q = E + Y*T*Y' = E + TmpA*TmpT*TmpA' + * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA' + */ + cmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, a, blockstart, blockstart+blocksize, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state); + cmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 2, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state); + cmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), a, blockstart, blockstart+blocksize, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=0; i<=blocksize-1; i++) + { + ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i)); + t.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheleft(a, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state); + } + } + } + + /* + * Advance + */ + blockstart = blockstart+blocksize; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +LQ decomposition of a rectangular complex matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and L in compact form + Tau - array of scalar factors which are used to form matrix Q. Array + whose indexes range within [0.. Min(M,N)-1] + +Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size +MxM, L - lower triangular (or lower trapezoid) matrix of size MxN. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void cmatrixlq(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t columnscount; + ae_int_t i; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true); + + if( m<=0||n<=0 ) + { + ae_frame_leave(_state); + return; + } + minmn = ae_minint(m, n, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(tau, minmn, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, ablascomplexblocksize(a, _state), n, _state); + ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, m, 2*ablascomplexblocksize(a, _state), _state); + + /* + * Blocked code + */ + blockstart = 0; + while(blockstart!=minmn) + { + + /* + * Determine block size + */ + blocksize = minmn-blockstart; + if( blocksize>ablascomplexblocksize(a, _state) ) + { + blocksize = ablascomplexblocksize(a, _state); + } + columnscount = n-blockstart; + + /* + * LQ decomposition of submatrix. + * Matrix is copied to temporary storage to solve + * some TLB issues arising from non-contiguous memory + * access pattern. + */ + cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ortfac_cmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state); + cmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state); + ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1)); + + /* + * Update the rest, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( blockstart+blocksize<=m-1 ) + { + if( m-blockstart-blocksize>=2*ablascomplexblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q. + * + * Q = E + Y*T*Y' = E + TmpA'*TmpT*TmpA + */ + cmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state); + cmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state); + cmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=0; i<=blocksize-1; i++) + { + ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i)); + t.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheright(a, taubuf.ptr.p_complex[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state); + } + } + } + + /* + * Advance + */ + blockstart = blockstart+blocksize; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Partial unpacking of matrix Q from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of RMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of the RMatrixQR subroutine. + QColumns - required number of columns of matrix Q. M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array whose indexes range within [0..M-1, 0..QColumns-1]. + If QColumns=0, the array remains unchanged. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqrunpackq(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_int_t qcolumns, + /* Real */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_int_t refcnt; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t rowscount; + ae_int_t i; + ae_int_t j; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true); + + ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state); + if( (m<=0||n<=0)||qcolumns<=0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + minmn = ae_minint(m, n, _state); + refcnt = ae_minint(minmn, qcolumns, _state); + ae_matrix_set_length(q, m, qcolumns, _state); + for(i=0; i<=m-1; i++) + { + for(j=0; j<=qcolumns-1; j++) + { + if( i==j ) + { + q->ptr.pp_double[i][j] = 1; + } + else + { + q->ptr.pp_double[i][j] = 0; + } + } + } + ae_vector_set_length(&work, ae_maxint(m, qcolumns, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, qcolumns, _state)+1, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, m, ablasblocksize(a, _state), _state); + ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, 2*ablasblocksize(a, _state), qcolumns, _state); + + /* + * Blocked code + */ + blockstart = ablasblocksize(a, _state)*(refcnt/ablasblocksize(a, _state)); + blocksize = refcnt-blockstart; + while(blockstart>=0) + { + rowscount = m-blockstart; + + /* + * Copy current block + */ + rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1)); + + /* + * Update, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( qcolumns>=2*ablasblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply matrix by Q. + * + * Q = E + Y*T*Y' = E + TmpA*TmpT*TmpA' + */ + rmatrixgemm(blocksize, qcolumns, rowscount, 1.0, &tmpa, 0, 0, 1, q, blockstart, 0, 0, 0.0, &tmpr, 0, 0, _state); + rmatrixgemm(blocksize, qcolumns, blocksize, 1.0, &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state); + rmatrixgemm(rowscount, qcolumns, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, q, blockstart, 0, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=blocksize-1; i>=0; i--) + { + ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i)); + t.ptr.p_double[1] = 1; + applyreflectionfromtheleft(q, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state); + } + } + + /* + * Advance + */ + blockstart = blockstart-ablasblocksize(a, _state); + blocksize = ablasblocksize(a, _state); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking of matrix R from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of RMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + R - matrix R, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqrunpackr(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* r, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + ae_matrix_clear(r); + + if( m<=0||n<=0 ) + { + return; + } + k = ae_minint(m, n, _state); + ae_matrix_set_length(r, m, n, _state); + for(i=0; i<=n-1; i++) + { + r->ptr.pp_double[0][i] = 0; + } + for(i=1; i<=m-1; i++) + { + ae_v_move(&r->ptr.pp_double[i][0], 1, &r->ptr.pp_double[0][0], 1, ae_v_len(0,n-1)); + } + for(i=0; i<=k-1; i++) + { + ae_v_move(&r->ptr.pp_double[i][i], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(i,n-1)); + } +} + + +/************************************************************************* +Partial unpacking of matrix Q from the LQ decomposition of a matrix A + +Input parameters: + A - matrices L and Q in compact form. + Output of RMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of the RMatrixLQ subroutine. + QRows - required number of rows in matrix Q. N>=QRows>=0. + +Output parameters: + Q - first QRows rows of matrix Q. Array whose indexes range + within [0..QRows-1, 0..N-1]. If QRows=0, the array remains + unchanged. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlqunpackq(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_int_t qrows, + /* Real */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_int_t refcnt; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t columnscount; + ae_int_t i; + ae_int_t j; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true); + + ae_assert(qrows<=n, "RMatrixLQUnpackQ: QRows>N!", _state); + if( (m<=0||n<=0)||qrows<=0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + minmn = ae_minint(m, n, _state); + refcnt = ae_minint(minmn, qrows, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, ablasblocksize(a, _state), n, _state); + ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, qrows, 2*ablasblocksize(a, _state), _state); + ae_matrix_set_length(q, qrows, n, _state); + for(i=0; i<=qrows-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + q->ptr.pp_double[i][j] = 1; + } + else + { + q->ptr.pp_double[i][j] = 0; + } + } + } + + /* + * Blocked code + */ + blockstart = ablasblocksize(a, _state)*(refcnt/ablasblocksize(a, _state)); + blocksize = refcnt-blockstart; + while(blockstart>=0) + { + columnscount = n-blockstart; + + /* + * Copy submatrix + */ + rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1)); + + /* + * Update matrix, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( qrows>=2*ablasblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q'. + * + * Q' = E + Y*T'*Y' = E + TmpA'*TmpT'*TmpA + */ + rmatrixgemm(qrows, blocksize, columnscount, 1.0, q, 0, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state); + rmatrixgemm(qrows, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 1, 0.0, &tmpr, 0, blocksize, _state); + rmatrixgemm(qrows, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, q, 0, blockstart, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=blocksize-1; i>=0; i--) + { + ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i)); + t.ptr.p_double[1] = 1; + applyreflectionfromtheright(q, taubuf.ptr.p_double[i], &t, 0, qrows-1, blockstart+i, n-1, &work, _state); + } + } + + /* + * Advance + */ + blockstart = blockstart-ablasblocksize(a, _state); + blocksize = ablasblocksize(a, _state); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking of matrix L from the LQ decomposition of a matrix A + +Input parameters: + A - matrices Q and L in compact form. + Output of RMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + L - matrix L, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlqunpackl(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* l, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + ae_matrix_clear(l); + + if( m<=0||n<=0 ) + { + return; + } + ae_matrix_set_length(l, m, n, _state); + for(i=0; i<=n-1; i++) + { + l->ptr.pp_double[0][i] = 0; + } + for(i=1; i<=m-1; i++) + { + ae_v_move(&l->ptr.pp_double[i][0], 1, &l->ptr.pp_double[0][0], 1, ae_v_len(0,n-1)); + } + for(i=0; i<=m-1; i++) + { + k = ae_minint(i, n-1, _state); + ae_v_move(&l->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k)); + } +} + + +/************************************************************************* +Partial unpacking of matrix Q from QR decomposition of a complex matrix A. + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixQR subroutine . + M - number of rows in matrix A. M>=0. + N - number of columns in matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of CMatrixQR subroutine . + QColumns - required number of columns in matrix Q. M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array whose index ranges within [0..M-1, 0..QColumns-1]. + If QColumns=0, array isn't changed. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixqrunpackq(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_int_t qcolumns, + /* Complex */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_int_t refcnt; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t rowscount; + ae_int_t i; + ae_int_t j; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state); + if( m<=0||n<=0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + minmn = ae_minint(m, n, _state); + refcnt = ae_minint(minmn, qcolumns, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, m, ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, 2*ablascomplexblocksize(a, _state), qcolumns, _state); + ae_matrix_set_length(q, m, qcolumns, _state); + for(i=0; i<=m-1; i++) + { + for(j=0; j<=qcolumns-1; j++) + { + if( i==j ) + { + q->ptr.pp_complex[i][j] = ae_complex_from_d(1); + } + else + { + q->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + } + + /* + * Blocked code + */ + blockstart = ablascomplexblocksize(a, _state)*(refcnt/ablascomplexblocksize(a, _state)); + blocksize = refcnt-blockstart; + while(blockstart>=0) + { + rowscount = m-blockstart; + + /* + * QR decomposition of submatrix. + * Matrix is copied to temporary storage to solve + * some TLB issues arising from non-contiguous memory + * access pattern. + */ + cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1)); + + /* + * Update matrix, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( qcolumns>=2*ablascomplexblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q. + * + * Q = E + Y*T*Y' = E + TmpA*TmpT*TmpA' + */ + cmatrixgemm(blocksize, qcolumns, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, q, blockstart, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state); + cmatrixgemm(blocksize, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state); + cmatrixgemm(rowscount, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), q, blockstart, 0, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=blocksize-1; i>=0; i--) + { + ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i)); + t.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheleft(q, taubuf.ptr.p_complex[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state); + } + } + + /* + * Advance + */ + blockstart = blockstart-ablascomplexblocksize(a, _state); + blocksize = ablascomplexblocksize(a, _state); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking of matrix R from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + R - matrix R, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixqrunpackr(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* r, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + ae_matrix_clear(r); + + if( m<=0||n<=0 ) + { + return; + } + k = ae_minint(m, n, _state); + ae_matrix_set_length(r, m, n, _state); + for(i=0; i<=n-1; i++) + { + r->ptr.pp_complex[0][i] = ae_complex_from_d(0); + } + for(i=1; i<=m-1; i++) + { + ae_v_cmove(&r->ptr.pp_complex[i][0], 1, &r->ptr.pp_complex[0][0], 1, "N", ae_v_len(0,n-1)); + } + for(i=0; i<=k-1; i++) + { + ae_v_cmove(&r->ptr.pp_complex[i][i], 1, &a->ptr.pp_complex[i][i], 1, "N", ae_v_len(i,n-1)); + } +} + + +/************************************************************************* +Partial unpacking of matrix Q from LQ decomposition of a complex matrix A. + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixLQ subroutine . + M - number of rows in matrix A. M>=0. + N - number of columns in matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of CMatrixLQ subroutine . + QRows - required number of rows in matrix Q. N>=QColumns>=0. + +Output parameters: + Q - first QRows rows of matrix Q. + Array whose index ranges within [0..QRows-1, 0..N-1]. + If QRows=0, array isn't changed. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlqunpackq(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_int_t qrows, + /* Complex */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_vector taubuf; + ae_int_t minmn; + ae_int_t refcnt; + ae_matrix tmpa; + ae_matrix tmpt; + ae_matrix tmpr; + ae_int_t blockstart; + ae_int_t blocksize; + ae_int_t columnscount; + ae_int_t i; + ae_int_t j; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true); + + if( m<=0||n<=0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * Init + */ + minmn = ae_minint(m, n, _state); + refcnt = ae_minint(minmn, qrows, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state); + ae_vector_set_length(&taubuf, minmn, _state); + ae_matrix_set_length(&tmpa, ablascomplexblocksize(a, _state), n, _state); + ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(&tmpr, qrows, 2*ablascomplexblocksize(a, _state), _state); + ae_matrix_set_length(q, qrows, n, _state); + for(i=0; i<=qrows-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + q->ptr.pp_complex[i][j] = ae_complex_from_d(1); + } + else + { + q->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + } + + /* + * Blocked code + */ + blockstart = ablascomplexblocksize(a, _state)*(refcnt/ablascomplexblocksize(a, _state)); + blocksize = refcnt-blockstart; + while(blockstart>=0) + { + columnscount = n-blockstart; + + /* + * LQ decomposition of submatrix. + * Matrix is copied to temporary storage to solve + * some TLB issues arising from non-contiguous memory + * access pattern. + */ + cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state); + ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1)); + + /* + * Update matrix, choose between: + * a) Level 2 algorithm (when the rest of the matrix is small enough) + * b) blocked algorithm, see algorithm 5 from 'A storage efficient WY + * representation for products of Householder transformations', + * by R. Schreiber and C. Van Loan. + */ + if( qrows>=2*ablascomplexblocksize(a, _state) ) + { + + /* + * Prepare block reflector + */ + ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state); + + /* + * Multiply the rest of A by Q'. + * + * Q' = E + Y*T'*Y' = E + TmpA'*TmpT'*TmpA + */ + cmatrixgemm(qrows, blocksize, columnscount, ae_complex_from_d(1.0), q, 0, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state); + cmatrixgemm(qrows, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state); + cmatrixgemm(qrows, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), q, 0, blockstart, _state); + } + else + { + + /* + * Level 2 algorithm + */ + for(i=blocksize-1; i>=0; i--) + { + ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i)); + t.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheright(q, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, 0, qrows-1, blockstart+i, n-1, &work, _state); + } + } + + /* + * Advance + */ + blockstart = blockstart-ablascomplexblocksize(a, _state); + blocksize = ablascomplexblocksize(a, _state); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking of matrix L from the LQ decomposition of a matrix A + +Input parameters: + A - matrices Q and L in compact form. + Output of CMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + L - matrix L, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlqunpackl(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* l, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + + ae_matrix_clear(l); + + if( m<=0||n<=0 ) + { + return; + } + ae_matrix_set_length(l, m, n, _state); + for(i=0; i<=n-1; i++) + { + l->ptr.pp_complex[0][i] = ae_complex_from_d(0); + } + for(i=1; i<=m-1; i++) + { + ae_v_cmove(&l->ptr.pp_complex[i][0], 1, &l->ptr.pp_complex[0][0], 1, "N", ae_v_len(0,n-1)); + } + for(i=0; i<=m-1; i++) + { + k = ae_minint(i, n-1, _state); + ae_v_cmove(&l->ptr.pp_complex[i][0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,k)); + } +} + + +/************************************************************************* +Reduction of a rectangular matrix to bidiagonal form + +The algorithm reduces the rectangular matrix A to bidiagonal form by +orthogonal transformations P and Q: A = Q*B*P. + +Input parameters: + A - source matrix. array[0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q, B, P in compact form (see below). + TauQ - scalar factors which are used to form matrix Q. + TauP - scalar factors which are used to form matrix P. + +The main diagonal and one of the secondary diagonals of matrix A are +replaced with bidiagonal matrix B. Other elements contain elementary +reflections which form MxM matrix Q and NxN matrix P, respectively. + +If M>=N, B is the upper bidiagonal MxN matrix and is stored in the +corresponding elements of matrix A. Matrix Q is represented as a +product of elementary reflections Q = H(0)*H(1)*...*H(n-1), where +H(i) = 1-tau*v*v'. Here tau is a scalar which is stored in TauQ[i], and +vector v has the following structure: v(0:i-1)=0, v(i)=1, v(i+1:m-1) is +stored in elements A(i+1:m-1,i). Matrix P is as follows: P = +G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i], +u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1). + +If M<N, B is the lower bidiagonal MxN matrix and is stored in the +corresponding elements of matrix A. Q = H(0)*H(1)*...*H(m-2), where +H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1) +is stored in elements A(i+2:m-1,i). P = G(0)*G(1)*...*G(m-1), +G(i) = 1-tau*u*u', tau is stored in TauP, u(0:i-1)=0, u(i)=1, u(i+1:n-1) +is stored in A(i,i+1:n-1). + +EXAMPLE: + +m=6, n=5 (m > n): m=5, n=6 (m < n): + +( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +( v1 v2 v3 v4 v5 ) + +Here vi and ui are vectors which form H(i) and G(i), and d and e - +are the diagonal and off-diagonal elements of matrix B. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +void rmatrixbd(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tauq, + /* Real */ ae_vector* taup, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_vector t; + ae_int_t minmn; + ae_int_t maxmn; + ae_int_t i; + double ltau; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tauq); + ae_vector_clear(taup); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + + /* + * Prepare + */ + if( n<=0||m<=0 ) + { + ae_frame_leave(_state); + return; + } + minmn = ae_minint(m, n, _state); + maxmn = ae_maxint(m, n, _state); + ae_vector_set_length(&work, maxmn+1, _state); + ae_vector_set_length(&t, maxmn+1, _state); + if( m>=n ) + { + ae_vector_set_length(tauq, n, _state); + ae_vector_set_length(taup, n, _state); + } + else + { + ae_vector_set_length(tauq, m, _state); + ae_vector_set_length(taup, m, _state); + } + if( m>=n ) + { + + /* + * Reduce to upper bidiagonal form + */ + for(i=0; i<=n-1; i++) + { + + /* + * Generate elementary reflector H(i) to annihilate A(i+1:m-1,i) + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], a->stride, ae_v_len(1,m-i)); + generatereflection(&t, m-i, <au, _state); + tauq->ptr.p_double[i] = ltau; + ae_v_move(&a->ptr.pp_double[i][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i,m-1)); + t.ptr.p_double[1] = 1; + + /* + * Apply H(i) to A(i:m-1,i+1:n-1) from the left + */ + applyreflectionfromtheleft(a, ltau, &t, i, m-1, i+1, n-1, &work, _state); + if( i<n-1 ) + { + + /* + * Generate elementary reflector G(i) to annihilate + * A(i,i+2:n-1) + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-i-1)); + generatereflection(&t, n-1-i, <au, _state); + taup->ptr.p_double[i] = ltau; + ae_v_move(&a->ptr.pp_double[i][i+1], 1, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1)); + t.ptr.p_double[1] = 1; + + /* + * Apply G(i) to A(i+1:m-1,i+1:n-1) from the right + */ + applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state); + } + else + { + taup->ptr.p_double[i] = 0; + } + } + } + else + { + + /* + * Reduce to lower bidiagonal form + */ + for(i=0; i<=m-1; i++) + { + + /* + * Generate elementary reflector G(i) to annihilate A(i,i+1:n-1) + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i)); + generatereflection(&t, n-i, <au, _state); + taup->ptr.p_double[i] = ltau; + ae_v_move(&a->ptr.pp_double[i][i], 1, &t.ptr.p_double[1], 1, ae_v_len(i,n-1)); + t.ptr.p_double[1] = 1; + + /* + * Apply G(i) to A(i+1:m-1,i:n-1) from the right + */ + applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i, n-1, &work, _state); + if( i<m-1 ) + { + + /* + * Generate elementary reflector H(i) to annihilate + * A(i+2:m-1,i) + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,m-1-i)); + generatereflection(&t, m-1-i, <au, _state); + tauq->ptr.p_double[i] = ltau; + ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,m-1)); + t.ptr.p_double[1] = 1; + + /* + * Apply H(i) to A(i+1:m-1,i+1:n-1) from the left + */ + applyreflectionfromtheleft(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state); + } + else + { + tauq->ptr.p_double[i] = 0; + } + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking matrix Q which reduces a matrix to bidiagonal form. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUQ - scalar factors which are used to form Q. + Output of ToBidiagonal subroutine. + QColumns - required number of columns in matrix Q. + M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array[0..M-1, 0..QColumns-1] + If QColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackq(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tauq, + ae_int_t qcolumns, + /* Real */ ae_matrix* q, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + + ae_matrix_clear(q); + + ae_assert(qcolumns<=m, "RMatrixBDUnpackQ: QColumns>M!", _state); + ae_assert(qcolumns>=0, "RMatrixBDUnpackQ: QColumns<0!", _state); + if( (m==0||n==0)||qcolumns==0 ) + { + return; + } + + /* + * prepare Q + */ + ae_matrix_set_length(q, m, qcolumns, _state); + for(i=0; i<=m-1; i++) + { + for(j=0; j<=qcolumns-1; j++) + { + if( i==j ) + { + q->ptr.pp_double[i][j] = 1; + } + else + { + q->ptr.pp_double[i][j] = 0; + } + } + } + + /* + * Calculate + */ + rmatrixbdmultiplybyq(qp, m, n, tauq, q, m, qcolumns, ae_false, ae_false, _state); +} + + +/************************************************************************* +Multiplication by matrix Q which reduces matrix A to bidiagonal form. + +The algorithm allows pre- or post-multiply by Q or Q'. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUQ - scalar factors which are used to form Q. + Output of ToBidiagonal subroutine. + Z - multiplied matrix. + array[0..ZRows-1,0..ZColumns-1] + ZRows - number of rows in matrix Z. If FromTheRight=False, + ZRows=M, otherwise ZRows can be arbitrary. + ZColumns - number of columns in matrix Z. If FromTheRight=True, + ZColumns=M, otherwise ZColumns can be arbitrary. + FromTheRight - pre- or post-multiply. + DoTranspose - multiply by Q or Q'. + +Output parameters: + Z - product of Z and Q. + Array[0..ZRows-1,0..ZColumns-1] + If ZRows=0 or ZColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdmultiplybyq(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tauq, + /* Real */ ae_matrix* z, + ae_int_t zrows, + ae_int_t zcolumns, + ae_bool fromtheright, + ae_bool dotranspose, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t i1; + ae_int_t i2; + ae_int_t istep; + ae_vector v; + ae_vector work; + ae_int_t mx; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 ) + { + ae_frame_leave(_state); + return; + } + ae_assert((fromtheright&&zcolumns==m)||(!fromtheright&&zrows==m), "RMatrixBDMultiplyByQ: incorrect Z size!", _state); + + /* + * init + */ + mx = ae_maxint(m, n, _state); + mx = ae_maxint(mx, zrows, _state); + mx = ae_maxint(mx, zcolumns, _state); + ae_vector_set_length(&v, mx+1, _state); + ae_vector_set_length(&work, mx+1, _state); + if( m>=n ) + { + + /* + * setup + */ + if( fromtheright ) + { + i1 = 0; + i2 = n-1; + istep = 1; + } + else + { + i1 = n-1; + i2 = 0; + istep = -1; + } + if( dotranspose ) + { + i = i1; + i1 = i2; + i2 = i; + istep = -istep; + } + + /* + * Process + */ + i = i1; + do + { + ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], qp->stride, ae_v_len(1,m-i)); + v.ptr.p_double[1] = 1; + if( fromtheright ) + { + applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i, m-1, &work, _state); + } + else + { + applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i, m-1, 0, zcolumns-1, &work, _state); + } + i = i+istep; + } + while(i!=i2+istep); + } + else + { + + /* + * setup + */ + if( fromtheright ) + { + i1 = 0; + i2 = m-2; + istep = 1; + } + else + { + i1 = m-2; + i2 = 0; + istep = -1; + } + if( dotranspose ) + { + i = i1; + i1 = i2; + i2 = i; + istep = -istep; + } + + /* + * Process + */ + if( m-1>0 ) + { + i = i1; + do + { + ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i+1][i], qp->stride, ae_v_len(1,m-i-1)); + v.ptr.p_double[1] = 1; + if( fromtheright ) + { + applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i+1, m-1, &work, _state); + } + else + { + applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i+1, m-1, 0, zcolumns-1, &work, _state); + } + i = i+istep; + } + while(i!=i2+istep); + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking matrix P which reduces matrix A to bidiagonal form. +The subroutine returns transposed matrix P. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUP - scalar factors which are used to form P. + Output of ToBidiagonal subroutine. + PTRows - required number of rows of matrix P^T. N >= PTRows >= 0. + +Output parameters: + PT - first PTRows columns of matrix P^T + Array[0..PTRows-1, 0..N-1] + If PTRows=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackpt(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* taup, + ae_int_t ptrows, + /* Real */ ae_matrix* pt, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + + ae_matrix_clear(pt); + + ae_assert(ptrows<=n, "RMatrixBDUnpackPT: PTRows>N!", _state); + ae_assert(ptrows>=0, "RMatrixBDUnpackPT: PTRows<0!", _state); + if( (m==0||n==0)||ptrows==0 ) + { + return; + } + + /* + * prepare PT + */ + ae_matrix_set_length(pt, ptrows, n, _state); + for(i=0; i<=ptrows-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + pt->ptr.pp_double[i][j] = 1; + } + else + { + pt->ptr.pp_double[i][j] = 0; + } + } + } + + /* + * Calculate + */ + rmatrixbdmultiplybyp(qp, m, n, taup, pt, ptrows, n, ae_true, ae_true, _state); +} + + +/************************************************************************* +Multiplication by matrix P which reduces matrix A to bidiagonal form. + +The algorithm allows pre- or post-multiply by P or P'. + +Input parameters: + QP - matrices Q and P in compact form. + Output of RMatrixBD subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUP - scalar factors which are used to form P. + Output of RMatrixBD subroutine. + Z - multiplied matrix. + Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. + ZRows - number of rows in matrix Z. If FromTheRight=False, + ZRows=N, otherwise ZRows can be arbitrary. + ZColumns - number of columns in matrix Z. If FromTheRight=True, + ZColumns=N, otherwise ZColumns can be arbitrary. + FromTheRight - pre- or post-multiply. + DoTranspose - multiply by P or P'. + +Output parameters: + Z - product of Z and P. + Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. + If ZRows=0 or ZColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdmultiplybyp(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* taup, + /* Real */ ae_matrix* z, + ae_int_t zrows, + ae_int_t zcolumns, + ae_bool fromtheright, + ae_bool dotranspose, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_vector v; + ae_vector work; + ae_int_t mx; + ae_int_t i1; + ae_int_t i2; + ae_int_t istep; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 ) + { + ae_frame_leave(_state); + return; + } + ae_assert((fromtheright&&zcolumns==n)||(!fromtheright&&zrows==n), "RMatrixBDMultiplyByP: incorrect Z size!", _state); + + /* + * init + */ + mx = ae_maxint(m, n, _state); + mx = ae_maxint(mx, zrows, _state); + mx = ae_maxint(mx, zcolumns, _state); + ae_vector_set_length(&v, mx+1, _state); + ae_vector_set_length(&work, mx+1, _state); + if( m>=n ) + { + + /* + * setup + */ + if( fromtheright ) + { + i1 = n-2; + i2 = 0; + istep = -1; + } + else + { + i1 = 0; + i2 = n-2; + istep = 1; + } + if( !dotranspose ) + { + i = i1; + i1 = i2; + i2 = i; + istep = -istep; + } + + /* + * Process + */ + if( n-1>0 ) + { + i = i1; + do + { + ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-1-i)); + v.ptr.p_double[1] = 1; + if( fromtheright ) + { + applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i+1, n-1, &work, _state); + } + else + { + applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i+1, n-1, 0, zcolumns-1, &work, _state); + } + i = i+istep; + } + while(i!=i2+istep); + } + } + else + { + + /* + * setup + */ + if( fromtheright ) + { + i1 = m-1; + i2 = 0; + istep = -1; + } + else + { + i1 = 0; + i2 = m-1; + istep = 1; + } + if( !dotranspose ) + { + i = i1; + i1 = i2; + i2 = i; + istep = -istep; + } + + /* + * Process + */ + i = i1; + do + { + ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], 1, ae_v_len(1,n-i)); + v.ptr.p_double[1] = 1; + if( fromtheright ) + { + applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i, n-1, &work, _state); + } + else + { + applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i, n-1, 0, zcolumns-1, &work, _state); + } + i = i+istep; + } + while(i!=i2+istep); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking of the main and secondary diagonals of bidiagonal decomposition +of matrix A. + +Input parameters: + B - output of RMatrixBD subroutine. + M - number of rows in matrix B. + N - number of columns in matrix B. + +Output parameters: + IsUpper - True, if the matrix is upper bidiagonal. + otherwise IsUpper is False. + D - the main diagonal. + Array whose index ranges within [0..Min(M,N)-1]. + E - the secondary diagonal (upper or lower, depending on + the value of IsUpper). + Array index ranges within [0..Min(M,N)-1], the last + element is not used. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackdiagonals(/* Real */ ae_matrix* b, + ae_int_t m, + ae_int_t n, + ae_bool* isupper, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_state *_state) +{ + ae_int_t i; + + *isupper = ae_false; + ae_vector_clear(d); + ae_vector_clear(e); + + *isupper = m>=n; + if( m<=0||n<=0 ) + { + return; + } + if( *isupper ) + { + ae_vector_set_length(d, n, _state); + ae_vector_set_length(e, n, _state); + for(i=0; i<=n-2; i++) + { + d->ptr.p_double[i] = b->ptr.pp_double[i][i]; + e->ptr.p_double[i] = b->ptr.pp_double[i][i+1]; + } + d->ptr.p_double[n-1] = b->ptr.pp_double[n-1][n-1]; + } + else + { + ae_vector_set_length(d, m, _state); + ae_vector_set_length(e, m, _state); + for(i=0; i<=m-2; i++) + { + d->ptr.p_double[i] = b->ptr.pp_double[i][i]; + e->ptr.p_double[i] = b->ptr.pp_double[i+1][i]; + } + d->ptr.p_double[m-1] = b->ptr.pp_double[m-1][m-1]; + } +} + + +/************************************************************************* +Reduction of a square matrix to upper Hessenberg form: Q'*A*Q = H, +where Q is an orthogonal matrix, H - Hessenberg matrix. + +Input parameters: + A - matrix A with elements [0..N-1, 0..N-1] + N - size of matrix A. + +Output parameters: + A - matrices Q and P in compact form (see below). + Tau - array of scalar factors which are used to form matrix Q. + Array whose index ranges within [0..N-2] + +Matrix H is located on the main diagonal, on the lower secondary diagonal +and above the main diagonal of matrix A. The elements which are used to +form matrix Q are situated in array Tau and below the lower secondary +diagonal of matrix A as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(0)*H(2)*...*H(n-2), + +where each H(i) is given by + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - is a real vector, +so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void rmatrixhessenberg(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + double v; + ae_vector t; + ae_vector work; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + ae_assert(n>=0, "RMatrixHessenberg: incorrect N!", _state); + + /* + * Quick return if possible + */ + if( n<=1 ) + { + ae_frame_leave(_state); + return; + } + ae_vector_set_length(tau, n-2+1, _state); + ae_vector_set_length(&t, n+1, _state); + ae_vector_set_length(&work, n-1+1, _state); + for(i=0; i<=n-2; i++) + { + + /* + * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1)); + generatereflection(&t, n-i-1, &v, _state); + ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1)); + tau->ptr.p_double[i] = v; + t.ptr.p_double[1] = 1; + + /* + * Apply H(i) to A(1:ihi,i+1:ihi) from the right + */ + applyreflectionfromtheright(a, v, &t, 0, n-1, i+1, n-1, &work, _state); + + /* + * Apply H(i) to A(i+1:ihi,i+1:n) from the left + */ + applyreflectionfromtheleft(a, v, &t, i+1, n-1, i+1, n-1, &work, _state); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking matrix Q which reduces matrix A to upper Hessenberg form + +Input parameters: + A - output of RMatrixHessenberg subroutine. + N - size of matrix A. + Tau - scalar factors which are used to form Q. + Output of RMatrixHessenberg subroutine. + +Output parameters: + Q - matrix Q. + Array whose indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixhessenbergunpackq(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + /* Real */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector v; + ae_vector work; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + ae_matrix_set_length(q, n-1+1, n-1+1, _state); + ae_vector_set_length(&v, n-1+1, _state); + ae_vector_set_length(&work, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + q->ptr.pp_double[i][j] = 1; + } + else + { + q->ptr.pp_double[i][j] = 0; + } + } + } + + /* + * unpack Q + */ + for(i=0; i<=n-2; i++) + { + + /* + * Apply H(i) + */ + ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1)); + v.ptr.p_double[1] = 1; + applyreflectionfromtheright(q, tau->ptr.p_double[i], &v, 0, n-1, i+1, n-1, &work, _state); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form) + +Input parameters: + A - output of RMatrixHessenberg subroutine. + N - size of matrix A. + +Output parameters: + H - matrix H. Array whose indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixhessenbergunpackh(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_matrix* h, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector v; + ae_vector work; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(h); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + ae_matrix_set_length(h, n-1+1, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-2; j++) + { + h->ptr.pp_double[i][j] = 0; + } + j = ae_maxint(0, i-1, _state); + ae_v_move(&h->ptr.pp_double[i][j], 1, &a->ptr.pp_double[i][j], 1, ae_v_len(j,n-1)); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Reduction of a symmetric matrix which is given by its higher or lower +triangular part to a tridiagonal matrix using orthogonal similarity +transformation: Q'*A*Q=T. + +Input parameters: + A - matrix to be transformed + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. If IsUpper = True, then matrix A is given + by its upper triangle, and the lower triangle is not used + and not modified by the algorithm, and vice versa + if IsUpper = False. + +Output parameters: + A - matrices T and Q in compact form (see lower) + Tau - array of factors which are forming matrices H(i) + array with elements [0..N-2]. + D - main diagonal of symmetric matrix T. + array with elements [0..N-1]. + E - secondary diagonal of symmetric matrix T. + array with elements [0..N-2]. + + + If IsUpper=True, the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-2) . . . H(2) H(0). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in + A(0:i-1,i+1), and tau in TAU(i). + + If IsUpper=False, the matrix Q is represented as a product of elementary + reflectors + + Q = H(0) H(2) . . . H(n-2). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v1 v2 v3 ) ( d ) + ( d e v2 v3 ) ( e d ) + ( d e v3 ) ( v0 e d ) + ( d e ) ( v0 v1 e d ) + ( d ) ( v0 v1 v2 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void smatrixtd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tau, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + double alpha; + double taui; + double v; + ae_vector t; + ae_vector t2; + ae_vector t3; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_clear(d); + ae_vector_clear(e); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t2, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t3, 0, DT_REAL, _state, ae_true); + + if( n<=0 ) + { + ae_frame_leave(_state); + return; + } + ae_vector_set_length(&t, n+1, _state); + ae_vector_set_length(&t2, n+1, _state); + ae_vector_set_length(&t3, n+1, _state); + if( n>1 ) + { + ae_vector_set_length(tau, n-2+1, _state); + } + ae_vector_set_length(d, n-1+1, _state); + if( n>1 ) + { + ae_vector_set_length(e, n-2+1, _state); + } + if( isupper ) + { + + /* + * Reduce the upper triangle of A + */ + for(i=n-2; i>=0; i--) + { + + /* + * Generate elementary reflector H() = E - tau * v * v' + */ + if( i>=1 ) + { + ae_v_move(&t.ptr.p_double[2], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(2,i+1)); + } + t.ptr.p_double[1] = a->ptr.pp_double[i][i+1]; + generatereflection(&t, i+1, &taui, _state); + if( i>=1 ) + { + ae_v_move(&a->ptr.pp_double[0][i+1], a->stride, &t.ptr.p_double[2], 1, ae_v_len(0,i-1)); + } + a->ptr.pp_double[i][i+1] = t.ptr.p_double[1]; + e->ptr.p_double[i] = a->ptr.pp_double[i][i+1]; + if( ae_fp_neq(taui,0) ) + { + + /* + * Apply H from both sides to A + */ + a->ptr.pp_double[i][i+1] = 1; + + /* + * Compute x := tau * A * v storing x in TAU + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1)); + symmetricmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t3, _state); + ae_v_move(&tau->ptr.p_double[0], 1, &t3.ptr.p_double[1], 1, ae_v_len(0,i)); + + /* + * Compute w := x - 1/2 * tau * (x'*v) * v + */ + v = ae_v_dotproduct(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i)); + alpha = -0.5*taui*v; + ae_v_addd(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i), alpha); + + /* + * Apply the transformation as a rank-2 update: + * A := A - v * w' - w * v' + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1)); + ae_v_move(&t3.ptr.p_double[1], 1, &tau->ptr.p_double[0], 1, ae_v_len(1,i+1)); + symmetricrank2update(a, isupper, 0, i, &t, &t3, &t2, -1, _state); + a->ptr.pp_double[i][i+1] = e->ptr.p_double[i]; + } + d->ptr.p_double[i+1] = a->ptr.pp_double[i+1][i+1]; + tau->ptr.p_double[i] = taui; + } + d->ptr.p_double[0] = a->ptr.pp_double[0][0]; + } + else + { + + /* + * Reduce the lower triangle of A + */ + for(i=0; i<=n-2; i++) + { + + /* + * Generate elementary reflector H = E - tau * v * v' + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1)); + generatereflection(&t, n-i-1, &taui, _state); + ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1)); + e->ptr.p_double[i] = a->ptr.pp_double[i+1][i]; + if( ae_fp_neq(taui,0) ) + { + + /* + * Apply H from both sides to A + */ + a->ptr.pp_double[i+1][i] = 1; + + /* + * Compute x := tau * A * v storing y in TAU + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1)); + symmetricmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state); + ae_v_move(&tau->ptr.p_double[i], 1, &t2.ptr.p_double[1], 1, ae_v_len(i,n-2)); + + /* + * Compute w := x - 1/2 * tau * (x'*v) * v + */ + v = ae_v_dotproduct(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2)); + alpha = -0.5*taui*v; + ae_v_addd(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2), alpha); + + /* + * Apply the transformation as a rank-2 update: + * A := A - v * w' - w * v' + * + */ + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1)); + ae_v_move(&t2.ptr.p_double[1], 1, &tau->ptr.p_double[i], 1, ae_v_len(1,n-i-1)); + symmetricrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, -1, _state); + a->ptr.pp_double[i+1][i] = e->ptr.p_double[i]; + } + d->ptr.p_double[i] = a->ptr.pp_double[i][i]; + tau->ptr.p_double[i] = taui; + } + d->ptr.p_double[n-1] = a->ptr.pp_double[n-1][n-1]; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking matrix Q which reduces symmetric matrix to a tridiagonal +form. + +Input parameters: + A - the result of a SMatrixTD subroutine + N - size of matrix A. + IsUpper - storage format (a parameter of SMatrixTD subroutine) + Tau - the result of a SMatrixTD subroutine + +Output parameters: + Q - transformation matrix. + array with elements [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void smatrixtdunpackq(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tau, + /* Real */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector v; + ae_vector work; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + ae_matrix_set_length(q, n-1+1, n-1+1, _state); + ae_vector_set_length(&v, n+1, _state); + ae_vector_set_length(&work, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + q->ptr.pp_double[i][j] = 1; + } + else + { + q->ptr.pp_double[i][j] = 0; + } + } + } + + /* + * unpack Q + */ + if( isupper ) + { + for(i=0; i<=n-2; i++) + { + + /* + * Apply H(i) + */ + ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1)); + v.ptr.p_double[i+1] = 1; + applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, 0, i, 0, n-1, &work, _state); + } + } + else + { + for(i=n-2; i>=0; i--) + { + + /* + * Apply H(i) + */ + ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1)); + v.ptr.p_double[1] = 1; + applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, i+1, n-1, 0, n-1, &work, _state); + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Reduction of a Hermitian matrix which is given by its higher or lower +triangular part to a real tridiagonal matrix using unitary similarity +transformation: Q'*A*Q = T. + +Input parameters: + A - matrix to be transformed + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. If IsUpper = True, then matrix A is given + by its upper triangle, and the lower triangle is not used + and not modified by the algorithm, and vice versa + if IsUpper = False. + +Output parameters: + A - matrices T and Q in compact form (see lower) + Tau - array of factors which are forming matrices H(i) + array with elements [0..N-2]. + D - main diagonal of real symmetric matrix T. + array with elements [0..N-1]. + E - secondary diagonal of real symmetric matrix T. + array with elements [0..N-2]. + + + If IsUpper=True, the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-2) . . . H(2) H(0). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in + A(0:i-1,i+1), and tau in TAU(i). + + If IsUpper=False, the matrix Q is represented as a product of elementary + reflectors + + Q = H(0) H(2) . . . H(n-2). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v1 v2 v3 ) ( d ) + ( d e v2 v3 ) ( e d ) + ( d e v3 ) ( v0 e d ) + ( d e ) ( v0 v1 e d ) + ( d ) ( v0 v1 v2 e d ) + +where d and e denote diagonal and off-diagonal elements of T, and vi +denotes an element of the vector defining H(i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void hmatrixtd(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tau, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_complex alpha; + ae_complex taui; + ae_complex v; + ae_vector t; + ae_vector t2; + ae_vector t3; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_clear(d); + ae_vector_clear(e); + ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&t2, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&t3, 0, DT_COMPLEX, _state, ae_true); + + if( n<=0 ) + { + ae_frame_leave(_state); + return; + } + for(i=0; i<=n-1; i++) + { + ae_assert(ae_fp_eq(a->ptr.pp_complex[i][i].y,0), "Assertion failed", _state); + } + if( n>1 ) + { + ae_vector_set_length(tau, n-2+1, _state); + ae_vector_set_length(e, n-2+1, _state); + } + ae_vector_set_length(d, n-1+1, _state); + ae_vector_set_length(&t, n-1+1, _state); + ae_vector_set_length(&t2, n-1+1, _state); + ae_vector_set_length(&t3, n-1+1, _state); + if( isupper ) + { + + /* + * Reduce the upper triangle of A + */ + a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(a->ptr.pp_complex[n-1][n-1].x); + for(i=n-2; i>=0; i--) + { + + /* + * Generate elementary reflector H = I+1 - tau * v * v' + */ + alpha = a->ptr.pp_complex[i][i+1]; + t.ptr.p_complex[1] = alpha; + if( i>=1 ) + { + ae_v_cmove(&t.ptr.p_complex[2], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(2,i+1)); + } + complexgeneratereflection(&t, i+1, &taui, _state); + if( i>=1 ) + { + ae_v_cmove(&a->ptr.pp_complex[0][i+1], a->stride, &t.ptr.p_complex[2], 1, "N", ae_v_len(0,i-1)); + } + alpha = t.ptr.p_complex[1]; + e->ptr.p_double[i] = alpha.x; + if( ae_c_neq_d(taui,0) ) + { + + /* + * Apply H(I+1) from both sides to A + */ + a->ptr.pp_complex[i][i+1] = ae_complex_from_d(1); + + /* + * Compute x := tau * A * v storing x in TAU + */ + ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1)); + hermitianmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t2, _state); + ae_v_cmove(&tau->ptr.p_complex[0], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(0,i)); + + /* + * Compute w := x - 1/2 * tau * (x'*v) * v + */ + v = ae_v_cdotproduct(&tau->ptr.p_complex[0], 1, "Conj", &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i)); + alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v)); + ae_v_caddc(&tau->ptr.p_complex[0], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i), alpha); + + /* + * Apply the transformation as a rank-2 update: + * A := A - v * w' - w * v' + */ + ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1)); + ae_v_cmove(&t3.ptr.p_complex[1], 1, &tau->ptr.p_complex[0], 1, "N", ae_v_len(1,i+1)); + hermitianrank2update(a, isupper, 0, i, &t, &t3, &t2, ae_complex_from_d(-1), _state); + } + else + { + a->ptr.pp_complex[i][i] = ae_complex_from_d(a->ptr.pp_complex[i][i].x); + } + a->ptr.pp_complex[i][i+1] = ae_complex_from_d(e->ptr.p_double[i]); + d->ptr.p_double[i+1] = a->ptr.pp_complex[i+1][i+1].x; + tau->ptr.p_complex[i] = taui; + } + d->ptr.p_double[0] = a->ptr.pp_complex[0][0].x; + } + else + { + + /* + * Reduce the lower triangle of A + */ + a->ptr.pp_complex[0][0] = ae_complex_from_d(a->ptr.pp_complex[0][0].x); + for(i=0; i<=n-2; i++) + { + + /* + * Generate elementary reflector H = I - tau * v * v' + */ + ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1)); + complexgeneratereflection(&t, n-i-1, &taui, _state); + ae_v_cmove(&a->ptr.pp_complex[i+1][i], a->stride, &t.ptr.p_complex[1], 1, "N", ae_v_len(i+1,n-1)); + e->ptr.p_double[i] = a->ptr.pp_complex[i+1][i].x; + if( ae_c_neq_d(taui,0) ) + { + + /* + * Apply H(i) from both sides to A(i+1:n,i+1:n) + */ + a->ptr.pp_complex[i+1][i] = ae_complex_from_d(1); + + /* + * Compute x := tau * A * v storing y in TAU + */ + ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1)); + hermitianmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state); + ae_v_cmove(&tau->ptr.p_complex[i], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(i,n-2)); + + /* + * Compute w := x - 1/2 * tau * (x'*v) * v + */ + v = ae_v_cdotproduct(&tau->ptr.p_complex[i], 1, "Conj", &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2)); + alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v)); + ae_v_caddc(&tau->ptr.p_complex[i], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2), alpha); + + /* + * Apply the transformation as a rank-2 update: + * A := A - v * w' - w * v' + */ + ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1)); + ae_v_cmove(&t2.ptr.p_complex[1], 1, &tau->ptr.p_complex[i], 1, "N", ae_v_len(1,n-i-1)); + hermitianrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, ae_complex_from_d(-1), _state); + } + else + { + a->ptr.pp_complex[i+1][i+1] = ae_complex_from_d(a->ptr.pp_complex[i+1][i+1].x); + } + a->ptr.pp_complex[i+1][i] = ae_complex_from_d(e->ptr.p_double[i]); + d->ptr.p_double[i] = a->ptr.pp_complex[i][i].x; + tau->ptr.p_complex[i] = taui; + } + d->ptr.p_double[n-1] = a->ptr.pp_complex[n-1][n-1].x; + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Unpacking matrix Q which reduces a Hermitian matrix to a real tridiagonal +form. + +Input parameters: + A - the result of a HMatrixTD subroutine + N - size of matrix A. + IsUpper - storage format (a parameter of HMatrixTD subroutine) + Tau - the result of a HMatrixTD subroutine + +Output parameters: + Q - transformation matrix. + array with elements [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void hmatrixtdunpackq(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tau, + /* Complex */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector v; + ae_vector work; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true); + + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + ae_matrix_set_length(q, n-1+1, n-1+1, _state); + ae_vector_set_length(&v, n+1, _state); + ae_vector_set_length(&work, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + q->ptr.pp_complex[i][j] = ae_complex_from_d(1); + } + else + { + q->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + } + + /* + * unpack Q + */ + if( isupper ) + { + for(i=0; i<=n-2; i++) + { + + /* + * Apply H(i) + */ + ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1)); + v.ptr.p_complex[i+1] = ae_complex_from_d(1); + complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, 0, i, 0, n-1, &work, _state); + } + } + else + { + for(i=n-2; i>=0; i--) + { + + /* + * Apply H(i) + */ + ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1)); + v.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, i+1, n-1, 0, n-1, &work, _state); + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Base case for real QR + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +static void ortfac_rmatrixqrbasecase(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* work, + /* Real */ ae_vector* t, + /* Real */ ae_vector* tau, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + ae_int_t minmn; + double tmp; + + + minmn = ae_minint(m, n, _state); + + /* + * Test the input arguments + */ + k = minmn; + for(i=0; i<=k-1; i++) + { + + /* + * Generate elementary reflector H(i) to annihilate A(i+1:m,i) + */ + ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[i][i], a->stride, ae_v_len(1,m-i)); + generatereflection(t, m-i, &tmp, _state); + tau->ptr.p_double[i] = tmp; + ae_v_move(&a->ptr.pp_double[i][i], a->stride, &t->ptr.p_double[1], 1, ae_v_len(i,m-1)); + t->ptr.p_double[1] = 1; + if( i<n ) + { + + /* + * Apply H(i) to A(i:m-1,i+1:n-1) from the left + */ + applyreflectionfromtheleft(a, tau->ptr.p_double[i], t, i, m-1, i+1, n-1, work, _state); + } + } +} + + +/************************************************************************* +Base case for real LQ + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +static void ortfac_rmatrixlqbasecase(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* work, + /* Real */ ae_vector* t, + /* Real */ ae_vector* tau, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + ae_int_t minmn; + double tmp; + + + minmn = ae_minint(m, n, _state); + k = ae_minint(m, n, _state); + for(i=0; i<=k-1; i++) + { + + /* + * Generate elementary reflector H(i) to annihilate A(i,i+1:n-1) + */ + ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i)); + generatereflection(t, n-i, &tmp, _state); + tau->ptr.p_double[i] = tmp; + ae_v_move(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[1], 1, ae_v_len(i,n-1)); + t->ptr.p_double[1] = 1; + if( i<n ) + { + + /* + * Apply H(i) to A(i+1:m,i:n) from the right + */ + applyreflectionfromtheright(a, tau->ptr.p_double[i], t, i+1, m-1, i, n-1, work, _state); + } + } +} + + +/************************************************************************* +Base case for complex QR + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* work, + /* Complex */ ae_vector* t, + /* Complex */ ae_vector* tau, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + ae_int_t mmi; + ae_int_t minmn; + ae_complex tmp; + + + minmn = ae_minint(m, n, _state); + if( minmn<=0 ) + { + return; + } + + /* + * Test the input arguments + */ + k = ae_minint(m, n, _state); + for(i=0; i<=k-1; i++) + { + + /* + * Generate elementary reflector H(i) to annihilate A(i+1:m,i) + */ + mmi = m-i; + ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], a->stride, "N", ae_v_len(1,mmi)); + complexgeneratereflection(t, mmi, &tmp, _state); + tau->ptr.p_complex[i] = tmp; + ae_v_cmove(&a->ptr.pp_complex[i][i], a->stride, &t->ptr.p_complex[1], 1, "N", ae_v_len(i,m-1)); + t->ptr.p_complex[1] = ae_complex_from_d(1); + if( i<n-1 ) + { + + /* + * Apply H'(i) to A(i:m,i+1:n) from the left + */ + complexapplyreflectionfromtheleft(a, ae_c_conj(tau->ptr.p_complex[i], _state), t, i, m-1, i+1, n-1, work, _state); + } + } +} + + +/************************************************************************* +Base case for complex LQ + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* work, + /* Complex */ ae_vector* t, + /* Complex */ ae_vector* tau, + ae_state *_state) +{ + ae_int_t i; + ae_int_t minmn; + ae_complex tmp; + + + minmn = ae_minint(m, n, _state); + if( minmn<=0 ) + { + return; + } + + /* + * Test the input arguments + */ + for(i=0; i<=minmn-1; i++) + { + + /* + * Generate elementary reflector H(i) + * + * NOTE: ComplexGenerateReflection() generates left reflector, + * i.e. H which reduces x by applyiong from the left, but we + * need RIGHT reflector. So we replace H=E-tau*v*v' by H^H, + * which changes v to conj(v). + */ + ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,n-i)); + complexgeneratereflection(t, n-i, &tmp, _state); + tau->ptr.p_complex[i] = tmp; + ae_v_cmove(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[1], 1, "Conj", ae_v_len(i,n-1)); + t->ptr.p_complex[1] = ae_complex_from_d(1); + if( i<m-1 ) + { + + /* + * Apply H'(i) + */ + complexapplyreflectionfromtheright(a, tau->ptr.p_complex[i], t, i+1, m-1, i, n-1, work, _state); + } + } +} + + +/************************************************************************* +Generate block reflector: +* fill unused parts of reflectors matrix by zeros +* fill diagonal of reflectors matrix by ones +* generate triangular factor T + +PARAMETERS: + A - either LengthA*BlockSize (if ColumnwiseA) or + BlockSize*LengthA (if not ColumnwiseA) matrix of + elementary reflectors. + Modified on exit. + Tau - scalar factors + ColumnwiseA - reflectors are stored in rows or in columns + LengthA - length of largest reflector + BlockSize - number of reflectors + T - array[BlockSize,2*BlockSize]. Left BlockSize*BlockSize + submatrix stores triangular factor on exit. + WORK - array[BlockSize] + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +static void ortfac_rmatrixblockreflector(/* Real */ ae_matrix* a, + /* Real */ ae_vector* tau, + ae_bool columnwisea, + ae_int_t lengtha, + ae_int_t blocksize, + /* Real */ ae_matrix* t, + /* Real */ ae_vector* work, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t k; + double v; + + + + /* + * fill beginning of new column with zeros, + * load 1.0 in the first non-zero element + */ + for(k=0; k<=blocksize-1; k++) + { + if( columnwisea ) + { + for(i=0; i<=k-1; i++) + { + a->ptr.pp_double[i][k] = 0; + } + } + else + { + for(i=0; i<=k-1; i++) + { + a->ptr.pp_double[k][i] = 0; + } + } + a->ptr.pp_double[k][k] = 1; + } + + /* + * Calculate Gram matrix of A + */ + for(i=0; i<=blocksize-1; i++) + { + for(j=0; j<=blocksize-1; j++) + { + t->ptr.pp_double[i][blocksize+j] = 0; + } + } + for(k=0; k<=lengtha-1; k++) + { + for(j=1; j<=blocksize-1; j++) + { + if( columnwisea ) + { + v = a->ptr.pp_double[k][j]; + if( ae_fp_neq(v,0) ) + { + ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[k][0], 1, ae_v_len(blocksize,blocksize+j-1), v); + } + } + else + { + v = a->ptr.pp_double[j][k]; + if( ae_fp_neq(v,0) ) + { + ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[0][k], a->stride, ae_v_len(blocksize,blocksize+j-1), v); + } + } + } + } + + /* + * Prepare Y (stored in TmpA) and T (stored in TmpT) + */ + for(k=0; k<=blocksize-1; k++) + { + + /* + * fill non-zero part of T, use pre-calculated Gram matrix + */ + ae_v_move(&work->ptr.p_double[0], 1, &t->ptr.pp_double[k][blocksize], 1, ae_v_len(0,k-1)); + for(i=0; i<=k-1; i++) + { + v = ae_v_dotproduct(&t->ptr.pp_double[i][i], 1, &work->ptr.p_double[i], 1, ae_v_len(i,k-1)); + t->ptr.pp_double[i][k] = -tau->ptr.p_double[k]*v; + } + t->ptr.pp_double[k][k] = -tau->ptr.p_double[k]; + + /* + * Rest of T is filled by zeros + */ + for(i=k+1; i<=blocksize-1; i++) + { + t->ptr.pp_double[i][k] = 0; + } + } +} + + +/************************************************************************* +Generate block reflector (complex): +* fill unused parts of reflectors matrix by zeros +* fill diagonal of reflectors matrix by ones +* generate triangular factor T + + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a, + /* Complex */ ae_vector* tau, + ae_bool columnwisea, + ae_int_t lengtha, + ae_int_t blocksize, + /* Complex */ ae_matrix* t, + /* Complex */ ae_vector* work, + ae_state *_state) +{ + ae_int_t i; + ae_int_t k; + ae_complex v; + + + + /* + * Prepare Y (stored in TmpA) and T (stored in TmpT) + */ + for(k=0; k<=blocksize-1; k++) + { + + /* + * fill beginning of new column with zeros, + * load 1.0 in the first non-zero element + */ + if( columnwisea ) + { + for(i=0; i<=k-1; i++) + { + a->ptr.pp_complex[i][k] = ae_complex_from_d(0); + } + } + else + { + for(i=0; i<=k-1; i++) + { + a->ptr.pp_complex[k][i] = ae_complex_from_d(0); + } + } + a->ptr.pp_complex[k][k] = ae_complex_from_d(1); + + /* + * fill non-zero part of T, + */ + for(i=0; i<=k-1; i++) + { + if( columnwisea ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[k][i], a->stride, "Conj", &a->ptr.pp_complex[k][k], a->stride, "N", ae_v_len(k,lengtha-1)); + } + else + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[i][k], 1, "N", &a->ptr.pp_complex[k][k], 1, "Conj", ae_v_len(k,lengtha-1)); + } + work->ptr.p_complex[i] = v; + } + for(i=0; i<=k-1; i++) + { + v = ae_v_cdotproduct(&t->ptr.pp_complex[i][i], 1, "N", &work->ptr.p_complex[i], 1, "N", ae_v_len(i,k-1)); + t->ptr.pp_complex[i][k] = ae_c_neg(ae_c_mul(tau->ptr.p_complex[k],v)); + } + t->ptr.pp_complex[k][k] = ae_c_neg(tau->ptr.p_complex[k]); + + /* + * Rest of T is filled by zeros + */ + for(i=k+1; i<=blocksize-1; i++) + { + t->ptr.pp_complex[i][k] = ae_complex_from_d(0); + } + } +} + + + + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a symmetric matrix + +The algorithm finds eigen pairs of a symmetric matrix by reducing it to +tridiagonal form and using the QL/QR algorithm. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpper - storage format. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + /* Real */ ae_vector* d, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector tau; + ae_vector e; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(d); + ae_matrix_clear(z); + ae_vector_init(&tau, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded==0||zneeded==1, "SMatrixEVD: incorrect ZNeeded", _state); + smatrixtd(a, n, isupper, &tau, d, &e, _state); + if( zneeded==1 ) + { + smatrixtdunpackq(a, n, isupper, &tau, z, _state); + } + result = smatrixtdevd(d, &e, n, zneeded, z, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Subroutine for finding the eigenvalues (and eigenvectors) of a symmetric +matrix in a given half open interval (A, B] by using a bisection and +inverse iteration + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. Array [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + B1, B2 - half open interval (B1, B2] to search eigenvalues in. + +Output parameters: + M - number of eigenvalues found in a given half-interval (M>=0). + W - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..M-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if successful. M contains the number of eigenvalues in the given + half-interval (could be equal to 0), W contains the eigenvalues, + Z contains the eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned, + M is equal to 0. + + -- ALGLIB -- + Copyright 07.01.2006 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixevdr(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + double b1, + double b2, + ae_int_t* m, + /* Real */ ae_vector* w, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector tau; + ae_vector e; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + *m = 0; + ae_vector_clear(w); + ae_matrix_clear(z); + ae_vector_init(&tau, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded==0||zneeded==1, "SMatrixTDEVDR: incorrect ZNeeded", _state); + smatrixtd(a, n, isupper, &tau, w, &e, _state); + if( zneeded==1 ) + { + smatrixtdunpackq(a, n, isupper, &tau, z, _state); + } + result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, z, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Subroutine for finding the eigenvalues and eigenvectors of a symmetric +matrix with given indexes by using bisection and inverse iteration methods. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + +Output parameters: + W - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..I2-I1]. + In that case, the eigenvectors are stored in the matrix columns. + +Result: + True, if successful. W contains the eigenvalues, Z contains the + eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned. + + -- ALGLIB -- + Copyright 07.01.2006 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixevdi(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* w, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector tau; + ae_vector e; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(w); + ae_matrix_clear(z); + ae_vector_init(&tau, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded==0||zneeded==1, "SMatrixEVDI: incorrect ZNeeded", _state); + smatrixtd(a, n, isupper, &tau, w, &e, _state); + if( zneeded==1 ) + { + smatrixtdunpackq(a, n, isupper, &tau, z, _state); + } + result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, z, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a Hermitian matrix + +The algorithm finds eigen pairs of a Hermitian matrix by reducing it to +real tridiagonal form and using the QL/QR algorithm. + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + +Note: + eigenvectors of Hermitian matrix are defined up to multiplication by + a complex number L, such that |L|=1. + + -- ALGLIB -- + Copyright 2005, 23 March 2007 by Bochkanov Sergey +*************************************************************************/ +ae_bool hmatrixevd(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + /* Real */ ae_vector* d, + /* Complex */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector tau; + ae_vector e; + ae_vector work; + ae_matrix t; + ae_matrix q; + ae_int_t i; + ae_int_t k; + double v; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(d); + ae_matrix_clear(z); + ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(zneeded==0||zneeded==1, "HermitianEVD: incorrect ZNeeded", _state); + + /* + * Reduce to tridiagonal form + */ + hmatrixtd(a, n, isupper, &tau, d, &e, _state); + if( zneeded==1 ) + { + hmatrixtdunpackq(a, n, isupper, &tau, &q, _state); + zneeded = 2; + } + + /* + * TDEVD + */ + result = smatrixtdevd(d, &e, n, zneeded, &t, _state); + + /* + * Eigenvectors are needed + * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T + */ + if( result&&zneeded!=0 ) + { + ae_vector_set_length(&work, n-1+1, _state); + ae_matrix_set_length(z, n-1+1, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + + /* + * Calculate real part + */ + for(k=0; k<=n-1; k++) + { + work.ptr.p_double[k] = 0; + } + for(k=0; k<=n-1; k++) + { + v = q.ptr.pp_complex[i][k].x; + ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,n-1), v); + } + for(k=0; k<=n-1; k++) + { + z->ptr.pp_complex[i][k].x = work.ptr.p_double[k]; + } + + /* + * Calculate imaginary part + */ + for(k=0; k<=n-1; k++) + { + work.ptr.p_double[k] = 0; + } + for(k=0; k<=n-1; k++) + { + v = q.ptr.pp_complex[i][k].y; + ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,n-1), v); + } + for(k=0; k<=n-1; k++) + { + z->ptr.pp_complex[i][k].y = work.ptr.p_double[k]; + } + } + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Subroutine for finding the eigenvalues (and eigenvectors) of a Hermitian +matrix in a given half-interval (A, B] by using a bisection and inverse +iteration + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. Array whose indexes range within + [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + B1, B2 - half-interval (B1, B2] to search eigenvalues in. + +Output parameters: + M - number of eigenvalues found in a given half-interval, M>=0 + W - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..M-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if successful. M contains the number of eigenvalues in the given + half-interval (could be equal to 0), W contains the eigenvalues, + Z contains the eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration + subroutine wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned, M is + equal to 0. + +Note: + eigen vectors of Hermitian matrix are defined up to multiplication by + a complex number L, such as |L|=1. + + -- ALGLIB -- + Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. +*************************************************************************/ +ae_bool hmatrixevdr(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + double b1, + double b2, + ae_int_t* m, + /* Real */ ae_vector* w, + /* Complex */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_matrix q; + ae_matrix t; + ae_vector tau; + ae_vector e; + ae_vector work; + ae_int_t i; + ae_int_t k; + double v; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + *m = 0; + ae_vector_clear(w); + ae_matrix_clear(z); + ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsInInterval: incorrect ZNeeded", _state); + + /* + * Reduce to tridiagonal form + */ + hmatrixtd(a, n, isupper, &tau, w, &e, _state); + if( zneeded==1 ) + { + hmatrixtdunpackq(a, n, isupper, &tau, &q, _state); + zneeded = 2; + } + + /* + * Bisection and inverse iteration + */ + result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, &t, _state); + + /* + * Eigenvectors are needed + * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T + */ + if( (result&&zneeded!=0)&&*m!=0 ) + { + ae_vector_set_length(&work, *m-1+1, _state); + ae_matrix_set_length(z, n-1+1, *m-1+1, _state); + for(i=0; i<=n-1; i++) + { + + /* + * Calculate real part + */ + for(k=0; k<=*m-1; k++) + { + work.ptr.p_double[k] = 0; + } + for(k=0; k<=n-1; k++) + { + v = q.ptr.pp_complex[i][k].x; + ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v); + } + for(k=0; k<=*m-1; k++) + { + z->ptr.pp_complex[i][k].x = work.ptr.p_double[k]; + } + + /* + * Calculate imaginary part + */ + for(k=0; k<=*m-1; k++) + { + work.ptr.p_double[k] = 0; + } + for(k=0; k<=n-1; k++) + { + v = q.ptr.pp_complex[i][k].y; + ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v); + } + for(k=0; k<=*m-1; k++) + { + z->ptr.pp_complex[i][k].y = work.ptr.p_double[k]; + } + } + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Subroutine for finding the eigenvalues and eigenvectors of a Hermitian +matrix with given indexes by using bisection and inverse iteration methods + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + +Output parameters: + W - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..I2-I1]. + In that case, the eigenvectors are stored in the matrix + columns. + +Result: + True, if successful. W contains the eigenvalues, Z contains the + eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration + subroutine wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned. + +Note: + eigen vectors of Hermitian matrix are defined up to multiplication by + a complex number L, such as |L|=1. + + -- ALGLIB -- + Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. +*************************************************************************/ +ae_bool hmatrixevdi(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* w, + /* Complex */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_matrix q; + ae_matrix t; + ae_vector tau; + ae_vector e; + ae_vector work; + ae_int_t i; + ae_int_t k; + double v; + ae_int_t m; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(w); + ae_matrix_clear(z); + ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true); + ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsByIndexes: incorrect ZNeeded", _state); + + /* + * Reduce to tridiagonal form + */ + hmatrixtd(a, n, isupper, &tau, w, &e, _state); + if( zneeded==1 ) + { + hmatrixtdunpackq(a, n, isupper, &tau, &q, _state); + zneeded = 2; + } + + /* + * Bisection and inverse iteration + */ + result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, &t, _state); + + /* + * Eigenvectors are needed + * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T + */ + m = i2-i1+1; + if( result&&zneeded!=0 ) + { + ae_vector_set_length(&work, m-1+1, _state); + ae_matrix_set_length(z, n-1+1, m-1+1, _state); + for(i=0; i<=n-1; i++) + { + + /* + * Calculate real part + */ + for(k=0; k<=m-1; k++) + { + work.ptr.p_double[k] = 0; + } + for(k=0; k<=n-1; k++) + { + v = q.ptr.pp_complex[i][k].x; + ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v); + } + for(k=0; k<=m-1; k++) + { + z->ptr.pp_complex[i][k].x = work.ptr.p_double[k]; + } + + /* + * Calculate imaginary part + */ + for(k=0; k<=m-1; k++) + { + work.ptr.p_double[k] = 0; + } + for(k=0; k<=n-1; k++) + { + v = q.ptr.pp_complex[i][k].y; + ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v); + } + for(k=0; k<=m-1; k++) + { + z->ptr.pp_complex[i][k].y = work.ptr.p_double[k]; + } + } + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix + +The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by +using an QL/QR algorithm with implicit shifts. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix + are multiplied by the square matrix Z. It is used if the + tridiagonal matrix is obtained by the similarity + transformation of a symmetric matrix; + * 2, the eigenvectors of a tridiagonal matrix replace the + square matrix Z; + * 3, matrix Z contains the first row of the eigenvectors + matrix. + Z - if ZNeeded=1, Z contains the square matrix by which the + eigenvectors are multiplied. + Array whose indexes range within [0..N-1, 0..N-1]. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the product of a given matrix (from the left) + and the eigenvectors matrix (from the right); + * 2, Z contains the eigenvectors. + * 3, Z contains the first row of the eigenvectors matrix. + If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1]. + In that case, the eigenvectors are stored in the matrix columns. + If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1]. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +ae_bool smatrixtdevd(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _e; + ae_vector d1; + ae_vector e1; + ae_matrix z1; + ae_int_t i; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + ae_vector_init(&d1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e1, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&z1, 0, 0, DT_REAL, _state, ae_true); + + + /* + * Prepare 1-based task + */ + ae_vector_set_length(&d1, n+1, _state); + ae_vector_set_length(&e1, n+1, _state); + ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n)); + if( n>1 ) + { + ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1)); + } + if( zneeded==1 ) + { + ae_matrix_set_length(&z1, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + ae_v_move(&z1.ptr.pp_double[i][1], 1, &z->ptr.pp_double[i-1][0], 1, ae_v_len(1,n)); + } + } + + /* + * Solve 1-based task + */ + result = evd_tridiagonalevd(&d1, &e1, n, zneeded, &z1, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + + /* + * Convert back to 0-based result + */ + ae_v_move(&d->ptr.p_double[0], 1, &d1.ptr.p_double[1], 1, ae_v_len(0,n-1)); + if( zneeded!=0 ) + { + if( zneeded==1 ) + { + for(i=1; i<=n; i++) + { + ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1)); + } + ae_frame_leave(_state); + return result; + } + if( zneeded==2 ) + { + ae_matrix_set_length(z, n-1+1, n-1+1, _state); + for(i=1; i<=n; i++) + { + ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1)); + } + ae_frame_leave(_state); + return result; + } + if( zneeded==3 ) + { + ae_matrix_set_length(z, 0+1, n-1+1, _state); + ae_v_move(&z->ptr.pp_double[0][0], 1, &z1.ptr.pp_double[1][1], 1, ae_v_len(0,n-1)); + ae_frame_leave(_state); + return result; + } + ae_assert(ae_false, "SMatrixTDEVD: Incorrect ZNeeded!", _state); + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a +given half-interval (A, B] by using bisection and inverse iteration. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix, N>=0. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix are multiplied + by the square matrix Z. It is used if the tridiagonal + matrix is obtained by the similarity transformation + of a symmetric matrix. + * 2, the eigenvectors of a tridiagonal matrix replace matrix Z. + A, B - half-interval (A, B] to search eigenvalues in. + Z - if ZNeeded is equal to: + * 0, Z isn't used and remains unchanged; + * 1, Z contains the square matrix (array whose indexes range + within [0..N-1, 0..N-1]) which reduces the given symmetric + matrix to tridiagonal form; + * 2, Z isn't used (but changed on the exit). + +Output parameters: + D - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + M - number of eigenvalues found in the given half-interval (M>=0). + Z - if ZNeeded is equal to: + * 0, doesn't contain any information; + * 1, contains the product of a given NxN matrix Z (from the + left) and NxM matrix of the eigenvectors found (from the + right). Array whose indexes range within [0..N-1, 0..M-1]. + * 2, contains the matrix of the eigenvectors found. + Array whose indexes range within [0..N-1, 0..M-1]. + +Result: + + True, if successful. In that case, M contains the number of eigenvalues + in the given half-interval (could be equal to 0), D contains the eigenvalues, + Z contains the eigenvectors (if needed). + It should be noted that the subroutine changes the size of arrays D and Z. + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. In that case, + the eigenvalues and eigenvectors are not returned, M is equal to 0. + + -- ALGLIB -- + Copyright 31.03.2008 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixtdevdr(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + double a, + double b, + ae_int_t* m, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t errorcode; + ae_int_t nsplit; + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_int_t cr; + ae_vector iblock; + ae_vector isplit; + ae_vector ifail; + ae_vector d1; + ae_vector e1; + ae_vector w; + ae_matrix z2; + ae_matrix z3; + double v; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + *m = 0; + ae_vector_init(&iblock, 0, DT_INT, _state, ae_true); + ae_vector_init(&isplit, 0, DT_INT, _state, ae_true); + ae_vector_init(&ifail, 0, DT_INT, _state, ae_true); + ae_vector_init(&d1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&w, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded>=0&&zneeded<=2, "SMatrixTDEVDR: incorrect ZNeeded!", _state); + + /* + * Special cases + */ + if( ae_fp_less_eq(b,a) ) + { + *m = 0; + result = ae_true; + ae_frame_leave(_state); + return result; + } + if( n<=0 ) + { + *m = 0; + result = ae_true; + ae_frame_leave(_state); + return result; + } + + /* + * Copy D,E to D1, E1 + */ + ae_vector_set_length(&d1, n+1, _state); + ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n)); + if( n>1 ) + { + ae_vector_set_length(&e1, n-1+1, _state); + ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1)); + } + + /* + * No eigen vectors + */ + if( zneeded==0 ) + { + result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 1, a, b, 0, 0, -1, &w, m, &nsplit, &iblock, &isplit, &errorcode, _state); + if( !result||*m==0 ) + { + *m = 0; + ae_frame_leave(_state); + return result; + } + ae_vector_set_length(d, *m-1+1, _state); + ae_v_move(&d->ptr.p_double[0], 1, &w.ptr.p_double[1], 1, ae_v_len(0,*m-1)); + ae_frame_leave(_state); + return result; + } + + /* + * Eigen vectors are multiplied by Z + */ + if( zneeded==1 ) + { + + /* + * Find eigen pairs + */ + result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, -1, &w, m, &nsplit, &iblock, &isplit, &errorcode, _state); + if( !result||*m==0 ) + { + *m = 0; + ae_frame_leave(_state); + return result; + } + evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state); + if( cr!=0 ) + { + *m = 0; + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Sort eigen values and vectors + */ + for(i=1; i<=*m; i++) + { + k = i; + for(j=i; j<=*m; j++) + { + if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) ) + { + k = j; + } + } + v = w.ptr.p_double[i]; + w.ptr.p_double[i] = w.ptr.p_double[k]; + w.ptr.p_double[k] = v; + for(j=1; j<=n; j++) + { + v = z2.ptr.pp_double[j][i]; + z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k]; + z2.ptr.pp_double[j][k] = v; + } + } + + /* + * Transform Z2 and overwrite Z + */ + ae_matrix_set_length(&z3, *m+1, n+1, _state); + for(i=1; i<=*m; i++) + { + ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n)); + } + for(i=1; i<=n; i++) + { + for(j=1; j<=*m; j++) + { + v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1)); + z2.ptr.pp_double[i][j] = v; + } + } + ae_matrix_set_length(z, n-1+1, *m-1+1, _state); + for(i=1; i<=*m; i++) + { + ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1)); + } + + /* + * Store W + */ + ae_vector_set_length(d, *m-1+1, _state); + for(i=1; i<=*m; i++) + { + d->ptr.p_double[i-1] = w.ptr.p_double[i]; + } + ae_frame_leave(_state); + return result; + } + + /* + * Eigen vectors are stored in Z + */ + if( zneeded==2 ) + { + + /* + * Find eigen pairs + */ + result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, -1, &w, m, &nsplit, &iblock, &isplit, &errorcode, _state); + if( !result||*m==0 ) + { + *m = 0; + ae_frame_leave(_state); + return result; + } + evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state); + if( cr!=0 ) + { + *m = 0; + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Sort eigen values and vectors + */ + for(i=1; i<=*m; i++) + { + k = i; + for(j=i; j<=*m; j++) + { + if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) ) + { + k = j; + } + } + v = w.ptr.p_double[i]; + w.ptr.p_double[i] = w.ptr.p_double[k]; + w.ptr.p_double[k] = v; + for(j=1; j<=n; j++) + { + v = z2.ptr.pp_double[j][i]; + z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k]; + z2.ptr.pp_double[j][k] = v; + } + } + + /* + * Store W + */ + ae_vector_set_length(d, *m-1+1, _state); + for(i=1; i<=*m; i++) + { + d->ptr.p_double[i-1] = w.ptr.p_double[i]; + } + ae_matrix_set_length(z, n-1+1, *m-1+1, _state); + for(i=1; i<=*m; i++) + { + ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1)); + } + ae_frame_leave(_state); + return result; + } + result = ae_false; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Subroutine for finding tridiagonal matrix eigenvalues/vectors with given +indexes (in ascending order) by using the bisection and inverse iteraion. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix. N>=0. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix are multiplied + by the square matrix Z. It is used if the + tridiagonal matrix is obtained by the similarity transformation + of a symmetric matrix. + * 2, the eigenvectors of a tridiagonal matrix replace + matrix Z. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + Z - if ZNeeded is equal to: + * 0, Z isn't used and remains unchanged; + * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1]) + which reduces the given symmetric matrix to tridiagonal form; + * 2, Z isn't used (but changed on the exit). + +Output parameters: + D - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, doesn't contain any information; + * 1, contains the product of a given NxN matrix Z (from the left) and + Nx(I2-I1) matrix of the eigenvectors found (from the right). + Array whose indexes range within [0..N-1, 0..I2-I1]. + * 2, contains the matrix of the eigenvalues found. + Array whose indexes range within [0..N-1, 0..I2-I1]. + + +Result: + + True, if successful. In that case, D contains the eigenvalues, + Z contains the eigenvectors (if needed). + It should be noted that the subroutine changes the size of arrays D and Z. + + False, if the bisection method subroutine wasn't able to find the eigenvalues + in the given interval or if the inverse iteration subroutine wasn't able + to find all the corresponding eigenvectors. In that case, the eigenvalues + and eigenvectors are not returned. + + -- ALGLIB -- + Copyright 25.12.2005 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixtdevdi(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t errorcode; + ae_int_t nsplit; + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_int_t m; + ae_int_t cr; + ae_vector iblock; + ae_vector isplit; + ae_vector ifail; + ae_vector w; + ae_vector d1; + ae_vector e1; + ae_matrix z2; + ae_matrix z3; + double v; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&iblock, 0, DT_INT, _state, ae_true); + ae_vector_init(&isplit, 0, DT_INT, _state, ae_true); + ae_vector_init(&ifail, 0, DT_INT, _state, ae_true); + ae_vector_init(&w, 0, DT_REAL, _state, ae_true); + ae_vector_init(&d1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e1, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true); + + ae_assert((0<=i1&&i1<=i2)&&i2<n, "SMatrixTDEVDI: incorrect I1/I2!", _state); + + /* + * Copy D,E to D1, E1 + */ + ae_vector_set_length(&d1, n+1, _state); + ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n)); + if( n>1 ) + { + ae_vector_set_length(&e1, n-1+1, _state); + ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1)); + } + + /* + * No eigen vectors + */ + if( zneeded==0 ) + { + result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 1, 0, 0, i1+1, i2+1, -1, &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + if( m!=i2-i1+1 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + ae_vector_set_length(d, m-1+1, _state); + for(i=1; i<=m; i++) + { + d->ptr.p_double[i-1] = w.ptr.p_double[i]; + } + ae_frame_leave(_state); + return result; + } + + /* + * Eigen vectors are multiplied by Z + */ + if( zneeded==1 ) + { + + /* + * Find eigen pairs + */ + result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, 0, 0, i1+1, i2+1, -1, &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + if( m!=i2-i1+1 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state); + if( cr!=0 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Sort eigen values and vectors + */ + for(i=1; i<=m; i++) + { + k = i; + for(j=i; j<=m; j++) + { + if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) ) + { + k = j; + } + } + v = w.ptr.p_double[i]; + w.ptr.p_double[i] = w.ptr.p_double[k]; + w.ptr.p_double[k] = v; + for(j=1; j<=n; j++) + { + v = z2.ptr.pp_double[j][i]; + z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k]; + z2.ptr.pp_double[j][k] = v; + } + } + + /* + * Transform Z2 and overwrite Z + */ + ae_matrix_set_length(&z3, m+1, n+1, _state); + for(i=1; i<=m; i++) + { + ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n)); + } + for(i=1; i<=n; i++) + { + for(j=1; j<=m; j++) + { + v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1)); + z2.ptr.pp_double[i][j] = v; + } + } + ae_matrix_set_length(z, n-1+1, m-1+1, _state); + for(i=1; i<=m; i++) + { + ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1)); + } + + /* + * Store W + */ + ae_vector_set_length(d, m-1+1, _state); + for(i=1; i<=m; i++) + { + d->ptr.p_double[i-1] = w.ptr.p_double[i]; + } + ae_frame_leave(_state); + return result; + } + + /* + * Eigen vectors are stored in Z + */ + if( zneeded==2 ) + { + + /* + * Find eigen pairs + */ + result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, 0, 0, i1+1, i2+1, -1, &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + if( m!=i2-i1+1 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state); + if( cr!=0 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Sort eigen values and vectors + */ + for(i=1; i<=m; i++) + { + k = i; + for(j=i; j<=m; j++) + { + if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) ) + { + k = j; + } + } + v = w.ptr.p_double[i]; + w.ptr.p_double[i] = w.ptr.p_double[k]; + w.ptr.p_double[k] = v; + for(j=1; j<=n; j++) + { + v = z2.ptr.pp_double[j][i]; + z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k]; + z2.ptr.pp_double[j][k] = v; + } + } + + /* + * Store Z + */ + ae_matrix_set_length(z, n-1+1, m-1+1, _state); + for(i=1; i<=m; i++) + { + ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1)); + } + + /* + * Store W + */ + ae_vector_set_length(d, m-1+1, _state); + for(i=1; i<=m; i++) + { + d->ptr.p_double[i-1] = w.ptr.p_double[i]; + } + ae_frame_leave(_state); + return result; + } + result = ae_false; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Finding eigenvalues and eigenvectors of a general matrix + +The algorithm finds eigenvalues and eigenvectors of a general matrix by +using the QR algorithm with multiple shifts. The algorithm can find +eigenvalues and both left and right eigenvectors. + +The right eigenvector is a vector x such that A*x = w*x, and the left +eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex +conjugate transposition of vector y). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + VNeeded - flag controlling whether eigenvectors are needed or not. + If VNeeded is equal to: + * 0, eigenvectors are not returned; + * 1, right eigenvectors are returned; + * 2, left eigenvectors are returned; + * 3, both left and right eigenvectors are returned. + +Output parameters: + WR - real parts of eigenvalues. + Array whose index ranges within [0..N-1]. + WR - imaginary parts of eigenvalues. + Array whose index ranges within [0..N-1]. + VL, VR - arrays of left and right eigenvectors (if they are needed). + If WI[i]=0, the respective eigenvalue is a real number, + and it corresponds to the column number I of matrices VL/VR. + If WI[i]>0, we have a pair of complex conjugate numbers with + positive and negative imaginary parts: + the first eigenvalue WR[i] + sqrt(-1)*WI[i]; + the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1]; + WI[i]>0 + WI[i+1] = -WI[i] < 0 + In that case, the eigenvector corresponding to the first + eigenvalue is located in i and i+1 columns of matrices + VL/VR (the column number i contains the real part, and the + column number i+1 contains the imaginary part), and the vector + corresponding to the second eigenvalue is a complex conjugate to + the first vector. + Arrays whose indexes range within [0..N-1, 0..N-1]. + +Result: + True, if the algorithm has converged. + False, if the algorithm has not converged. + +Note 1: + Some users may ask the following question: what if WI[N-1]>0? + WI[N] must contain an eigenvalue which is complex conjugate to the + N-th eigenvalue, but the array has only size N? + The answer is as follows: such a situation cannot occur because the + algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is + strictly less than N-1. + +Note 2: + The algorithm performance depends on the value of the internal parameter + NS of the InternalSchurDecomposition subroutine which defines the number + of shifts in the QR algorithm (similarly to the block width in block-matrix + algorithms of linear algebra). If you require maximum performance + on your machine, it is recommended to adjust this parameter manually. + + +See also the InternalTREVC subroutine. + +The algorithm is based on the LAPACK 3.0 library. +*************************************************************************/ +ae_bool rmatrixevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t vneeded, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + /* Real */ ae_matrix* vl, + /* Real */ ae_matrix* vr, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_matrix a1; + ae_matrix vl1; + ae_matrix vr1; + ae_vector wr1; + ae_vector wi1; + ae_int_t i; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(wr); + ae_vector_clear(wi); + ae_matrix_clear(vl); + ae_matrix_clear(vr); + ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&vl1, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&vr1, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&wr1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&wi1, 0, DT_REAL, _state, ae_true); + + ae_assert(vneeded>=0&&vneeded<=3, "RMatrixEVD: incorrect VNeeded!", _state); + ae_matrix_set_length(&a1, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n)); + } + result = evd_nonsymmetricevd(&a1, n, vneeded, &wr1, &wi1, &vl1, &vr1, _state); + if( result ) + { + ae_vector_set_length(wr, n-1+1, _state); + ae_vector_set_length(wi, n-1+1, _state); + ae_v_move(&wr->ptr.p_double[0], 1, &wr1.ptr.p_double[1], 1, ae_v_len(0,n-1)); + ae_v_move(&wi->ptr.p_double[0], 1, &wi1.ptr.p_double[1], 1, ae_v_len(0,n-1)); + if( vneeded==2||vneeded==3 ) + { + ae_matrix_set_length(vl, n-1+1, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + ae_v_move(&vl->ptr.pp_double[i][0], 1, &vl1.ptr.pp_double[i+1][1], 1, ae_v_len(0,n-1)); + } + } + if( vneeded==1||vneeded==3 ) + { + ae_matrix_set_length(vr, n-1+1, n-1+1, _state); + for(i=0; i<=n-1; i++) + { + ae_v_move(&vr->ptr.pp_double[i][0], 1, &vr1.ptr.pp_double[i+1][1], 1, ae_v_len(0,n-1)); + } + } + } + ae_frame_leave(_state); + return result; +} + + +static ae_bool evd_tridiagonalevd(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _e; + ae_int_t maxit; + ae_int_t i; + ae_int_t ii; + ae_int_t iscale; + ae_int_t j; + ae_int_t jtot; + ae_int_t k; + ae_int_t t; + ae_int_t l; + ae_int_t l1; + ae_int_t lend; + ae_int_t lendm1; + ae_int_t lendp1; + ae_int_t lendsv; + ae_int_t lm1; + ae_int_t lsv; + ae_int_t m; + ae_int_t mm; + ae_int_t mm1; + ae_int_t nm1; + ae_int_t nmaxit; + ae_int_t tmpint; + double anorm; + double b; + double c; + double eps; + double eps2; + double f; + double g; + double p; + double r; + double rt1; + double rt2; + double s; + double safmax; + double safmin; + double ssfmax; + double ssfmin; + double tst; + double tmp; + ae_vector work1; + ae_vector work2; + ae_vector workc; + ae_vector works; + ae_vector wtemp; + ae_bool gotoflag; + ae_int_t zrows; + ae_bool wastranspose; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + ae_vector_init(&work1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work2, 0, DT_REAL, _state, ae_true); + ae_vector_init(&workc, 0, DT_REAL, _state, ae_true); + ae_vector_init(&works, 0, DT_REAL, _state, ae_true); + ae_vector_init(&wtemp, 0, DT_REAL, _state, ae_true); + + ae_assert(zneeded>=0&&zneeded<=3, "TridiagonalEVD: Incorrent ZNeeded", _state); + + /* + * Quick return if possible + */ + if( zneeded<0||zneeded>3 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + result = ae_true; + if( n==0 ) + { + ae_frame_leave(_state); + return result; + } + if( n==1 ) + { + if( zneeded==2||zneeded==3 ) + { + ae_matrix_set_length(z, 1+1, 1+1, _state); + z->ptr.pp_double[1][1] = 1; + } + ae_frame_leave(_state); + return result; + } + maxit = 30; + + /* + * Initialize arrays + */ + ae_vector_set_length(&wtemp, n+1, _state); + ae_vector_set_length(&work1, n-1+1, _state); + ae_vector_set_length(&work2, n-1+1, _state); + ae_vector_set_length(&workc, n+1, _state); + ae_vector_set_length(&works, n+1, _state); + + /* + * Determine the unit roundoff and over/underflow thresholds. + */ + eps = ae_machineepsilon; + eps2 = ae_sqr(eps, _state); + safmin = ae_minrealnumber; + safmax = ae_maxrealnumber; + ssfmax = ae_sqrt(safmax, _state)/3; + ssfmin = ae_sqrt(safmin, _state)/eps2; + + /* + * Prepare Z + * + * Here we are using transposition to get rid of column operations + * + */ + wastranspose = ae_false; + zrows = 0; + if( zneeded==1 ) + { + zrows = n; + } + if( zneeded==2 ) + { + zrows = n; + } + if( zneeded==3 ) + { + zrows = 1; + } + if( zneeded==1 ) + { + wastranspose = ae_true; + inplacetranspose(z, 1, n, 1, n, &wtemp, _state); + } + if( zneeded==2 ) + { + wastranspose = ae_true; + ae_matrix_set_length(z, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + for(j=1; j<=n; j++) + { + if( i==j ) + { + z->ptr.pp_double[i][j] = 1; + } + else + { + z->ptr.pp_double[i][j] = 0; + } + } + } + } + if( zneeded==3 ) + { + wastranspose = ae_false; + ae_matrix_set_length(z, 1+1, n+1, _state); + for(j=1; j<=n; j++) + { + if( j==1 ) + { + z->ptr.pp_double[1][j] = 1; + } + else + { + z->ptr.pp_double[1][j] = 0; + } + } + } + nmaxit = n*maxit; + jtot = 0; + + /* + * Determine where the matrix splits and choose QL or QR iteration + * for each block, according to whether top or bottom diagonal + * element is smaller. + */ + l1 = 1; + nm1 = n-1; + for(;;) + { + if( l1>n ) + { + break; + } + if( l1>1 ) + { + e->ptr.p_double[l1-1] = 0; + } + gotoflag = ae_false; + m = l1; + if( l1<=nm1 ) + { + for(m=l1; m<=nm1; m++) + { + tst = ae_fabs(e->ptr.p_double[m], _state); + if( ae_fp_eq(tst,0) ) + { + gotoflag = ae_true; + break; + } + if( ae_fp_less_eq(tst,ae_sqrt(ae_fabs(d->ptr.p_double[m], _state), _state)*ae_sqrt(ae_fabs(d->ptr.p_double[m+1], _state), _state)*eps) ) + { + e->ptr.p_double[m] = 0; + gotoflag = ae_true; + break; + } + } + } + if( !gotoflag ) + { + m = n; + } + + /* + * label 30: + */ + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m+1; + if( lend==l ) + { + continue; + } + + /* + * Scale submatrix in rows and columns L to LEND + */ + if( l==lend ) + { + anorm = ae_fabs(d->ptr.p_double[l], _state); + } + else + { + anorm = ae_maxreal(ae_fabs(d->ptr.p_double[l], _state)+ae_fabs(e->ptr.p_double[l], _state), ae_fabs(e->ptr.p_double[lend-1], _state)+ae_fabs(d->ptr.p_double[lend], _state), _state); + for(i=l+1; i<=lend-1; i++) + { + anorm = ae_maxreal(anorm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state), _state); + } + } + iscale = 0; + if( ae_fp_eq(anorm,0) ) + { + continue; + } + if( ae_fp_greater(anorm,ssfmax) ) + { + iscale = 1; + tmp = ssfmax/anorm; + tmpint = lend-1; + ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp); + ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp); + } + if( ae_fp_less(anorm,ssfmin) ) + { + iscale = 2; + tmp = ssfmin/anorm; + tmpint = lend-1; + ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp); + ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp); + } + + /* + * Choose between QL and QR iteration + */ + if( ae_fp_less(ae_fabs(d->ptr.p_double[lend], _state),ae_fabs(d->ptr.p_double[l], _state)) ) + { + lend = lsv; + l = lendsv; + } + if( lend>l ) + { + + /* + * QL Iteration + * + * Look for small subdiagonal element. + */ + for(;;) + { + gotoflag = ae_false; + if( l!=lend ) + { + lendm1 = lend-1; + for(m=l; m<=lendm1; m++) + { + tst = ae_sqr(ae_fabs(e->ptr.p_double[m], _state), _state); + if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m+1], _state)+safmin) ) + { + gotoflag = ae_true; + break; + } + } + } + if( !gotoflag ) + { + m = lend; + } + if( m<lend ) + { + e->ptr.p_double[m] = 0; + } + p = d->ptr.p_double[l]; + if( m!=l ) + { + + /* + * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + * to compute its eigensystem. + */ + if( m==l+1 ) + { + if( zneeded>0 ) + { + evd_tdevdev2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, &c, &s, _state); + work1.ptr.p_double[l] = c; + work2.ptr.p_double[l] = s; + workc.ptr.p_double[1] = work1.ptr.p_double[l]; + works.ptr.p_double[1] = work2.ptr.p_double[l]; + if( !wastranspose ) + { + applyrotationsfromtheright(ae_false, 1, zrows, l, l+1, &workc, &works, z, &wtemp, _state); + } + else + { + applyrotationsfromtheleft(ae_false, l, l+1, 1, zrows, &workc, &works, z, &wtemp, _state); + } + } + else + { + evd_tdevde2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, _state); + } + d->ptr.p_double[l] = rt1; + d->ptr.p_double[l+1] = rt2; + e->ptr.p_double[l] = 0; + l = l+2; + if( l<=lend ) + { + continue; + } + + /* + * GOTO 140 + */ + break; + } + if( jtot==nmaxit ) + { + + /* + * GOTO 140 + */ + break; + } + jtot = jtot+1; + + /* + * Form shift. + */ + g = (d->ptr.p_double[l+1]-p)/(2*e->ptr.p_double[l]); + r = evd_tdevdpythag(g, 1, _state); + g = d->ptr.p_double[m]-p+e->ptr.p_double[l]/(g+evd_tdevdextsign(r, g, _state)); + s = 1; + c = 1; + p = 0; + + /* + * Inner loop + */ + mm1 = m-1; + for(i=mm1; i>=l; i--) + { + f = s*e->ptr.p_double[i]; + b = c*e->ptr.p_double[i]; + generaterotation(g, f, &c, &s, &r, _state); + if( i!=m-1 ) + { + e->ptr.p_double[i+1] = r; + } + g = d->ptr.p_double[i+1]-p; + r = (d->ptr.p_double[i]-g)*s+2*c*b; + p = s*r; + d->ptr.p_double[i+1] = g+p; + g = c*r-b; + + /* + * If eigenvectors are desired, then save rotations. + */ + if( zneeded>0 ) + { + work1.ptr.p_double[i] = c; + work2.ptr.p_double[i] = -s; + } + } + + /* + * If eigenvectors are desired, then apply saved rotations. + */ + if( zneeded>0 ) + { + for(i=l; i<=m-1; i++) + { + workc.ptr.p_double[i-l+1] = work1.ptr.p_double[i]; + works.ptr.p_double[i-l+1] = work2.ptr.p_double[i]; + } + if( !wastranspose ) + { + applyrotationsfromtheright(ae_false, 1, zrows, l, m, &workc, &works, z, &wtemp, _state); + } + else + { + applyrotationsfromtheleft(ae_false, l, m, 1, zrows, &workc, &works, z, &wtemp, _state); + } + } + d->ptr.p_double[l] = d->ptr.p_double[l]-p; + e->ptr.p_double[l] = g; + continue; + } + + /* + * Eigenvalue found. + */ + d->ptr.p_double[l] = p; + l = l+1; + if( l<=lend ) + { + continue; + } + break; + } + } + else + { + + /* + * QR Iteration + * + * Look for small superdiagonal element. + */ + for(;;) + { + gotoflag = ae_false; + if( l!=lend ) + { + lendp1 = lend+1; + for(m=l; m>=lendp1; m--) + { + tst = ae_sqr(ae_fabs(e->ptr.p_double[m-1], _state), _state); + if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m-1], _state)+safmin) ) + { + gotoflag = ae_true; + break; + } + } + } + if( !gotoflag ) + { + m = lend; + } + if( m>lend ) + { + e->ptr.p_double[m-1] = 0; + } + p = d->ptr.p_double[l]; + if( m!=l ) + { + + /* + * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + * to compute its eigensystem. + */ + if( m==l-1 ) + { + if( zneeded>0 ) + { + evd_tdevdev2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, &c, &s, _state); + work1.ptr.p_double[m] = c; + work2.ptr.p_double[m] = s; + workc.ptr.p_double[1] = c; + works.ptr.p_double[1] = s; + if( !wastranspose ) + { + applyrotationsfromtheright(ae_true, 1, zrows, l-1, l, &workc, &works, z, &wtemp, _state); + } + else + { + applyrotationsfromtheleft(ae_true, l-1, l, 1, zrows, &workc, &works, z, &wtemp, _state); + } + } + else + { + evd_tdevde2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, _state); + } + d->ptr.p_double[l-1] = rt1; + d->ptr.p_double[l] = rt2; + e->ptr.p_double[l-1] = 0; + l = l-2; + if( l>=lend ) + { + continue; + } + break; + } + if( jtot==nmaxit ) + { + break; + } + jtot = jtot+1; + + /* + * Form shift. + */ + g = (d->ptr.p_double[l-1]-p)/(2*e->ptr.p_double[l-1]); + r = evd_tdevdpythag(g, 1, _state); + g = d->ptr.p_double[m]-p+e->ptr.p_double[l-1]/(g+evd_tdevdextsign(r, g, _state)); + s = 1; + c = 1; + p = 0; + + /* + * Inner loop + */ + lm1 = l-1; + for(i=m; i<=lm1; i++) + { + f = s*e->ptr.p_double[i]; + b = c*e->ptr.p_double[i]; + generaterotation(g, f, &c, &s, &r, _state); + if( i!=m ) + { + e->ptr.p_double[i-1] = r; + } + g = d->ptr.p_double[i]-p; + r = (d->ptr.p_double[i+1]-g)*s+2*c*b; + p = s*r; + d->ptr.p_double[i] = g+p; + g = c*r-b; + + /* + * If eigenvectors are desired, then save rotations. + */ + if( zneeded>0 ) + { + work1.ptr.p_double[i] = c; + work2.ptr.p_double[i] = s; + } + } + + /* + * If eigenvectors are desired, then apply saved rotations. + */ + if( zneeded>0 ) + { + mm = l-m+1; + for(i=m; i<=l-1; i++) + { + workc.ptr.p_double[i-m+1] = work1.ptr.p_double[i]; + works.ptr.p_double[i-m+1] = work2.ptr.p_double[i]; + } + if( !wastranspose ) + { + applyrotationsfromtheright(ae_true, 1, zrows, m, l, &workc, &works, z, &wtemp, _state); + } + else + { + applyrotationsfromtheleft(ae_true, m, l, 1, zrows, &workc, &works, z, &wtemp, _state); + } + } + d->ptr.p_double[l] = d->ptr.p_double[l]-p; + e->ptr.p_double[lm1] = g; + continue; + } + + /* + * Eigenvalue found. + */ + d->ptr.p_double[l] = p; + l = l-1; + if( l>=lend ) + { + continue; + } + break; + } + } + + /* + * Undo scaling if necessary + */ + if( iscale==1 ) + { + tmp = anorm/ssfmax; + tmpint = lendsv-1; + ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp); + ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp); + } + if( iscale==2 ) + { + tmp = anorm/ssfmin; + tmpint = lendsv-1; + ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp); + ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp); + } + + /* + * Check for no convergence to an eigenvalue after a total + * of N*MAXIT iterations. + */ + if( jtot>=nmaxit ) + { + result = ae_false; + if( wastranspose ) + { + inplacetranspose(z, 1, n, 1, n, &wtemp, _state); + } + ae_frame_leave(_state); + return result; + } + } + + /* + * Order eigenvalues and eigenvectors. + */ + if( zneeded==0 ) + { + + /* + * Sort + */ + if( n==1 ) + { + ae_frame_leave(_state); + return result; + } + if( n==2 ) + { + if( ae_fp_greater(d->ptr.p_double[1],d->ptr.p_double[2]) ) + { + tmp = d->ptr.p_double[1]; + d->ptr.p_double[1] = d->ptr.p_double[2]; + d->ptr.p_double[2] = tmp; + } + ae_frame_leave(_state); + return result; + } + i = 2; + do + { + t = i; + while(t!=1) + { + k = t/2; + if( ae_fp_greater_eq(d->ptr.p_double[k],d->ptr.p_double[t]) ) + { + t = 1; + } + else + { + tmp = d->ptr.p_double[k]; + d->ptr.p_double[k] = d->ptr.p_double[t]; + d->ptr.p_double[t] = tmp; + t = k; + } + } + i = i+1; + } + while(i<=n); + i = n-1; + do + { + tmp = d->ptr.p_double[i+1]; + d->ptr.p_double[i+1] = d->ptr.p_double[1]; + d->ptr.p_double[1] = tmp; + t = 1; + while(t!=0) + { + k = 2*t; + if( k>i ) + { + t = 0; + } + else + { + if( k<i ) + { + if( ae_fp_greater(d->ptr.p_double[k+1],d->ptr.p_double[k]) ) + { + k = k+1; + } + } + if( ae_fp_greater_eq(d->ptr.p_double[t],d->ptr.p_double[k]) ) + { + t = 0; + } + else + { + tmp = d->ptr.p_double[k]; + d->ptr.p_double[k] = d->ptr.p_double[t]; + d->ptr.p_double[t] = tmp; + t = k; + } + } + } + i = i-1; + } + while(i>=1); + } + else + { + + /* + * Use Selection Sort to minimize swaps of eigenvectors + */ + for(ii=2; ii<=n; ii++) + { + i = ii-1; + k = i; + p = d->ptr.p_double[i]; + for(j=ii; j<=n; j++) + { + if( ae_fp_less(d->ptr.p_double[j],p) ) + { + k = j; + p = d->ptr.p_double[j]; + } + } + if( k!=i ) + { + d->ptr.p_double[k] = d->ptr.p_double[i]; + d->ptr.p_double[i] = p; + if( wastranspose ) + { + ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[i][1], 1, ae_v_len(1,n)); + ae_v_move(&z->ptr.pp_double[i][1], 1, &z->ptr.pp_double[k][1], 1, ae_v_len(1,n)); + ae_v_move(&z->ptr.pp_double[k][1], 1, &wtemp.ptr.p_double[1], 1, ae_v_len(1,n)); + } + else + { + ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[1][i], z->stride, ae_v_len(1,zrows)); + ae_v_move(&z->ptr.pp_double[1][i], z->stride, &z->ptr.pp_double[1][k], z->stride, ae_v_len(1,zrows)); + ae_v_move(&z->ptr.pp_double[1][k], z->stride, &wtemp.ptr.p_double[1], 1, ae_v_len(1,zrows)); + } + } + } + if( wastranspose ) + { + inplacetranspose(z, 1, n, 1, n, &wtemp, _state); + } + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. +On return, RT1 is the eigenvalue of larger absolute value, and RT2 +is the eigenvalue of smaller absolute value. + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +static void evd_tdevde2(double a, + double b, + double c, + double* rt1, + double* rt2, + ae_state *_state) +{ + double ab; + double acmn; + double acmx; + double adf; + double df; + double rt; + double sm; + double tb; + + *rt1 = 0; + *rt2 = 0; + + sm = a+c; + df = a-c; + adf = ae_fabs(df, _state); + tb = b+b; + ab = ae_fabs(tb, _state); + if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) ) + { + acmx = a; + acmn = c; + } + else + { + acmx = c; + acmn = a; + } + if( ae_fp_greater(adf,ab) ) + { + rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state); + } + else + { + if( ae_fp_less(adf,ab) ) + { + rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state); + } + else + { + + /* + * Includes case AB=ADF=0 + */ + rt = ab*ae_sqrt(2, _state); + } + } + if( ae_fp_less(sm,0) ) + { + *rt1 = 0.5*(sm-rt); + + /* + * Order of execution important. + * To get fully accurate smaller eigenvalue, + * next line needs to be executed in higher precision. + */ + *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b; + } + else + { + if( ae_fp_greater(sm,0) ) + { + *rt1 = 0.5*(sm+rt); + + /* + * Order of execution important. + * To get fully accurate smaller eigenvalue, + * next line needs to be executed in higher precision. + */ + *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b; + } + else + { + + /* + * Includes case RT1 = RT2 = 0 + */ + *rt1 = 0.5*rt; + *rt2 = -0.5*rt; + } + } +} + + +/************************************************************************* +DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + + [ A B ] + [ B C ]. + +On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +static void evd_tdevdev2(double a, + double b, + double c, + double* rt1, + double* rt2, + double* cs1, + double* sn1, + ae_state *_state) +{ + ae_int_t sgn1; + ae_int_t sgn2; + double ab; + double acmn; + double acmx; + double acs; + double adf; + double cs; + double ct; + double df; + double rt; + double sm; + double tb; + double tn; + + *rt1 = 0; + *rt2 = 0; + *cs1 = 0; + *sn1 = 0; + + + /* + * Compute the eigenvalues + */ + sm = a+c; + df = a-c; + adf = ae_fabs(df, _state); + tb = b+b; + ab = ae_fabs(tb, _state); + if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) ) + { + acmx = a; + acmn = c; + } + else + { + acmx = c; + acmn = a; + } + if( ae_fp_greater(adf,ab) ) + { + rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state); + } + else + { + if( ae_fp_less(adf,ab) ) + { + rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state); + } + else + { + + /* + * Includes case AB=ADF=0 + */ + rt = ab*ae_sqrt(2, _state); + } + } + if( ae_fp_less(sm,0) ) + { + *rt1 = 0.5*(sm-rt); + sgn1 = -1; + + /* + * Order of execution important. + * To get fully accurate smaller eigenvalue, + * next line needs to be executed in higher precision. + */ + *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b; + } + else + { + if( ae_fp_greater(sm,0) ) + { + *rt1 = 0.5*(sm+rt); + sgn1 = 1; + + /* + * Order of execution important. + * To get fully accurate smaller eigenvalue, + * next line needs to be executed in higher precision. + */ + *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b; + } + else + { + + /* + * Includes case RT1 = RT2 = 0 + */ + *rt1 = 0.5*rt; + *rt2 = -0.5*rt; + sgn1 = 1; + } + } + + /* + * Compute the eigenvector + */ + if( ae_fp_greater_eq(df,0) ) + { + cs = df+rt; + sgn2 = 1; + } + else + { + cs = df-rt; + sgn2 = -1; + } + acs = ae_fabs(cs, _state); + if( ae_fp_greater(acs,ab) ) + { + ct = -tb/cs; + *sn1 = 1/ae_sqrt(1+ct*ct, _state); + *cs1 = ct*(*sn1); + } + else + { + if( ae_fp_eq(ab,0) ) + { + *cs1 = 1; + *sn1 = 0; + } + else + { + tn = -cs/tb; + *cs1 = 1/ae_sqrt(1+tn*tn, _state); + *sn1 = tn*(*cs1); + } + } + if( sgn1==sgn2 ) + { + tn = *cs1; + *cs1 = -*sn1; + *sn1 = tn; + } +} + + +/************************************************************************* +Internal routine +*************************************************************************/ +static double evd_tdevdpythag(double a, double b, ae_state *_state) +{ + double result; + + + if( ae_fp_less(ae_fabs(a, _state),ae_fabs(b, _state)) ) + { + result = ae_fabs(b, _state)*ae_sqrt(1+ae_sqr(a/b, _state), _state); + } + else + { + result = ae_fabs(a, _state)*ae_sqrt(1+ae_sqr(b/a, _state), _state); + } + return result; +} + + +/************************************************************************* +Internal routine +*************************************************************************/ +static double evd_tdevdextsign(double a, double b, ae_state *_state) +{ + double result; + + + if( ae_fp_greater_eq(b,0) ) + { + result = ae_fabs(a, _state); + } + else + { + result = -ae_fabs(a, _state); + } + return result; +} + + +static ae_bool evd_internalbisectioneigenvalues(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t irange, + ae_int_t iorder, + double vl, + double vu, + ae_int_t il, + ae_int_t iu, + double abstol, + /* Real */ ae_vector* w, + ae_int_t* m, + ae_int_t* nsplit, + /* Integer */ ae_vector* iblock, + /* Integer */ ae_vector* isplit, + ae_int_t* errorcode, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _d; + ae_vector _e; + double fudge; + double relfac; + ae_bool ncnvrg; + ae_bool toofew; + ae_int_t ib; + ae_int_t ibegin; + ae_int_t idiscl; + ae_int_t idiscu; + ae_int_t ie; + ae_int_t iend; + ae_int_t iinfo; + ae_int_t im; + ae_int_t iin; + ae_int_t ioff; + ae_int_t iout; + ae_int_t itmax; + ae_int_t iw; + ae_int_t iwoff; + ae_int_t j; + ae_int_t itmp1; + ae_int_t jb; + ae_int_t jdisc; + ae_int_t je; + ae_int_t nwl; + ae_int_t nwu; + double atoli; + double bnorm; + double gl; + double gu; + double pivmin; + double rtoli; + double safemn; + double tmp1; + double tmp2; + double tnorm; + double ulp; + double wkill; + double wl; + double wlu; + double wu; + double wul; + double scalefactor; + double t; + ae_vector idumma; + ae_vector work; + ae_vector iwork; + ae_vector ia1s2; + ae_vector ra1s2; + ae_matrix ra1s2x2; + ae_matrix ia1s2x2; + ae_vector ra1siin; + ae_vector ra2siin; + ae_vector ra3siin; + ae_vector ra4siin; + ae_matrix ra1siinx2; + ae_matrix ia1siinx2; + ae_vector iworkspace; + ae_vector rworkspace; + ae_int_t tmpi; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_d, d, _state, ae_true); + d = &_d; + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + ae_vector_clear(w); + *m = 0; + *nsplit = 0; + ae_vector_clear(iblock); + ae_vector_clear(isplit); + *errorcode = 0; + ae_vector_init(&idumma, 0, DT_INT, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&iwork, 0, DT_INT, _state, ae_true); + ae_vector_init(&ia1s2, 0, DT_INT, _state, ae_true); + ae_vector_init(&ra1s2, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&ra1s2x2, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&ia1s2x2, 0, 0, DT_INT, _state, ae_true); + ae_vector_init(&ra1siin, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ra2siin, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ra3siin, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ra4siin, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&ra1siinx2, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&ia1siinx2, 0, 0, DT_INT, _state, ae_true); + ae_vector_init(&iworkspace, 0, DT_INT, _state, ae_true); + ae_vector_init(&rworkspace, 0, DT_REAL, _state, ae_true); + + + /* + * Quick return if possible + */ + *m = 0; + if( n==0 ) + { + result = ae_true; + ae_frame_leave(_state); + return result; + } + + /* + * Get machine constants + * NB is the minimum vector length for vector bisection, or 0 + * if only scalar is to be done. + */ + fudge = 2; + relfac = 2; + safemn = ae_minrealnumber; + ulp = 2*ae_machineepsilon; + rtoli = ulp*relfac; + ae_vector_set_length(&idumma, 1+1, _state); + ae_vector_set_length(&work, 4*n+1, _state); + ae_vector_set_length(&iwork, 3*n+1, _state); + ae_vector_set_length(w, n+1, _state); + ae_vector_set_length(iblock, n+1, _state); + ae_vector_set_length(isplit, n+1, _state); + ae_vector_set_length(&ia1s2, 2+1, _state); + ae_vector_set_length(&ra1s2, 2+1, _state); + ae_matrix_set_length(&ra1s2x2, 2+1, 2+1, _state); + ae_matrix_set_length(&ia1s2x2, 2+1, 2+1, _state); + ae_vector_set_length(&ra1siin, n+1, _state); + ae_vector_set_length(&ra2siin, n+1, _state); + ae_vector_set_length(&ra3siin, n+1, _state); + ae_vector_set_length(&ra4siin, n+1, _state); + ae_matrix_set_length(&ra1siinx2, n+1, 2+1, _state); + ae_matrix_set_length(&ia1siinx2, n+1, 2+1, _state); + ae_vector_set_length(&iworkspace, n+1, _state); + ae_vector_set_length(&rworkspace, n+1, _state); + + /* + * these initializers are not really necessary, + * but without them compiler complains about uninitialized locals + */ + wlu = 0; + wul = 0; + + /* + * Check for Errors + */ + result = ae_false; + *errorcode = 0; + if( irange<=0||irange>=4 ) + { + *errorcode = -4; + } + if( iorder<=0||iorder>=3 ) + { + *errorcode = -5; + } + if( n<0 ) + { + *errorcode = -3; + } + if( irange==2&&ae_fp_greater_eq(vl,vu) ) + { + *errorcode = -6; + } + if( irange==3&&(il<1||il>ae_maxint(1, n, _state)) ) + { + *errorcode = -8; + } + if( irange==3&&(iu<ae_minint(n, il, _state)||iu>n) ) + { + *errorcode = -9; + } + if( *errorcode!=0 ) + { + ae_frame_leave(_state); + return result; + } + + /* + * Initialize error flags + */ + ncnvrg = ae_false; + toofew = ae_false; + + /* + * Simplifications: + */ + if( (irange==3&&il==1)&&iu==n ) + { + irange = 1; + } + + /* + * Special Case when N=1 + */ + if( n==1 ) + { + *nsplit = 1; + isplit->ptr.p_int[1] = 1; + if( irange==2&&(ae_fp_greater_eq(vl,d->ptr.p_double[1])||ae_fp_less(vu,d->ptr.p_double[1])) ) + { + *m = 0; + } + else + { + w->ptr.p_double[1] = d->ptr.p_double[1]; + iblock->ptr.p_int[1] = 1; + *m = 1; + } + result = ae_true; + ae_frame_leave(_state); + return result; + } + + /* + * Scaling + */ + t = ae_fabs(d->ptr.p_double[n], _state); + for(j=1; j<=n-1; j++) + { + t = ae_maxreal(t, ae_fabs(d->ptr.p_double[j], _state), _state); + t = ae_maxreal(t, ae_fabs(e->ptr.p_double[j], _state), _state); + } + scalefactor = 1; + if( ae_fp_neq(t,0) ) + { + if( ae_fp_greater(t,ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state)*ae_sqrt(ae_maxrealnumber, _state)) ) + { + scalefactor = t; + } + if( ae_fp_less(t,ae_sqrt(ae_sqrt(ae_maxrealnumber, _state), _state)*ae_sqrt(ae_minrealnumber, _state)) ) + { + scalefactor = t; + } + for(j=1; j<=n-1; j++) + { + d->ptr.p_double[j] = d->ptr.p_double[j]/scalefactor; + e->ptr.p_double[j] = e->ptr.p_double[j]/scalefactor; + } + d->ptr.p_double[n] = d->ptr.p_double[n]/scalefactor; + } + + /* + * Compute Splitting Points + */ + *nsplit = 1; + work.ptr.p_double[n] = 0; + pivmin = 1; + for(j=2; j<=n; j++) + { + tmp1 = ae_sqr(e->ptr.p_double[j-1], _state); + if( ae_fp_greater(ae_fabs(d->ptr.p_double[j]*d->ptr.p_double[j-1], _state)*ae_sqr(ulp, _state)+safemn,tmp1) ) + { + isplit->ptr.p_int[*nsplit] = j-1; + *nsplit = *nsplit+1; + work.ptr.p_double[j-1] = 0; + } + else + { + work.ptr.p_double[j-1] = tmp1; + pivmin = ae_maxreal(pivmin, tmp1, _state); + } + } + isplit->ptr.p_int[*nsplit] = n; + pivmin = pivmin*safemn; + + /* + * Compute Interval and ATOLI + */ + if( irange==3 ) + { + + /* + * RANGE='I': Compute the interval containing eigenvalues + * IL through IU. + * + * Compute Gershgorin interval for entire (split) matrix + * and use it as the initial interval + */ + gu = d->ptr.p_double[1]; + gl = d->ptr.p_double[1]; + tmp1 = 0; + for(j=1; j<=n-1; j++) + { + tmp2 = ae_sqrt(work.ptr.p_double[j], _state); + gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state); + gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state); + tmp1 = tmp2; + } + gu = ae_maxreal(gu, d->ptr.p_double[n]+tmp1, _state); + gl = ae_minreal(gl, d->ptr.p_double[n]-tmp1, _state); + tnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state); + gl = gl-fudge*tnorm*ulp*n-fudge*2*pivmin; + gu = gu+fudge*tnorm*ulp*n+fudge*pivmin; + + /* + * Compute Iteration parameters + */ + itmax = ae_iceil((ae_log(tnorm+pivmin, _state)-ae_log(pivmin, _state))/ae_log(2, _state), _state)+2; + if( ae_fp_less_eq(abstol,0) ) + { + atoli = ulp*tnorm; + } + else + { + atoli = abstol; + } + work.ptr.p_double[n+1] = gl; + work.ptr.p_double[n+2] = gl; + work.ptr.p_double[n+3] = gu; + work.ptr.p_double[n+4] = gu; + work.ptr.p_double[n+5] = gl; + work.ptr.p_double[n+6] = gu; + iwork.ptr.p_int[1] = -1; + iwork.ptr.p_int[2] = -1; + iwork.ptr.p_int[3] = n+1; + iwork.ptr.p_int[4] = n+1; + iwork.ptr.p_int[5] = il-1; + iwork.ptr.p_int[6] = iu; + + /* + * Calling DLAEBZ + * + * DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E, + * WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + * IWORK, W, IBLOCK, IINFO ) + */ + ia1s2.ptr.p_int[1] = iwork.ptr.p_int[5]; + ia1s2.ptr.p_int[2] = iwork.ptr.p_int[6]; + ra1s2.ptr.p_double[1] = work.ptr.p_double[n+5]; + ra1s2.ptr.p_double[2] = work.ptr.p_double[n+6]; + ra1s2x2.ptr.pp_double[1][1] = work.ptr.p_double[n+1]; + ra1s2x2.ptr.pp_double[2][1] = work.ptr.p_double[n+2]; + ra1s2x2.ptr.pp_double[1][2] = work.ptr.p_double[n+3]; + ra1s2x2.ptr.pp_double[2][2] = work.ptr.p_double[n+4]; + ia1s2x2.ptr.pp_int[1][1] = iwork.ptr.p_int[1]; + ia1s2x2.ptr.pp_int[2][1] = iwork.ptr.p_int[2]; + ia1s2x2.ptr.pp_int[1][2] = iwork.ptr.p_int[3]; + ia1s2x2.ptr.pp_int[2][2] = iwork.ptr.p_int[4]; + evd_internaldlaebz(3, itmax, n, 2, 2, atoli, rtoli, pivmin, d, e, &work, &ia1s2, &ra1s2x2, &ra1s2, &iout, &ia1s2x2, w, iblock, &iinfo, _state); + iwork.ptr.p_int[5] = ia1s2.ptr.p_int[1]; + iwork.ptr.p_int[6] = ia1s2.ptr.p_int[2]; + work.ptr.p_double[n+5] = ra1s2.ptr.p_double[1]; + work.ptr.p_double[n+6] = ra1s2.ptr.p_double[2]; + work.ptr.p_double[n+1] = ra1s2x2.ptr.pp_double[1][1]; + work.ptr.p_double[n+2] = ra1s2x2.ptr.pp_double[2][1]; + work.ptr.p_double[n+3] = ra1s2x2.ptr.pp_double[1][2]; + work.ptr.p_double[n+4] = ra1s2x2.ptr.pp_double[2][2]; + iwork.ptr.p_int[1] = ia1s2x2.ptr.pp_int[1][1]; + iwork.ptr.p_int[2] = ia1s2x2.ptr.pp_int[2][1]; + iwork.ptr.p_int[3] = ia1s2x2.ptr.pp_int[1][2]; + iwork.ptr.p_int[4] = ia1s2x2.ptr.pp_int[2][2]; + if( iwork.ptr.p_int[6]==iu ) + { + wl = work.ptr.p_double[n+1]; + wlu = work.ptr.p_double[n+3]; + nwl = iwork.ptr.p_int[1]; + wu = work.ptr.p_double[n+4]; + wul = work.ptr.p_double[n+2]; + nwu = iwork.ptr.p_int[4]; + } + else + { + wl = work.ptr.p_double[n+2]; + wlu = work.ptr.p_double[n+4]; + nwl = iwork.ptr.p_int[2]; + wu = work.ptr.p_double[n+3]; + wul = work.ptr.p_double[n+1]; + nwu = iwork.ptr.p_int[3]; + } + if( ((nwl<0||nwl>=n)||nwu<1)||nwu>n ) + { + *errorcode = 4; + result = ae_false; + ae_frame_leave(_state); + return result; + } + } + else + { + + /* + * RANGE='A' or 'V' -- Set ATOLI + */ + tnorm = ae_maxreal(ae_fabs(d->ptr.p_double[1], _state)+ae_fabs(e->ptr.p_double[1], _state), ae_fabs(d->ptr.p_double[n], _state)+ae_fabs(e->ptr.p_double[n-1], _state), _state); + for(j=2; j<=n-1; j++) + { + tnorm = ae_maxreal(tnorm, ae_fabs(d->ptr.p_double[j], _state)+ae_fabs(e->ptr.p_double[j-1], _state)+ae_fabs(e->ptr.p_double[j], _state), _state); + } + if( ae_fp_less_eq(abstol,0) ) + { + atoli = ulp*tnorm; + } + else + { + atoli = abstol; + } + if( irange==2 ) + { + wl = vl; + wu = vu; + } + else + { + wl = 0; + wu = 0; + } + } + + /* + * Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. + * NWL accumulates the number of eigenvalues .le. WL, + * NWU accumulates the number of eigenvalues .le. WU + */ + *m = 0; + iend = 0; + *errorcode = 0; + nwl = 0; + nwu = 0; + for(jb=1; jb<=*nsplit; jb++) + { + ioff = iend; + ibegin = ioff+1; + iend = isplit->ptr.p_int[jb]; + iin = iend-ioff; + if( iin==1 ) + { + + /* + * Special Case -- IIN=1 + */ + if( irange==1||ae_fp_greater_eq(wl,d->ptr.p_double[ibegin]-pivmin) ) + { + nwl = nwl+1; + } + if( irange==1||ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin) ) + { + nwu = nwu+1; + } + if( irange==1||(ae_fp_less(wl,d->ptr.p_double[ibegin]-pivmin)&&ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin)) ) + { + *m = *m+1; + w->ptr.p_double[*m] = d->ptr.p_double[ibegin]; + iblock->ptr.p_int[*m] = jb; + } + } + else + { + + /* + * General Case -- IIN > 1 + * + * Compute Gershgorin Interval + * and use it as the initial interval + */ + gu = d->ptr.p_double[ibegin]; + gl = d->ptr.p_double[ibegin]; + tmp1 = 0; + for(j=ibegin; j<=iend-1; j++) + { + tmp2 = ae_fabs(e->ptr.p_double[j], _state); + gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state); + gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state); + tmp1 = tmp2; + } + gu = ae_maxreal(gu, d->ptr.p_double[iend]+tmp1, _state); + gl = ae_minreal(gl, d->ptr.p_double[iend]-tmp1, _state); + bnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state); + gl = gl-fudge*bnorm*ulp*iin-fudge*pivmin; + gu = gu+fudge*bnorm*ulp*iin+fudge*pivmin; + + /* + * Compute ATOLI for the current submatrix + */ + if( ae_fp_less_eq(abstol,0) ) + { + atoli = ulp*ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state); + } + else + { + atoli = abstol; + } + if( irange>1 ) + { + if( ae_fp_less(gu,wl) ) + { + nwl = nwl+iin; + nwu = nwu+iin; + continue; + } + gl = ae_maxreal(gl, wl, _state); + gu = ae_minreal(gu, wu, _state); + if( ae_fp_greater_eq(gl,gu) ) + { + continue; + } + } + + /* + * Set Up Initial Interval + */ + work.ptr.p_double[n+1] = gl; + work.ptr.p_double[n+iin+1] = gu; + + /* + * Calling DLAEBZ + * + * CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + * D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + * IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + * IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) + */ + for(tmpi=1; tmpi<=iin; tmpi++) + { + ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi]; + if( ibegin-1+tmpi<n ) + { + ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi]; + } + ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi]; + ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi]; + ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin]; + ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi]; + rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi]; + iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi]; + ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi]; + ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin]; + } + evd_internaldlaebz(1, 0, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &im, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state); + for(tmpi=1; tmpi<=iin; tmpi++) + { + work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1]; + work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2]; + work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi]; + w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi]; + iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi]; + iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1]; + iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2]; + } + nwl = nwl+iwork.ptr.p_int[1]; + nwu = nwu+iwork.ptr.p_int[iin+1]; + iwoff = *m-iwork.ptr.p_int[1]; + + /* + * Compute Eigenvalues + */ + itmax = ae_iceil((ae_log(gu-gl+pivmin, _state)-ae_log(pivmin, _state))/ae_log(2, _state), _state)+2; + + /* + * Calling DLAEBZ + * + *CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + * D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + * IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + * IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) + */ + for(tmpi=1; tmpi<=iin; tmpi++) + { + ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi]; + if( ibegin-1+tmpi<n ) + { + ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi]; + } + ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi]; + ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi]; + ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin]; + ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi]; + rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi]; + iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi]; + ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi]; + ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin]; + } + evd_internaldlaebz(2, itmax, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &iout, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state); + for(tmpi=1; tmpi<=iin; tmpi++) + { + work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1]; + work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2]; + work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi]; + w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi]; + iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi]; + iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1]; + iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2]; + } + + /* + * Copy Eigenvalues Into W and IBLOCK + * Use -JB for block number for unconverged eigenvalues. + */ + for(j=1; j<=iout; j++) + { + tmp1 = 0.5*(work.ptr.p_double[j+n]+work.ptr.p_double[j+iin+n]); + + /* + * Flag non-convergence. + */ + if( j>iout-iinfo ) + { + ncnvrg = ae_true; + ib = -jb; + } + else + { + ib = jb; + } + for(je=iwork.ptr.p_int[j]+1+iwoff; je<=iwork.ptr.p_int[j+iin]+iwoff; je++) + { + w->ptr.p_double[je] = tmp1; + iblock->ptr.p_int[je] = ib; + } + } + *m = *m+im; + } + } + + /* + * If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU + * If NWL+1 < IL or NWU > IU, discard extra eigenvalues. + */ + if( irange==3 ) + { + im = 0; + idiscl = il-1-nwl; + idiscu = nwu-iu; + if( idiscl>0||idiscu>0 ) + { + for(je=1; je<=*m; je++) + { + if( ae_fp_less_eq(w->ptr.p_double[je],wlu)&&idiscl>0 ) + { + idiscl = idiscl-1; + } + else + { + if( ae_fp_greater_eq(w->ptr.p_double[je],wul)&&idiscu>0 ) + { + idiscu = idiscu-1; + } + else + { + im = im+1; + w->ptr.p_double[im] = w->ptr.p_double[je]; + iblock->ptr.p_int[im] = iblock->ptr.p_int[je]; + } + } + } + *m = im; + } + if( idiscl>0||idiscu>0 ) + { + + /* + * Code to deal with effects of bad arithmetic: + * Some low eigenvalues to be discarded are not in (WL,WLU], + * or high eigenvalues to be discarded are not in (WUL,WU] + * so just kill off the smallest IDISCL/largest IDISCU + * eigenvalues, by simply finding the smallest/largest + * eigenvalue(s). + * + * (If N(w) is monotone non-decreasing, this should never + * happen.) + */ + if( idiscl>0 ) + { + wkill = wu; + for(jdisc=1; jdisc<=idiscl; jdisc++) + { + iw = 0; + for(je=1; je<=*m; je++) + { + if( iblock->ptr.p_int[je]!=0&&(ae_fp_less(w->ptr.p_double[je],wkill)||iw==0) ) + { + iw = je; + wkill = w->ptr.p_double[je]; + } + } + iblock->ptr.p_int[iw] = 0; + } + } + if( idiscu>0 ) + { + wkill = wl; + for(jdisc=1; jdisc<=idiscu; jdisc++) + { + iw = 0; + for(je=1; je<=*m; je++) + { + if( iblock->ptr.p_int[je]!=0&&(ae_fp_greater(w->ptr.p_double[je],wkill)||iw==0) ) + { + iw = je; + wkill = w->ptr.p_double[je]; + } + } + iblock->ptr.p_int[iw] = 0; + } + } + im = 0; + for(je=1; je<=*m; je++) + { + if( iblock->ptr.p_int[je]!=0 ) + { + im = im+1; + w->ptr.p_double[im] = w->ptr.p_double[je]; + iblock->ptr.p_int[im] = iblock->ptr.p_int[je]; + } + } + *m = im; + } + if( idiscl<0||idiscu<0 ) + { + toofew = ae_true; + } + } + + /* + * If ORDER='B', do nothing -- the eigenvalues are already sorted + * by block. + * If ORDER='E', sort the eigenvalues from smallest to largest + */ + if( iorder==1&&*nsplit>1 ) + { + for(je=1; je<=*m-1; je++) + { + ie = 0; + tmp1 = w->ptr.p_double[je]; + for(j=je+1; j<=*m; j++) + { + if( ae_fp_less(w->ptr.p_double[j],tmp1) ) + { + ie = j; + tmp1 = w->ptr.p_double[j]; + } + } + if( ie!=0 ) + { + itmp1 = iblock->ptr.p_int[ie]; + w->ptr.p_double[ie] = w->ptr.p_double[je]; + iblock->ptr.p_int[ie] = iblock->ptr.p_int[je]; + w->ptr.p_double[je] = tmp1; + iblock->ptr.p_int[je] = itmp1; + } + } + } + for(j=1; j<=*m; j++) + { + w->ptr.p_double[j] = w->ptr.p_double[j]*scalefactor; + } + *errorcode = 0; + if( ncnvrg ) + { + *errorcode = *errorcode+1; + } + if( toofew ) + { + *errorcode = *errorcode+2; + } + result = *errorcode==0; + ae_frame_leave(_state); + return result; +} + + +static void evd_internaldstein(ae_int_t n, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t m, + /* Real */ ae_vector* w, + /* Integer */ ae_vector* iblock, + /* Integer */ ae_vector* isplit, + /* Real */ ae_matrix* z, + /* Integer */ ae_vector* ifail, + ae_int_t* info, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _e; + ae_vector _w; + ae_int_t maxits; + ae_int_t extra; + ae_int_t b1; + ae_int_t blksiz; + ae_int_t bn; + ae_int_t gpind; + ae_int_t i; + ae_int_t iinfo; + ae_int_t its; + ae_int_t j; + ae_int_t j1; + ae_int_t jblk; + ae_int_t jmax; + ae_int_t nblk; + ae_int_t nrmchk; + double dtpcrt; + double eps; + double eps1; + double nrm; + double onenrm; + double ortol; + double pertol; + double scl; + double sep; + double tol; + double xj; + double xjm; + double ztr; + ae_vector work1; + ae_vector work2; + ae_vector work3; + ae_vector work4; + ae_vector work5; + ae_vector iwork; + ae_bool tmpcriterion; + ae_int_t ti; + ae_int_t i1; + ae_int_t i2; + double v; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + ae_vector_init_copy(&_w, w, _state, ae_true); + w = &_w; + ae_matrix_clear(z); + ae_vector_clear(ifail); + *info = 0; + ae_vector_init(&work1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work2, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work3, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work4, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work5, 0, DT_REAL, _state, ae_true); + ae_vector_init(&iwork, 0, DT_INT, _state, ae_true); + + maxits = 5; + extra = 2; + ae_vector_set_length(&work1, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(&work2, ae_maxint(n-1, 1, _state)+1, _state); + ae_vector_set_length(&work3, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(&work4, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(&work5, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(&iwork, ae_maxint(n, 1, _state)+1, _state); + ae_vector_set_length(ifail, ae_maxint(m, 1, _state)+1, _state); + ae_matrix_set_length(z, ae_maxint(n, 1, _state)+1, ae_maxint(m, 1, _state)+1, _state); + + /* + * these initializers are not really necessary, + * but without them compiler complains about uninitialized locals + */ + gpind = 0; + onenrm = 0; + ortol = 0; + dtpcrt = 0; + xjm = 0; + + /* + * Test the input parameters. + */ + *info = 0; + for(i=1; i<=m; i++) + { + ifail->ptr.p_int[i] = 0; + } + if( n<0 ) + { + *info = -1; + ae_frame_leave(_state); + return; + } + if( m<0||m>n ) + { + *info = -4; + ae_frame_leave(_state); + return; + } + for(j=2; j<=m; j++) + { + if( iblock->ptr.p_int[j]<iblock->ptr.p_int[j-1] ) + { + *info = -6; + break; + } + if( iblock->ptr.p_int[j]==iblock->ptr.p_int[j-1]&&ae_fp_less(w->ptr.p_double[j],w->ptr.p_double[j-1]) ) + { + *info = -5; + break; + } + } + if( *info!=0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * Quick return if possible + */ + if( n==0||m==0 ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + z->ptr.pp_double[1][1] = 1; + ae_frame_leave(_state); + return; + } + + /* + * Some preparations + */ + ti = n-1; + ae_v_move(&work1.ptr.p_double[1], 1, &e->ptr.p_double[1], 1, ae_v_len(1,ti)); + ae_vector_set_length(e, n+1, _state); + ae_v_move(&e->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,ti)); + ae_v_move(&work1.ptr.p_double[1], 1, &w->ptr.p_double[1], 1, ae_v_len(1,m)); + ae_vector_set_length(w, n+1, _state); + ae_v_move(&w->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,m)); + + /* + * Get machine constants. + */ + eps = ae_machineepsilon; + + /* + * Compute eigenvectors of matrix blocks. + */ + j1 = 1; + for(nblk=1; nblk<=iblock->ptr.p_int[m]; nblk++) + { + + /* + * Find starting and ending indices of block nblk. + */ + if( nblk==1 ) + { + b1 = 1; + } + else + { + b1 = isplit->ptr.p_int[nblk-1]+1; + } + bn = isplit->ptr.p_int[nblk]; + blksiz = bn-b1+1; + if( blksiz!=1 ) + { + + /* + * Compute reorthogonalization criterion and stopping criterion. + */ + gpind = b1; + onenrm = ae_fabs(d->ptr.p_double[b1], _state)+ae_fabs(e->ptr.p_double[b1], _state); + onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[bn], _state)+ae_fabs(e->ptr.p_double[bn-1], _state), _state); + for(i=b1+1; i<=bn-1; i++) + { + onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state)+ae_fabs(e->ptr.p_double[i], _state), _state); + } + ortol = 0.001*onenrm; + dtpcrt = ae_sqrt(0.1/blksiz, _state); + } + + /* + * Loop through eigenvalues of block nblk. + */ + jblk = 0; + for(j=j1; j<=m; j++) + { + if( iblock->ptr.p_int[j]!=nblk ) + { + j1 = j; + break; + } + jblk = jblk+1; + xj = w->ptr.p_double[j]; + if( blksiz==1 ) + { + + /* + * Skip all the work if the block size is one. + */ + work1.ptr.p_double[1] = 1; + } + else + { + + /* + * If eigenvalues j and j-1 are too close, add a relatively + * small perturbation. + */ + if( jblk>1 ) + { + eps1 = ae_fabs(eps*xj, _state); + pertol = 10*eps1; + sep = xj-xjm; + if( ae_fp_less(sep,pertol) ) + { + xj = xjm+pertol; + } + } + its = 0; + nrmchk = 0; + + /* + * Get random starting vector. + */ + for(ti=1; ti<=blksiz; ti++) + { + work1.ptr.p_double[ti] = 2*ae_randomreal(_state)-1; + } + + /* + * Copy the matrix T so it won't be destroyed in factorization. + */ + for(ti=1; ti<=blksiz-1; ti++) + { + work2.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1]; + work3.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1]; + work4.ptr.p_double[ti] = d->ptr.p_double[b1+ti-1]; + } + work4.ptr.p_double[blksiz] = d->ptr.p_double[b1+blksiz-1]; + + /* + * Compute LU factors with partial pivoting ( PT = LU ) + */ + tol = 0; + evd_tdininternaldlagtf(blksiz, &work4, xj, &work2, &work3, tol, &work5, &iwork, &iinfo, _state); + + /* + * Update iteration count. + */ + do + { + its = its+1; + if( its>maxits ) + { + + /* + * If stopping criterion was not satisfied, update info and + * store eigenvector number in array ifail. + */ + *info = *info+1; + ifail->ptr.p_int[*info] = j; + break; + } + + /* + * Normalize and scale the righthand side vector Pb. + */ + v = 0; + for(ti=1; ti<=blksiz; ti++) + { + v = v+ae_fabs(work1.ptr.p_double[ti], _state); + } + scl = blksiz*onenrm*ae_maxreal(eps, ae_fabs(work4.ptr.p_double[blksiz], _state), _state)/v; + ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl); + + /* + * Solve the system LU = Pb. + */ + evd_tdininternaldlagts(blksiz, &work4, &work2, &work3, &work5, &iwork, &work1, &tol, &iinfo, _state); + + /* + * Reorthogonalize by modified Gram-Schmidt if eigenvalues are + * close enough. + */ + if( jblk!=1 ) + { + if( ae_fp_greater(ae_fabs(xj-xjm, _state),ortol) ) + { + gpind = j; + } + if( gpind!=j ) + { + for(i=gpind; i<=j-1; i++) + { + i1 = b1; + i2 = b1+blksiz-1; + ztr = ae_v_dotproduct(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz)); + ae_v_subd(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz), ztr); + } + } + } + + /* + * Check the infinity norm of the iterate. + */ + jmax = vectoridxabsmax(&work1, 1, blksiz, _state); + nrm = ae_fabs(work1.ptr.p_double[jmax], _state); + + /* + * Continue for additional iterations after norm reaches + * stopping criterion. + */ + tmpcriterion = ae_false; + if( ae_fp_less(nrm,dtpcrt) ) + { + tmpcriterion = ae_true; + } + else + { + nrmchk = nrmchk+1; + if( nrmchk<extra+1 ) + { + tmpcriterion = ae_true; + } + } + } + while(tmpcriterion); + + /* + * Accept iterate as jth eigenvector. + */ + scl = 1/vectornorm2(&work1, 1, blksiz, _state); + jmax = vectoridxabsmax(&work1, 1, blksiz, _state); + if( ae_fp_less(work1.ptr.p_double[jmax],0) ) + { + scl = -scl; + } + ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl); + } + for(i=1; i<=n; i++) + { + z->ptr.pp_double[i][j] = 0; + } + for(i=1; i<=blksiz; i++) + { + z->ptr.pp_double[b1+i-1][j] = work1.ptr.p_double[i]; + } + + /* + * Save the shift to check eigenvalue spacing at next + * iteration. + */ + xjm = xj; + } + } + ae_frame_leave(_state); +} + + +static void evd_tdininternaldlagtf(ae_int_t n, + /* Real */ ae_vector* a, + double lambdav, + /* Real */ ae_vector* b, + /* Real */ ae_vector* c, + double tol, + /* Real */ ae_vector* d, + /* Integer */ ae_vector* iin, + ae_int_t* info, + ae_state *_state) +{ + ae_int_t k; + double eps; + double mult; + double piv1; + double piv2; + double scale1; + double scale2; + double temp; + double tl; + + *info = 0; + + *info = 0; + if( n<0 ) + { + *info = -1; + return; + } + if( n==0 ) + { + return; + } + a->ptr.p_double[1] = a->ptr.p_double[1]-lambdav; + iin->ptr.p_int[n] = 0; + if( n==1 ) + { + if( ae_fp_eq(a->ptr.p_double[1],0) ) + { + iin->ptr.p_int[1] = 1; + } + return; + } + eps = ae_machineepsilon; + tl = ae_maxreal(tol, eps, _state); + scale1 = ae_fabs(a->ptr.p_double[1], _state)+ae_fabs(b->ptr.p_double[1], _state); + for(k=1; k<=n-1; k++) + { + a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-lambdav; + scale2 = ae_fabs(c->ptr.p_double[k], _state)+ae_fabs(a->ptr.p_double[k+1], _state); + if( k<n-1 ) + { + scale2 = scale2+ae_fabs(b->ptr.p_double[k+1], _state); + } + if( ae_fp_eq(a->ptr.p_double[k],0) ) + { + piv1 = 0; + } + else + { + piv1 = ae_fabs(a->ptr.p_double[k], _state)/scale1; + } + if( ae_fp_eq(c->ptr.p_double[k],0) ) + { + iin->ptr.p_int[k] = 0; + piv2 = 0; + scale1 = scale2; + if( k<n-1 ) + { + d->ptr.p_double[k] = 0; + } + } + else + { + piv2 = ae_fabs(c->ptr.p_double[k], _state)/scale2; + if( ae_fp_less_eq(piv2,piv1) ) + { + iin->ptr.p_int[k] = 0; + scale1 = scale2; + c->ptr.p_double[k] = c->ptr.p_double[k]/a->ptr.p_double[k]; + a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-c->ptr.p_double[k]*b->ptr.p_double[k]; + if( k<n-1 ) + { + d->ptr.p_double[k] = 0; + } + } + else + { + iin->ptr.p_int[k] = 1; + mult = a->ptr.p_double[k]/c->ptr.p_double[k]; + a->ptr.p_double[k] = c->ptr.p_double[k]; + temp = a->ptr.p_double[k+1]; + a->ptr.p_double[k+1] = b->ptr.p_double[k]-mult*temp; + if( k<n-1 ) + { + d->ptr.p_double[k] = b->ptr.p_double[k+1]; + b->ptr.p_double[k+1] = -mult*d->ptr.p_double[k]; + } + b->ptr.p_double[k] = temp; + c->ptr.p_double[k] = mult; + } + } + if( ae_fp_less_eq(ae_maxreal(piv1, piv2, _state),tl)&&iin->ptr.p_int[n]==0 ) + { + iin->ptr.p_int[n] = k; + } + } + if( ae_fp_less_eq(ae_fabs(a->ptr.p_double[n], _state),scale1*tl)&&iin->ptr.p_int[n]==0 ) + { + iin->ptr.p_int[n] = n; + } +} + + +static void evd_tdininternaldlagts(ae_int_t n, + /* Real */ ae_vector* a, + /* Real */ ae_vector* b, + /* Real */ ae_vector* c, + /* Real */ ae_vector* d, + /* Integer */ ae_vector* iin, + /* Real */ ae_vector* y, + double* tol, + ae_int_t* info, + ae_state *_state) +{ + ae_int_t k; + double absak; + double ak; + double bignum; + double eps; + double pert; + double sfmin; + double temp; + + *info = 0; + + *info = 0; + if( n<0 ) + { + *info = -1; + return; + } + if( n==0 ) + { + return; + } + eps = ae_machineepsilon; + sfmin = ae_minrealnumber; + bignum = 1/sfmin; + if( ae_fp_less_eq(*tol,0) ) + { + *tol = ae_fabs(a->ptr.p_double[1], _state); + if( n>1 ) + { + *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[2], _state), ae_fabs(b->ptr.p_double[1], _state), _state), _state); + } + for(k=3; k<=n; k++) + { + *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[k], _state), ae_maxreal(ae_fabs(b->ptr.p_double[k-1], _state), ae_fabs(d->ptr.p_double[k-2], _state), _state), _state), _state); + } + *tol = *tol*eps; + if( ae_fp_eq(*tol,0) ) + { + *tol = eps; + } + } + for(k=2; k<=n; k++) + { + if( iin->ptr.p_int[k-1]==0 ) + { + y->ptr.p_double[k] = y->ptr.p_double[k]-c->ptr.p_double[k-1]*y->ptr.p_double[k-1]; + } + else + { + temp = y->ptr.p_double[k-1]; + y->ptr.p_double[k-1] = y->ptr.p_double[k]; + y->ptr.p_double[k] = temp-c->ptr.p_double[k-1]*y->ptr.p_double[k]; + } + } + for(k=n; k>=1; k--) + { + if( k<=n-2 ) + { + temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1]-d->ptr.p_double[k]*y->ptr.p_double[k+2]; + } + else + { + if( k==n-1 ) + { + temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1]; + } + else + { + temp = y->ptr.p_double[k]; + } + } + ak = a->ptr.p_double[k]; + pert = ae_fabs(*tol, _state); + if( ae_fp_less(ak,0) ) + { + pert = -pert; + } + for(;;) + { + absak = ae_fabs(ak, _state); + if( ae_fp_less(absak,1) ) + { + if( ae_fp_less(absak,sfmin) ) + { + if( ae_fp_eq(absak,0)||ae_fp_greater(ae_fabs(temp, _state)*sfmin,absak) ) + { + ak = ak+pert; + pert = 2*pert; + continue; + } + else + { + temp = temp*bignum; + ak = ak*bignum; + } + } + else + { + if( ae_fp_greater(ae_fabs(temp, _state),absak*bignum) ) + { + ak = ak+pert; + pert = 2*pert; + continue; + } + } + } + break; + } + y->ptr.p_double[k] = temp/ak; + } +} + + +static void evd_internaldlaebz(ae_int_t ijob, + ae_int_t nitmax, + ae_int_t n, + ae_int_t mmax, + ae_int_t minp, + double abstol, + double reltol, + double pivmin, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + /* Real */ ae_vector* e2, + /* Integer */ ae_vector* nval, + /* Real */ ae_matrix* ab, + /* Real */ ae_vector* c, + ae_int_t* mout, + /* Integer */ ae_matrix* nab, + /* Real */ ae_vector* work, + /* Integer */ ae_vector* iwork, + ae_int_t* info, + ae_state *_state) +{ + ae_int_t itmp1; + ae_int_t itmp2; + ae_int_t j; + ae_int_t ji; + ae_int_t jit; + ae_int_t jp; + ae_int_t kf; + ae_int_t kfnew; + ae_int_t kl; + ae_int_t klnew; + double tmp1; + double tmp2; + + *mout = 0; + *info = 0; + + *info = 0; + if( ijob<1||ijob>3 ) + { + *info = -1; + return; + } + + /* + * Initialize NAB + */ + if( ijob==1 ) + { + + /* + * Compute the number of eigenvalues in the initial intervals. + */ + *mout = 0; + + /* + *DIR$ NOVECTOR + */ + for(ji=1; ji<=minp; ji++) + { + for(jp=1; jp<=2; jp++) + { + tmp1 = d->ptr.p_double[1]-ab->ptr.pp_double[ji][jp]; + if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) ) + { + tmp1 = -pivmin; + } + nab->ptr.pp_int[ji][jp] = 0; + if( ae_fp_less_eq(tmp1,0) ) + { + nab->ptr.pp_int[ji][jp] = 1; + } + for(j=2; j<=n; j++) + { + tmp1 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp1-ab->ptr.pp_double[ji][jp]; + if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) ) + { + tmp1 = -pivmin; + } + if( ae_fp_less_eq(tmp1,0) ) + { + nab->ptr.pp_int[ji][jp] = nab->ptr.pp_int[ji][jp]+1; + } + } + } + *mout = *mout+nab->ptr.pp_int[ji][2]-nab->ptr.pp_int[ji][1]; + } + return; + } + + /* + * Initialize for loop + * + * KF and KL have the following meaning: + * Intervals 1,...,KF-1 have converged. + * Intervals KF,...,KL still need to be refined. + */ + kf = 1; + kl = minp; + + /* + * If IJOB=2, initialize C. + * If IJOB=3, use the user-supplied starting point. + */ + if( ijob==2 ) + { + for(ji=1; ji<=minp; ji++) + { + c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]); + } + } + + /* + * Iteration loop + */ + for(jit=1; jit<=nitmax; jit++) + { + + /* + * Loop over intervals + * + * + * Serial Version of the loop + */ + klnew = kl; + for(ji=kf; ji<=kl; ji++) + { + + /* + * Compute N(w), the number of eigenvalues less than w + */ + tmp1 = c->ptr.p_double[ji]; + tmp2 = d->ptr.p_double[1]-tmp1; + itmp1 = 0; + if( ae_fp_less_eq(tmp2,pivmin) ) + { + itmp1 = 1; + tmp2 = ae_minreal(tmp2, -pivmin, _state); + } + + /* + * A series of compiler directives to defeat vectorization + * for the next loop + * + **$PL$ CMCHAR=' ' + *CDIR$ NEXTSCALAR + *C$DIR SCALAR + *CDIR$ NEXT SCALAR + *CVD$L NOVECTOR + *CDEC$ NOVECTOR + *CVD$ NOVECTOR + **VDIR NOVECTOR + **VOCL LOOP,SCALAR + *CIBM PREFER SCALAR + **$PL$ CMCHAR='*' + */ + for(j=2; j<=n; j++) + { + tmp2 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp2-tmp1; + if( ae_fp_less_eq(tmp2,pivmin) ) + { + itmp1 = itmp1+1; + tmp2 = ae_minreal(tmp2, -pivmin, _state); + } + } + if( ijob<=2 ) + { + + /* + * IJOB=2: Choose all intervals containing eigenvalues. + * + * Insure that N(w) is monotone + */ + itmp1 = ae_minint(nab->ptr.pp_int[ji][2], ae_maxint(nab->ptr.pp_int[ji][1], itmp1, _state), _state); + + /* + * Update the Queue -- add intervals if both halves + * contain eigenvalues. + */ + if( itmp1==nab->ptr.pp_int[ji][2] ) + { + + /* + * No eigenvalue in the upper interval: + * just use the lower interval. + */ + ab->ptr.pp_double[ji][2] = tmp1; + } + else + { + if( itmp1==nab->ptr.pp_int[ji][1] ) + { + + /* + * No eigenvalue in the lower interval: + * just use the upper interval. + */ + ab->ptr.pp_double[ji][1] = tmp1; + } + else + { + if( klnew<mmax ) + { + + /* + * Eigenvalue in both intervals -- add upper to queue. + */ + klnew = klnew+1; + ab->ptr.pp_double[klnew][2] = ab->ptr.pp_double[ji][2]; + nab->ptr.pp_int[klnew][2] = nab->ptr.pp_int[ji][2]; + ab->ptr.pp_double[klnew][1] = tmp1; + nab->ptr.pp_int[klnew][1] = itmp1; + ab->ptr.pp_double[ji][2] = tmp1; + nab->ptr.pp_int[ji][2] = itmp1; + } + else + { + *info = mmax+1; + return; + } + } + } + } + else + { + + /* + * IJOB=3: Binary search. Keep only the interval + * containing w s.t. N(w) = NVAL + */ + if( itmp1<=nval->ptr.p_int[ji] ) + { + ab->ptr.pp_double[ji][1] = tmp1; + nab->ptr.pp_int[ji][1] = itmp1; + } + if( itmp1>=nval->ptr.p_int[ji] ) + { + ab->ptr.pp_double[ji][2] = tmp1; + nab->ptr.pp_int[ji][2] = itmp1; + } + } + } + kl = klnew; + + /* + * Check for convergence + */ + kfnew = kf; + for(ji=kf; ji<=kl; ji++) + { + tmp1 = ae_fabs(ab->ptr.pp_double[ji][2]-ab->ptr.pp_double[ji][1], _state); + tmp2 = ae_maxreal(ae_fabs(ab->ptr.pp_double[ji][2], _state), ae_fabs(ab->ptr.pp_double[ji][1], _state), _state); + if( ae_fp_less(tmp1,ae_maxreal(abstol, ae_maxreal(pivmin, reltol*tmp2, _state), _state))||nab->ptr.pp_int[ji][1]>=nab->ptr.pp_int[ji][2] ) + { + + /* + * Converged -- Swap with position KFNEW, + * then increment KFNEW + */ + if( ji>kfnew ) + { + tmp1 = ab->ptr.pp_double[ji][1]; + tmp2 = ab->ptr.pp_double[ji][2]; + itmp1 = nab->ptr.pp_int[ji][1]; + itmp2 = nab->ptr.pp_int[ji][2]; + ab->ptr.pp_double[ji][1] = ab->ptr.pp_double[kfnew][1]; + ab->ptr.pp_double[ji][2] = ab->ptr.pp_double[kfnew][2]; + nab->ptr.pp_int[ji][1] = nab->ptr.pp_int[kfnew][1]; + nab->ptr.pp_int[ji][2] = nab->ptr.pp_int[kfnew][2]; + ab->ptr.pp_double[kfnew][1] = tmp1; + ab->ptr.pp_double[kfnew][2] = tmp2; + nab->ptr.pp_int[kfnew][1] = itmp1; + nab->ptr.pp_int[kfnew][2] = itmp2; + if( ijob==3 ) + { + itmp1 = nval->ptr.p_int[ji]; + nval->ptr.p_int[ji] = nval->ptr.p_int[kfnew]; + nval->ptr.p_int[kfnew] = itmp1; + } + } + kfnew = kfnew+1; + } + } + kf = kfnew; + + /* + * Choose Midpoints + */ + for(ji=kf; ji<=kl; ji++) + { + c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]); + } + + /* + * If no more intervals to refine, quit. + */ + if( kf>kl ) + { + break; + } + } + + /* + * Converged + */ + *info = ae_maxint(kl+1-kf, 0, _state); + *mout = kl; +} + + +/************************************************************************* +Internal subroutine + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 +*************************************************************************/ +static void evd_internaltrevc(/* Real */ ae_matrix* t, + ae_int_t n, + ae_int_t side, + ae_int_t howmny, + /* Boolean */ ae_vector* vselect, + /* Real */ ae_matrix* vl, + /* Real */ ae_matrix* vr, + ae_int_t* m, + ae_int_t* info, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _vselect; + ae_bool allv; + ae_bool bothv; + ae_bool leftv; + ae_bool over; + ae_bool pair; + ae_bool rightv; + ae_bool somev; + ae_int_t i; + ae_int_t ierr; + ae_int_t ii; + ae_int_t ip; + ae_int_t iis; + ae_int_t j; + ae_int_t j1; + ae_int_t j2; + ae_int_t jnxt; + ae_int_t k; + ae_int_t ki; + ae_int_t n2; + double beta; + double bignum; + double emax; + double ovfl; + double rec; + double remax; + double scl; + double smin; + double smlnum; + double ulp; + double unfl; + double vcrit; + double vmax; + double wi; + double wr; + double xnorm; + ae_matrix x; + ae_vector work; + ae_vector temp; + ae_matrix temp11; + ae_matrix temp22; + ae_matrix temp11b; + ae_matrix temp21b; + ae_matrix temp12b; + ae_matrix temp22b; + ae_bool skipflag; + ae_int_t k1; + ae_int_t k2; + ae_int_t k3; + ae_int_t k4; + double vt; + ae_vector rswap4; + ae_vector zswap4; + ae_matrix ipivot44; + ae_vector civ4; + ae_vector crv4; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_vselect, vselect, _state, ae_true); + vselect = &_vselect; + *m = 0; + *info = 0; + ae_matrix_init(&x, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_vector_init(&temp, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&temp11, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&temp22, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&temp11b, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&temp21b, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&temp12b, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&temp22b, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&rswap4, 0, DT_BOOL, _state, ae_true); + ae_vector_init(&zswap4, 0, DT_BOOL, _state, ae_true); + ae_matrix_init(&ipivot44, 0, 0, DT_INT, _state, ae_true); + ae_vector_init(&civ4, 0, DT_REAL, _state, ae_true); + ae_vector_init(&crv4, 0, DT_REAL, _state, ae_true); + + ae_matrix_set_length(&x, 2+1, 2+1, _state); + ae_matrix_set_length(&temp11, 1+1, 1+1, _state); + ae_matrix_set_length(&temp11b, 1+1, 1+1, _state); + ae_matrix_set_length(&temp21b, 2+1, 1+1, _state); + ae_matrix_set_length(&temp12b, 1+1, 2+1, _state); + ae_matrix_set_length(&temp22b, 2+1, 2+1, _state); + ae_matrix_set_length(&temp22, 2+1, 2+1, _state); + ae_vector_set_length(&work, 3*n+1, _state); + ae_vector_set_length(&temp, n+1, _state); + ae_vector_set_length(&rswap4, 4+1, _state); + ae_vector_set_length(&zswap4, 4+1, _state); + ae_matrix_set_length(&ipivot44, 4+1, 4+1, _state); + ae_vector_set_length(&civ4, 4+1, _state); + ae_vector_set_length(&crv4, 4+1, _state); + if( howmny!=1 ) + { + if( side==1||side==3 ) + { + ae_matrix_set_length(vr, n+1, n+1, _state); + } + if( side==2||side==3 ) + { + ae_matrix_set_length(vl, n+1, n+1, _state); + } + } + + /* + * Decode and test the input parameters + */ + bothv = side==3; + rightv = side==1||bothv; + leftv = side==2||bothv; + allv = howmny==2; + over = howmny==1; + somev = howmny==3; + *info = 0; + if( n<0 ) + { + *info = -2; + ae_frame_leave(_state); + return; + } + if( !rightv&&!leftv ) + { + *info = -3; + ae_frame_leave(_state); + return; + } + if( (!allv&&!over)&&!somev ) + { + *info = -4; + ae_frame_leave(_state); + return; + } + + /* + * Set M to the number of columns required to store the selected + * eigenvectors, standardize the array SELECT if necessary, and + * test MM. + */ + if( somev ) + { + *m = 0; + pair = ae_false; + for(j=1; j<=n; j++) + { + if( pair ) + { + pair = ae_false; + vselect->ptr.p_bool[j] = ae_false; + } + else + { + if( j<n ) + { + if( ae_fp_eq(t->ptr.pp_double[j+1][j],0) ) + { + if( vselect->ptr.p_bool[j] ) + { + *m = *m+1; + } + } + else + { + pair = ae_true; + if( vselect->ptr.p_bool[j]||vselect->ptr.p_bool[j+1] ) + { + vselect->ptr.p_bool[j] = ae_true; + *m = *m+2; + } + } + } + else + { + if( vselect->ptr.p_bool[n] ) + { + *m = *m+1; + } + } + } + } + } + else + { + *m = n; + } + + /* + * Quick return if possible. + */ + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * Set the constants to control overflow. + */ + unfl = ae_minrealnumber; + ovfl = 1/unfl; + ulp = ae_machineepsilon; + smlnum = unfl*(n/ulp); + bignum = (1-ulp)/smlnum; + + /* + * Compute 1-norm of each column of strictly upper triangular + * part of T to control overflow in triangular solver. + */ + work.ptr.p_double[1] = 0; + for(j=2; j<=n; j++) + { + work.ptr.p_double[j] = 0; + for(i=1; i<=j-1; i++) + { + work.ptr.p_double[j] = work.ptr.p_double[j]+ae_fabs(t->ptr.pp_double[i][j], _state); + } + } + + /* + * Index IP is used to specify the real or complex eigenvalue: + * IP = 0, real eigenvalue, + * 1, first of conjugate complex pair: (wr,wi) + * -1, second of conjugate complex pair: (wr,wi) + */ + n2 = 2*n; + if( rightv ) + { + + /* + * Compute right eigenvectors. + */ + ip = 0; + iis = *m; + for(ki=n; ki>=1; ki--) + { + skipflag = ae_false; + if( ip==1 ) + { + skipflag = ae_true; + } + else + { + if( ki!=1 ) + { + if( ae_fp_neq(t->ptr.pp_double[ki][ki-1],0) ) + { + ip = -1; + } + } + if( somev ) + { + if( ip==0 ) + { + if( !vselect->ptr.p_bool[ki] ) + { + skipflag = ae_true; + } + } + else + { + if( !vselect->ptr.p_bool[ki-1] ) + { + skipflag = ae_true; + } + } + } + } + if( !skipflag ) + { + + /* + * Compute the KI-th eigenvalue (WR,WI). + */ + wr = t->ptr.pp_double[ki][ki]; + wi = 0; + if( ip!=0 ) + { + wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki-1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki-1][ki], _state), _state); + } + smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state); + if( ip==0 ) + { + + /* + * Real right eigenvector + */ + work.ptr.p_double[ki+n] = 1; + + /* + * Form right-hand side + */ + for(k=1; k<=ki-1; k++) + { + work.ptr.p_double[k+n] = -t->ptr.pp_double[k][ki]; + } + + /* + * Solve the upper quasi-triangular system: + * (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. + */ + jnxt = ki-1; + for(j=ki-1; j>=1; j--) + { + if( j>jnxt ) + { + continue; + } + j1 = j; + j2 = j; + jnxt = j-1; + if( j>1 ) + { + if( ae_fp_neq(t->ptr.pp_double[j][j-1],0) ) + { + j1 = j-1; + jnxt = j-2; + } + } + if( j1==j2 ) + { + + /* + * 1-by-1 diagonal block + */ + temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j]; + temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n]; + evd_internalhsevdlaln2(ae_false, 1, 1, smin, 1, &temp11, 1.0, 1.0, &temp11b, wr, 0.0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale X(1,1) to avoid overflow when updating + * the right-hand side. + */ + if( ae_fp_greater(xnorm,1) ) + { + if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) ) + { + x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm; + scl = scl/xnorm; + } + } + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + k1 = n+1; + k2 = n+ki; + ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl); + } + work.ptr.p_double[j+n] = x.ptr.pp_double[1][1]; + + /* + * Update right-hand side + */ + k1 = 1+n; + k2 = j-1+n; + k3 = j-1; + vt = -x.ptr.pp_double[1][1]; + ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt); + } + else + { + + /* + * 2-by-2 diagonal block + */ + temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1]; + temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j]; + temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1]; + temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j]; + temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n]; + temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+n]; + evd_internalhsevdlaln2(ae_false, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, 0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale X(1,1) and X(2,1) to avoid overflow when + * updating the right-hand side. + */ + if( ae_fp_greater(xnorm,1) ) + { + beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state); + if( ae_fp_greater(beta,bignum/xnorm) ) + { + x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm; + x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]/xnorm; + scl = scl/xnorm; + } + } + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + k1 = 1+n; + k2 = ki+n; + ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl); + } + work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1]; + work.ptr.p_double[j+n] = x.ptr.pp_double[2][1]; + + /* + * Update right-hand side + */ + k1 = 1+n; + k2 = j-2+n; + k3 = j-2; + k4 = j-1; + vt = -x.ptr.pp_double[1][1]; + ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][k4], t->stride, ae_v_len(k1,k2), vt); + vt = -x.ptr.pp_double[2][1]; + ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt); + } + } + + /* + * Copy the vector x or Q*x to VR and normalize. + */ + if( !over ) + { + k1 = 1+n; + k2 = ki+n; + ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[k1], 1, ae_v_len(1,ki)); + ii = columnidxabsmax(vr, 1, ki, iis, _state); + remax = 1/ae_fabs(vr->ptr.pp_double[ii][iis], _state); + ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax); + for(k=ki+1; k<=n; k++) + { + vr->ptr.pp_double[k][iis] = 0; + } + } + else + { + if( ki>1 ) + { + ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n)); + matrixvectormultiply(vr, 1, n, 1, ki-1, ae_false, &work, 1+n, ki-1+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state); + ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n)); + } + ii = columnidxabsmax(vr, 1, n, ki, _state); + remax = 1/ae_fabs(vr->ptr.pp_double[ii][ki], _state); + ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax); + } + } + else + { + + /* + * Complex right eigenvector. + * + * Initial solve + * [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + * [ (T(KI,KI-1) T(KI,KI) ) ] + */ + if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki-1][ki], _state),ae_fabs(t->ptr.pp_double[ki][ki-1], _state)) ) + { + work.ptr.p_double[ki-1+n] = 1; + work.ptr.p_double[ki+n2] = wi/t->ptr.pp_double[ki-1][ki]; + } + else + { + work.ptr.p_double[ki-1+n] = -wi/t->ptr.pp_double[ki][ki-1]; + work.ptr.p_double[ki+n2] = 1; + } + work.ptr.p_double[ki+n] = 0; + work.ptr.p_double[ki-1+n2] = 0; + + /* + * Form right-hand side + */ + for(k=1; k<=ki-2; k++) + { + work.ptr.p_double[k+n] = -work.ptr.p_double[ki-1+n]*t->ptr.pp_double[k][ki-1]; + work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+n2]*t->ptr.pp_double[k][ki]; + } + + /* + * Solve upper quasi-triangular system: + * (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) + */ + jnxt = ki-2; + for(j=ki-2; j>=1; j--) + { + if( j>jnxt ) + { + continue; + } + j1 = j; + j2 = j; + jnxt = j-1; + if( j>1 ) + { + if( ae_fp_neq(t->ptr.pp_double[j][j-1],0) ) + { + j1 = j-1; + jnxt = j-2; + } + } + if( j1==j2 ) + { + + /* + * 1-by-1 diagonal block + */ + temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j]; + temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n]; + temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n]; + evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale X(1,1) and X(1,2) to avoid overflow when + * updating the right-hand side. + */ + if( ae_fp_greater(xnorm,1) ) + { + if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) ) + { + x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm; + x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]/xnorm; + scl = scl/xnorm; + } + } + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + k1 = 1+n; + k2 = ki+n; + ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl); + k1 = 1+n2; + k2 = ki+n2; + ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl); + } + work.ptr.p_double[j+n] = x.ptr.pp_double[1][1]; + work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2]; + + /* + * Update the right-hand side + */ + k1 = 1+n; + k2 = j-1+n; + k3 = 1; + k4 = j-1; + vt = -x.ptr.pp_double[1][1]; + ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt); + k1 = 1+n2; + k2 = j-1+n2; + k3 = 1; + k4 = j-1; + vt = -x.ptr.pp_double[1][2]; + ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt); + } + else + { + + /* + * 2-by-2 diagonal block + */ + temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1]; + temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j]; + temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1]; + temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j]; + temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n]; + temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j-1+n+n]; + temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+n]; + temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+n+n]; + evd_internalhsevdlaln2(ae_false, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale X to avoid overflow when updating + * the right-hand side. + */ + if( ae_fp_greater(xnorm,1) ) + { + beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state); + if( ae_fp_greater(beta,bignum/xnorm) ) + { + rec = 1/xnorm; + x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]*rec; + x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]*rec; + x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]*rec; + x.ptr.pp_double[2][2] = x.ptr.pp_double[2][2]*rec; + scl = scl*rec; + } + } + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + ae_v_muld(&work.ptr.p_double[1+n], 1, ae_v_len(1+n,ki+n), scl); + ae_v_muld(&work.ptr.p_double[1+n2], 1, ae_v_len(1+n2,ki+n2), scl); + } + work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1]; + work.ptr.p_double[j+n] = x.ptr.pp_double[2][1]; + work.ptr.p_double[j-1+n2] = x.ptr.pp_double[1][2]; + work.ptr.p_double[j+n2] = x.ptr.pp_double[2][2]; + + /* + * Update the right-hand side + */ + vt = -x.ptr.pp_double[1][1]; + ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n+1,n+j-2), vt); + vt = -x.ptr.pp_double[2][1]; + ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n+1,n+j-2), vt); + vt = -x.ptr.pp_double[1][2]; + ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n2+1,n2+j-2), vt); + vt = -x.ptr.pp_double[2][2]; + ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n2+1,n2+j-2), vt); + } + } + + /* + * Copy the vector x or Q*x to VR and normalize. + */ + if( !over ) + { + ae_v_move(&vr->ptr.pp_double[1][iis-1], vr->stride, &work.ptr.p_double[n+1], 1, ae_v_len(1,ki)); + ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[n2+1], 1, ae_v_len(1,ki)); + emax = 0; + for(k=1; k<=ki; k++) + { + emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][iis-1], _state)+ae_fabs(vr->ptr.pp_double[k][iis], _state), _state); + } + remax = 1/emax; + ae_v_muld(&vr->ptr.pp_double[1][iis-1], vr->stride, ae_v_len(1,ki), remax); + ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax); + for(k=ki+1; k<=n; k++) + { + vr->ptr.pp_double[k][iis-1] = 0; + vr->ptr.pp_double[k][iis] = 0; + } + } + else + { + if( ki>2 ) + { + ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n)); + matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n, ki-2+n, 1.0, &temp, 1, n, work.ptr.p_double[ki-1+n], _state); + ae_v_move(&vr->ptr.pp_double[1][ki-1], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n)); + ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n)); + matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n2, ki-2+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+n2], _state); + ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n)); + } + else + { + vt = work.ptr.p_double[ki-1+n]; + ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), vt); + vt = work.ptr.p_double[ki+n2]; + ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), vt); + } + emax = 0; + for(k=1; k<=n; k++) + { + emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][ki-1], _state)+ae_fabs(vr->ptr.pp_double[k][ki], _state), _state); + } + remax = 1/emax; + ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), remax); + ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax); + } + } + iis = iis-1; + if( ip!=0 ) + { + iis = iis-1; + } + } + if( ip==1 ) + { + ip = 0; + } + if( ip==-1 ) + { + ip = 1; + } + } + } + if( leftv ) + { + + /* + * Compute left eigenvectors. + */ + ip = 0; + iis = 1; + for(ki=1; ki<=n; ki++) + { + skipflag = ae_false; + if( ip==-1 ) + { + skipflag = ae_true; + } + else + { + if( ki!=n ) + { + if( ae_fp_neq(t->ptr.pp_double[ki+1][ki],0) ) + { + ip = 1; + } + } + if( somev ) + { + if( !vselect->ptr.p_bool[ki] ) + { + skipflag = ae_true; + } + } + } + if( !skipflag ) + { + + /* + * Compute the KI-th eigenvalue (WR,WI). + */ + wr = t->ptr.pp_double[ki][ki]; + wi = 0; + if( ip!=0 ) + { + wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki+1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki+1][ki], _state), _state); + } + smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state); + if( ip==0 ) + { + + /* + * Real left eigenvector. + */ + work.ptr.p_double[ki+n] = 1; + + /* + * Form right-hand side + */ + for(k=ki+1; k<=n; k++) + { + work.ptr.p_double[k+n] = -t->ptr.pp_double[ki][k]; + } + + /* + * Solve the quasi-triangular system: + * (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK + */ + vmax = 1; + vcrit = bignum; + jnxt = ki+1; + for(j=ki+1; j<=n; j++) + { + if( j<jnxt ) + { + continue; + } + j1 = j; + j2 = j; + jnxt = j+1; + if( j<n ) + { + if( ae_fp_neq(t->ptr.pp_double[j+1][j],0) ) + { + j2 = j+1; + jnxt = j+2; + } + } + if( j1==j2 ) + { + + /* + * 1-by-1 diagonal block + * + * Scale if necessary to avoid overflow when forming + * the right-hand side. + */ + if( ae_fp_greater(work.ptr.p_double[j],vcrit) ) + { + rec = 1/vmax; + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec); + vmax = 1; + vcrit = bignum; + } + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1)); + work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt; + + /* + * Solve (T(J,J)-WR)'*X = WORK + */ + temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j]; + temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n]; + evd_internalhsevdlaln2(ae_false, 1, 1, smin, 1.0, &temp11, 1.0, 1.0, &temp11b, wr, 0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl); + } + work.ptr.p_double[j+n] = x.ptr.pp_double[1][1]; + vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), vmax, _state); + vcrit = bignum/vmax; + } + else + { + + /* + * 2-by-2 diagonal block + * + * Scale if necessary to avoid overflow when forming + * the right-hand side. + */ + beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state); + if( ae_fp_greater(beta,vcrit) ) + { + rec = 1/vmax; + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec); + vmax = 1; + vcrit = bignum; + } + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1)); + work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt; + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j+1], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1)); + work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt; + + /* + * Solve + * [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + * [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) + */ + temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j]; + temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1]; + temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j]; + temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1]; + temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j+n]; + temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n]; + evd_internalhsevdlaln2(ae_true, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, 0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl); + } + work.ptr.p_double[j+n] = x.ptr.pp_double[1][1]; + work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1]; + vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+1+n], _state), vmax, _state), _state); + vcrit = bignum/vmax; + } + } + + /* + * Copy the vector x or Q*x to VL and normalize. + */ + if( !over ) + { + ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n)); + ii = columnidxabsmax(vl, ki, n, iis, _state); + remax = 1/ae_fabs(vl->ptr.pp_double[ii][iis], _state); + ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax); + for(k=1; k<=ki-1; k++) + { + vl->ptr.pp_double[k][iis] = 0; + } + } + else + { + if( ki<n ) + { + ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n)); + matrixvectormultiply(vl, 1, n, ki+1, n, ae_false, &work, ki+1+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state); + ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n)); + } + ii = columnidxabsmax(vl, 1, n, ki, _state); + remax = 1/ae_fabs(vl->ptr.pp_double[ii][ki], _state); + ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax); + } + } + else + { + + /* + * Complex left eigenvector. + * + * Initial solve: + * ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. + * ((T(KI+1,KI) T(KI+1,KI+1)) ) + */ + if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki][ki+1], _state),ae_fabs(t->ptr.pp_double[ki+1][ki], _state)) ) + { + work.ptr.p_double[ki+n] = wi/t->ptr.pp_double[ki][ki+1]; + work.ptr.p_double[ki+1+n2] = 1; + } + else + { + work.ptr.p_double[ki+n] = 1; + work.ptr.p_double[ki+1+n2] = -wi/t->ptr.pp_double[ki+1][ki]; + } + work.ptr.p_double[ki+1+n] = 0; + work.ptr.p_double[ki+n2] = 0; + + /* + * Form right-hand side + */ + for(k=ki+2; k<=n; k++) + { + work.ptr.p_double[k+n] = -work.ptr.p_double[ki+n]*t->ptr.pp_double[ki][k]; + work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+1+n2]*t->ptr.pp_double[ki+1][k]; + } + + /* + * Solve complex quasi-triangular system: + * ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 + */ + vmax = 1; + vcrit = bignum; + jnxt = ki+2; + for(j=ki+2; j<=n; j++) + { + if( j<jnxt ) + { + continue; + } + j1 = j; + j2 = j; + jnxt = j+1; + if( j<n ) + { + if( ae_fp_neq(t->ptr.pp_double[j+1][j],0) ) + { + j2 = j+1; + jnxt = j+2; + } + } + if( j1==j2 ) + { + + /* + * 1-by-1 diagonal block + * + * Scale if necessary to avoid overflow when + * forming the right-hand side elements. + */ + if( ae_fp_greater(work.ptr.p_double[j],vcrit) ) + { + rec = 1/vmax; + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec); + ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec); + vmax = 1; + vcrit = bignum; + } + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1)); + work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt; + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1)); + work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt; + + /* + * Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 + */ + temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j]; + temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n]; + temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n]; + evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl); + ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl); + } + work.ptr.p_double[j+n] = x.ptr.pp_double[1][1]; + work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2]; + vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+n2], _state), vmax, _state), _state); + vcrit = bignum/vmax; + } + else + { + + /* + * 2-by-2 diagonal block + * + * Scale if necessary to avoid overflow when forming + * the right-hand side elements. + */ + beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state); + if( ae_fp_greater(beta,vcrit) ) + { + rec = 1/vmax; + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec); + ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec); + vmax = 1; + vcrit = bignum; + } + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1)); + work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt; + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1)); + work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt; + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1)); + work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt; + vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1)); + work.ptr.p_double[j+1+n2] = work.ptr.p_double[j+1+n2]-vt; + + /* + * Solve 2-by-2 complex linear equation + * ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + * ([T(j+1,j) T(j+1,j+1)] ) + */ + temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j]; + temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1]; + temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j]; + temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1]; + temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j+n]; + temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n]; + temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n]; + temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+1+n+n]; + evd_internalhsevdlaln2(ae_true, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state); + + /* + * Scale if necessary + */ + if( ae_fp_neq(scl,1) ) + { + ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl); + ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl); + } + work.ptr.p_double[j+n] = x.ptr.pp_double[1][1]; + work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2]; + work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1]; + work.ptr.p_double[j+1+n2] = x.ptr.pp_double[2][2]; + vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][1], _state), vmax, _state); + vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][2], _state), vmax, _state); + vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][1], _state), vmax, _state); + vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][2], _state), vmax, _state); + vcrit = bignum/vmax; + } + } + + /* + * Copy the vector x or Q*x to VL and normalize. + */ + if( !over ) + { + ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n)); + ae_v_move(&vl->ptr.pp_double[ki][iis+1], vl->stride, &work.ptr.p_double[ki+n2], 1, ae_v_len(ki,n)); + emax = 0; + for(k=ki; k<=n; k++) + { + emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][iis], _state)+ae_fabs(vl->ptr.pp_double[k][iis+1], _state), _state); + } + remax = 1/emax; + ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax); + ae_v_muld(&vl->ptr.pp_double[ki][iis+1], vl->stride, ae_v_len(ki,n), remax); + for(k=1; k<=ki-1; k++) + { + vl->ptr.pp_double[k][iis] = 0; + vl->ptr.pp_double[k][iis+1] = 0; + } + } + else + { + if( ki<n-1 ) + { + ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n)); + matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state); + ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n)); + ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n)); + matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n2, n+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+1+n2], _state); + ae_v_move(&vl->ptr.pp_double[1][ki+1], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n)); + } + else + { + vt = work.ptr.p_double[ki+n]; + ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), vt); + vt = work.ptr.p_double[ki+1+n2]; + ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), vt); + } + emax = 0; + for(k=1; k<=n; k++) + { + emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][ki], _state)+ae_fabs(vl->ptr.pp_double[k][ki+1], _state), _state); + } + remax = 1/emax; + ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax); + ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), remax); + } + } + iis = iis+1; + if( ip!=0 ) + { + iis = iis+1; + } + } + if( ip==-1 ) + { + ip = 0; + } + if( ip==1 ) + { + ip = -1; + } + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +DLALN2 solves a system of the form (ca A - w D ) X = s B +or (ca A' - w D) X = s B with possible scaling ("s") and +perturbation of A. (A' means A-transpose.) + +A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA +real diagonal matrix, w is a real or complex value, and X and B are +NA x 1 matrices -- real if w is real, complex if w is complex. NA +may be 1 or 2. + +If w is complex, X and B are represented as NA x 2 matrices, +the first column of each being the real part and the second +being the imaginary part. + +"s" is a scaling factor (.LE. 1), computed by DLALN2, which is +so chosen that X can be computed without overflow. X is further +scaled if necessary to assure that norm(ca A - w D)*norm(X) is less +than overflow. + +If both singular values of (ca A - w D) are less than SMIN, +SMIN*identity will be used instead of (ca A - w D). If only one +singular value is less than SMIN, one element of (ca A - w D) will be +perturbed enough to make the smallest singular value roughly SMIN. +If both singular values are at least SMIN, (ca A - w D) will not be +perturbed. In any case, the perturbation will be at most some small +multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values +are computed by infinity-norm approximations, and thus will only be +correct to a factor of 2 or so. + +Note: all input quantities are assumed to be smaller than overflow +by a reasonable factor. (See BIGNUM.) + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +static void evd_internalhsevdlaln2(ae_bool ltrans, + ae_int_t na, + ae_int_t nw, + double smin, + double ca, + /* Real */ ae_matrix* a, + double d1, + double d2, + /* Real */ ae_matrix* b, + double wr, + double wi, + /* Boolean */ ae_vector* rswap4, + /* Boolean */ ae_vector* zswap4, + /* Integer */ ae_matrix* ipivot44, + /* Real */ ae_vector* civ4, + /* Real */ ae_vector* crv4, + /* Real */ ae_matrix* x, + double* scl, + double* xnorm, + ae_int_t* info, + ae_state *_state) +{ + ae_int_t icmax; + ae_int_t j; + double bbnd; + double bi1; + double bi2; + double bignum; + double bnorm; + double br1; + double br2; + double ci21; + double ci22; + double cmax; + double cnorm; + double cr21; + double cr22; + double csi; + double csr; + double li21; + double lr21; + double smini; + double smlnum; + double temp; + double u22abs; + double ui11; + double ui11r; + double ui12; + double ui12s; + double ui22; + double ur11; + double ur11r; + double ur12; + double ur12s; + double ur22; + double xi1; + double xi2; + double xr1; + double xr2; + double tmp1; + double tmp2; + + *scl = 0; + *xnorm = 0; + *info = 0; + + zswap4->ptr.p_bool[1] = ae_false; + zswap4->ptr.p_bool[2] = ae_false; + zswap4->ptr.p_bool[3] = ae_true; + zswap4->ptr.p_bool[4] = ae_true; + rswap4->ptr.p_bool[1] = ae_false; + rswap4->ptr.p_bool[2] = ae_true; + rswap4->ptr.p_bool[3] = ae_false; + rswap4->ptr.p_bool[4] = ae_true; + ipivot44->ptr.pp_int[1][1] = 1; + ipivot44->ptr.pp_int[2][1] = 2; + ipivot44->ptr.pp_int[3][1] = 3; + ipivot44->ptr.pp_int[4][1] = 4; + ipivot44->ptr.pp_int[1][2] = 2; + ipivot44->ptr.pp_int[2][2] = 1; + ipivot44->ptr.pp_int[3][2] = 4; + ipivot44->ptr.pp_int[4][2] = 3; + ipivot44->ptr.pp_int[1][3] = 3; + ipivot44->ptr.pp_int[2][3] = 4; + ipivot44->ptr.pp_int[3][3] = 1; + ipivot44->ptr.pp_int[4][3] = 2; + ipivot44->ptr.pp_int[1][4] = 4; + ipivot44->ptr.pp_int[2][4] = 3; + ipivot44->ptr.pp_int[3][4] = 2; + ipivot44->ptr.pp_int[4][4] = 1; + smlnum = 2*ae_minrealnumber; + bignum = 1/smlnum; + smini = ae_maxreal(smin, smlnum, _state); + + /* + * Don't check for input errors + */ + *info = 0; + + /* + * Standard Initializations + */ + *scl = 1; + if( na==1 ) + { + + /* + * 1 x 1 (i.e., scalar) system C X = B + */ + if( nw==1 ) + { + + /* + * Real 1x1 system. + * + * C = ca A - w D + */ + csr = ca*a->ptr.pp_double[1][1]-wr*d1; + cnorm = ae_fabs(csr, _state); + + /* + * If | C | < SMINI, use C = SMINI + */ + if( ae_fp_less(cnorm,smini) ) + { + csr = smini; + cnorm = smini; + *info = 1; + } + + /* + * Check scaling for X = B / C + */ + bnorm = ae_fabs(b->ptr.pp_double[1][1], _state); + if( ae_fp_less(cnorm,1)&&ae_fp_greater(bnorm,1) ) + { + if( ae_fp_greater(bnorm,bignum*cnorm) ) + { + *scl = 1/bnorm; + } + } + + /* + * Compute X + */ + x->ptr.pp_double[1][1] = b->ptr.pp_double[1][1]*(*scl)/csr; + *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state); + } + else + { + + /* + * Complex 1x1 system (w is complex) + * + * C = ca A - w D + */ + csr = ca*a->ptr.pp_double[1][1]-wr*d1; + csi = -wi*d1; + cnorm = ae_fabs(csr, _state)+ae_fabs(csi, _state); + + /* + * If | C | < SMINI, use C = SMINI + */ + if( ae_fp_less(cnorm,smini) ) + { + csr = smini; + csi = 0; + cnorm = smini; + *info = 1; + } + + /* + * Check scaling for X = B / C + */ + bnorm = ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state); + if( ae_fp_less(cnorm,1)&&ae_fp_greater(bnorm,1) ) + { + if( ae_fp_greater(bnorm,bignum*cnorm) ) + { + *scl = 1/bnorm; + } + } + + /* + * Compute X + */ + evd_internalhsevdladiv(*scl*b->ptr.pp_double[1][1], *scl*b->ptr.pp_double[1][2], csr, csi, &tmp1, &tmp2, _state); + x->ptr.pp_double[1][1] = tmp1; + x->ptr.pp_double[1][2] = tmp2; + *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state)+ae_fabs(x->ptr.pp_double[1][2], _state); + } + } + else + { + + /* + * 2x2 System + * + * Compute the real part of C = ca A - w D (or ca A' - w D ) + */ + crv4->ptr.p_double[1+0] = ca*a->ptr.pp_double[1][1]-wr*d1; + crv4->ptr.p_double[2+2] = ca*a->ptr.pp_double[2][2]-wr*d2; + if( ltrans ) + { + crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[2][1]; + crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[1][2]; + } + else + { + crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[2][1]; + crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[1][2]; + } + if( nw==1 ) + { + + /* + * Real 2x2 system (w is real) + * + * Find the largest element in C + */ + cmax = 0; + icmax = 0; + for(j=1; j<=4; j++) + { + if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state),cmax) ) + { + cmax = ae_fabs(crv4->ptr.p_double[j], _state); + icmax = j; + } + } + + /* + * If norm(C) < SMINI, use SMINI*identity. + */ + if( ae_fp_less(cmax,smini) ) + { + bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state), ae_fabs(b->ptr.pp_double[2][1], _state), _state); + if( ae_fp_less(smini,1)&&ae_fp_greater(bnorm,1) ) + { + if( ae_fp_greater(bnorm,bignum*smini) ) + { + *scl = 1/bnorm; + } + } + temp = *scl/smini; + x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1]; + x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1]; + *xnorm = temp*bnorm; + *info = 1; + return; + } + + /* + * Gaussian elimination with complete pivoting. + */ + ur11 = crv4->ptr.p_double[icmax]; + cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]]; + ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]]; + cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]]; + ur11r = 1/ur11; + lr21 = ur11r*cr21; + ur22 = cr22-ur12*lr21; + + /* + * If smaller pivot < SMINI, use SMINI + */ + if( ae_fp_less(ae_fabs(ur22, _state),smini) ) + { + ur22 = smini; + *info = 1; + } + if( rswap4->ptr.p_bool[icmax] ) + { + br1 = b->ptr.pp_double[2][1]; + br2 = b->ptr.pp_double[1][1]; + } + else + { + br1 = b->ptr.pp_double[1][1]; + br2 = b->ptr.pp_double[2][1]; + } + br2 = br2-lr21*br1; + bbnd = ae_maxreal(ae_fabs(br1*(ur22*ur11r), _state), ae_fabs(br2, _state), _state); + if( ae_fp_greater(bbnd,1)&&ae_fp_less(ae_fabs(ur22, _state),1) ) + { + if( ae_fp_greater_eq(bbnd,bignum*ae_fabs(ur22, _state)) ) + { + *scl = 1/bbnd; + } + } + xr2 = br2*(*scl)/ur22; + xr1 = *scl*br1*ur11r-xr2*(ur11r*ur12); + if( zswap4->ptr.p_bool[icmax] ) + { + x->ptr.pp_double[1][1] = xr2; + x->ptr.pp_double[2][1] = xr1; + } + else + { + x->ptr.pp_double[1][1] = xr1; + x->ptr.pp_double[2][1] = xr2; + } + *xnorm = ae_maxreal(ae_fabs(xr1, _state), ae_fabs(xr2, _state), _state); + + /* + * Further scaling if norm(A) norm(X) > overflow + */ + if( ae_fp_greater(*xnorm,1)&&ae_fp_greater(cmax,1) ) + { + if( ae_fp_greater(*xnorm,bignum/cmax) ) + { + temp = cmax/bignum; + x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1]; + x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1]; + *xnorm = temp*(*xnorm); + *scl = temp*(*scl); + } + } + } + else + { + + /* + * Complex 2x2 system (w is complex) + * + * Find the largest element in C + */ + civ4->ptr.p_double[1+0] = -wi*d1; + civ4->ptr.p_double[2+0] = 0; + civ4->ptr.p_double[1+2] = 0; + civ4->ptr.p_double[2+2] = -wi*d2; + cmax = 0; + icmax = 0; + for(j=1; j<=4; j++) + { + if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state),cmax) ) + { + cmax = ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state); + icmax = j; + } + } + + /* + * If norm(C) < SMINI, use SMINI*identity. + */ + if( ae_fp_less(cmax,smini) ) + { + bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state), ae_fabs(b->ptr.pp_double[2][1], _state)+ae_fabs(b->ptr.pp_double[2][2], _state), _state); + if( ae_fp_less(smini,1)&&ae_fp_greater(bnorm,1) ) + { + if( ae_fp_greater(bnorm,bignum*smini) ) + { + *scl = 1/bnorm; + } + } + temp = *scl/smini; + x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1]; + x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1]; + x->ptr.pp_double[1][2] = temp*b->ptr.pp_double[1][2]; + x->ptr.pp_double[2][2] = temp*b->ptr.pp_double[2][2]; + *xnorm = temp*bnorm; + *info = 1; + return; + } + + /* + * Gaussian elimination with complete pivoting. + */ + ur11 = crv4->ptr.p_double[icmax]; + ui11 = civ4->ptr.p_double[icmax]; + cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]]; + ci21 = civ4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]]; + ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]]; + ui12 = civ4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]]; + cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]]; + ci22 = civ4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]]; + if( icmax==1||icmax==4 ) + { + + /* + * Code when off-diagonals of pivoted C are real + */ + if( ae_fp_greater(ae_fabs(ur11, _state),ae_fabs(ui11, _state)) ) + { + temp = ui11/ur11; + ur11r = 1/(ur11*(1+ae_sqr(temp, _state))); + ui11r = -temp*ur11r; + } + else + { + temp = ur11/ui11; + ui11r = -1/(ui11*(1+ae_sqr(temp, _state))); + ur11r = -temp*ui11r; + } + lr21 = cr21*ur11r; + li21 = cr21*ui11r; + ur12s = ur12*ur11r; + ui12s = ur12*ui11r; + ur22 = cr22-ur12*lr21; + ui22 = ci22-ur12*li21; + } + else + { + + /* + * Code when diagonals of pivoted C are real + */ + ur11r = 1/ur11; + ui11r = 0; + lr21 = cr21*ur11r; + li21 = ci21*ur11r; + ur12s = ur12*ur11r; + ui12s = ui12*ur11r; + ur22 = cr22-ur12*lr21+ui12*li21; + ui22 = -ur12*li21-ui12*lr21; + } + u22abs = ae_fabs(ur22, _state)+ae_fabs(ui22, _state); + + /* + * If smaller pivot < SMINI, use SMINI + */ + if( ae_fp_less(u22abs,smini) ) + { + ur22 = smini; + ui22 = 0; + *info = 1; + } + if( rswap4->ptr.p_bool[icmax] ) + { + br2 = b->ptr.pp_double[1][1]; + br1 = b->ptr.pp_double[2][1]; + bi2 = b->ptr.pp_double[1][2]; + bi1 = b->ptr.pp_double[2][2]; + } + else + { + br1 = b->ptr.pp_double[1][1]; + br2 = b->ptr.pp_double[2][1]; + bi1 = b->ptr.pp_double[1][2]; + bi2 = b->ptr.pp_double[2][2]; + } + br2 = br2-lr21*br1+li21*bi1; + bi2 = bi2-li21*br1-lr21*bi1; + bbnd = ae_maxreal((ae_fabs(br1, _state)+ae_fabs(bi1, _state))*(u22abs*(ae_fabs(ur11r, _state)+ae_fabs(ui11r, _state))), ae_fabs(br2, _state)+ae_fabs(bi2, _state), _state); + if( ae_fp_greater(bbnd,1)&&ae_fp_less(u22abs,1) ) + { + if( ae_fp_greater_eq(bbnd,bignum*u22abs) ) + { + *scl = 1/bbnd; + br1 = *scl*br1; + bi1 = *scl*bi1; + br2 = *scl*br2; + bi2 = *scl*bi2; + } + } + evd_internalhsevdladiv(br2, bi2, ur22, ui22, &xr2, &xi2, _state); + xr1 = ur11r*br1-ui11r*bi1-ur12s*xr2+ui12s*xi2; + xi1 = ui11r*br1+ur11r*bi1-ui12s*xr2-ur12s*xi2; + if( zswap4->ptr.p_bool[icmax] ) + { + x->ptr.pp_double[1][1] = xr2; + x->ptr.pp_double[2][1] = xr1; + x->ptr.pp_double[1][2] = xi2; + x->ptr.pp_double[2][2] = xi1; + } + else + { + x->ptr.pp_double[1][1] = xr1; + x->ptr.pp_double[2][1] = xr2; + x->ptr.pp_double[1][2] = xi1; + x->ptr.pp_double[2][2] = xi2; + } + *xnorm = ae_maxreal(ae_fabs(xr1, _state)+ae_fabs(xi1, _state), ae_fabs(xr2, _state)+ae_fabs(xi2, _state), _state); + + /* + * Further scaling if norm(A) norm(X) > overflow + */ + if( ae_fp_greater(*xnorm,1)&&ae_fp_greater(cmax,1) ) + { + if( ae_fp_greater(*xnorm,bignum/cmax) ) + { + temp = cmax/bignum; + x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1]; + x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1]; + x->ptr.pp_double[1][2] = temp*x->ptr.pp_double[1][2]; + x->ptr.pp_double[2][2] = temp*x->ptr.pp_double[2][2]; + *xnorm = temp*(*xnorm); + *scl = temp*(*scl); + } + } + } + } +} + + +/************************************************************************* +performs complex division in real arithmetic + + a + i*b + p + i*q = --------- + c + i*d + +The algorithm is due to Robert L. Smith and can be found +in D. Knuth, The art of Computer Programming, Vol.2, p.195 + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +static void evd_internalhsevdladiv(double a, + double b, + double c, + double d, + double* p, + double* q, + ae_state *_state) +{ + double e; + double f; + + *p = 0; + *q = 0; + + if( ae_fp_less(ae_fabs(d, _state),ae_fabs(c, _state)) ) + { + e = d/c; + f = c+d*e; + *p = (a+b*e)/f; + *q = (b-a*e)/f; + } + else + { + e = c/d; + f = d+c*e; + *p = (b+a*e)/f; + *q = (-a+b*e)/f; + } +} + + +static ae_bool evd_nonsymmetricevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t vneeded, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + /* Real */ ae_matrix* vl, + /* Real */ ae_matrix* vr, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_matrix s; + ae_vector tau; + ae_vector sel; + ae_int_t i; + ae_int_t info; + ae_int_t m; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(wr); + ae_vector_clear(wi); + ae_matrix_clear(vl); + ae_matrix_clear(vr); + ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tau, 0, DT_REAL, _state, ae_true); + ae_vector_init(&sel, 0, DT_BOOL, _state, ae_true); + + ae_assert(vneeded>=0&&vneeded<=3, "NonSymmetricEVD: incorrect VNeeded!", _state); + if( vneeded==0 ) + { + + /* + * Eigen values only + */ + evd_toupperhessenberg(a, n, &tau, _state); + internalschurdecomposition(a, n, 0, 0, wr, wi, &s, &info, _state); + result = info==0; + ae_frame_leave(_state); + return result; + } + + /* + * Eigen values and vectors + */ + evd_toupperhessenberg(a, n, &tau, _state); + evd_unpackqfromupperhessenberg(a, n, &tau, &s, _state); + internalschurdecomposition(a, n, 1, 1, wr, wi, &s, &info, _state); + result = info==0; + if( !result ) + { + ae_frame_leave(_state); + return result; + } + if( vneeded==1||vneeded==3 ) + { + ae_matrix_set_length(vr, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + ae_v_move(&vr->ptr.pp_double[i][1], 1, &s.ptr.pp_double[i][1], 1, ae_v_len(1,n)); + } + } + if( vneeded==2||vneeded==3 ) + { + ae_matrix_set_length(vl, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + ae_v_move(&vl->ptr.pp_double[i][1], 1, &s.ptr.pp_double[i][1], 1, ae_v_len(1,n)); + } + } + evd_internaltrevc(a, n, vneeded, 1, &sel, vl, vr, &m, &info, _state); + result = info==0; + ae_frame_leave(_state); + return result; +} + + +static void evd_toupperhessenberg(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t ip1; + ae_int_t nmi; + double v; + ae_vector t; + ae_vector work; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(tau); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + ae_assert(n>=0, "ToUpperHessenberg: incorrect N!", _state); + + /* + * Quick return if possible + */ + if( n<=1 ) + { + ae_frame_leave(_state); + return; + } + ae_vector_set_length(tau, n-1+1, _state); + ae_vector_set_length(&t, n+1, _state); + ae_vector_set_length(&work, n+1, _state); + for(i=1; i<=n-1; i++) + { + + /* + * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) + */ + ip1 = i+1; + nmi = n-i; + ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[ip1][i], a->stride, ae_v_len(1,nmi)); + generatereflection(&t, nmi, &v, _state); + ae_v_move(&a->ptr.pp_double[ip1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(ip1,n)); + tau->ptr.p_double[i] = v; + t.ptr.p_double[1] = 1; + + /* + * Apply H(i) to A(1:ihi,i+1:ihi) from the right + */ + applyreflectionfromtheright(a, v, &t, 1, n, i+1, n, &work, _state); + + /* + * Apply H(i) to A(i+1:ihi,i+1:n) from the left + */ + applyreflectionfromtheleft(a, v, &t, i+1, n, i+1, n, &work, _state); + } + ae_frame_leave(_state); +} + + +static void evd_unpackqfromupperhessenberg(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + /* Real */ ae_matrix* q, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector v; + ae_vector work; + ae_int_t ip1; + ae_int_t nmi; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(q); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + if( n==0 ) + { + ae_frame_leave(_state); + return; + } + + /* + * init + */ + ae_matrix_set_length(q, n+1, n+1, _state); + ae_vector_set_length(&v, n+1, _state); + ae_vector_set_length(&work, n+1, _state); + for(i=1; i<=n; i++) + { + for(j=1; j<=n; j++) + { + if( i==j ) + { + q->ptr.pp_double[i][j] = 1; + } + else + { + q->ptr.pp_double[i][j] = 0; + } + } + } + + /* + * unpack Q + */ + for(i=1; i<=n-1; i++) + { + + /* + * Apply H(i) + */ + ip1 = i+1; + nmi = n-i; + ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[ip1][i], a->stride, ae_v_len(1,nmi)); + v.ptr.p_double[1] = 1; + applyreflectionfromtheright(q, tau->ptr.p_double[i], &v, 1, n, i+1, n, &work, _state); + } + ae_frame_leave(_state); +} + + + + +/************************************************************************* +Generation of a random uniformly distributed (Haar) orthogonal matrix + +INPUT PARAMETERS: + N - matrix size, N>=1 + +OUTPUT PARAMETERS: + A - orthogonal NxN matrix, array[0..N-1,0..N-1] + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonal(ae_int_t n, + /* Real */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + + ae_matrix_clear(a); + + ae_assert(n>=1, "RMatrixRndOrthogonal: N<1!", _state); + ae_matrix_set_length(a, n, n, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + a->ptr.pp_double[i][j] = 1; + } + else + { + a->ptr.pp_double[i][j] = 0; + } + } + } + rmatrixrndorthogonalfromtheright(a, n, n, _state); +} + + +/************************************************************************* +Generation of random NxN matrix with given condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndcond(ae_int_t n, + double c, + /* Real */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double l1; + double l2; + + ae_matrix_clear(a); + + ae_assert(n>=1&&ae_fp_greater_eq(c,1), "RMatrixRndCond: N<1 or C<1!", _state); + ae_matrix_set_length(a, n, n, _state); + if( n==1 ) + { + + /* + * special case + */ + a->ptr.pp_double[0][0] = 2*ae_randominteger(2, _state)-1; + return; + } + l1 = 0; + l2 = ae_log(1/c, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + a->ptr.pp_double[0][0] = ae_exp(l1, _state); + for(i=1; i<=n-2; i++) + { + a->ptr.pp_double[i][i] = ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state); + } + a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state); + rmatrixrndorthogonalfromtheleft(a, n, n, _state); + rmatrixrndorthogonalfromtheright(a, n, n, _state); +} + + +/************************************************************************* +Generation of a random Haar distributed orthogonal complex matrix + +INPUT PARAMETERS: + N - matrix size, N>=1 + +OUTPUT PARAMETERS: + A - orthogonal NxN matrix, array[0..N-1,0..N-1] + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonal(ae_int_t n, + /* Complex */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + + ae_matrix_clear(a); + + ae_assert(n>=1, "CMatrixRndOrthogonal: N<1!", _state); + ae_matrix_set_length(a, n, n, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + if( i==j ) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(1); + } + else + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + } + cmatrixrndorthogonalfromtheright(a, n, n, _state); +} + + +/************************************************************************* +Generation of random NxN complex matrix with given condition number C and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndcond(ae_int_t n, + double c, + /* Complex */ ae_matrix* a, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + double l1; + double l2; + hqrndstate state; + ae_complex v; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(a); + _hqrndstate_init(&state, _state, ae_true); + + ae_assert(n>=1&&ae_fp_greater_eq(c,1), "CMatrixRndCond: N<1 or C<1!", _state); + ae_matrix_set_length(a, n, n, _state); + if( n==1 ) + { + + /* + * special case + */ + hqrndrandomize(&state, _state); + hqrndunit2(&state, &v.x, &v.y, _state); + a->ptr.pp_complex[0][0] = v; + ae_frame_leave(_state); + return; + } + l1 = 0; + l2 = ae_log(1/c, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state)); + for(i=1; i<=n-2; i++) + { + a->ptr.pp_complex[i][i] = ae_complex_from_d(ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state)); + } + a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state)); + cmatrixrndorthogonalfromtheleft(a, n, n, _state); + cmatrixrndorthogonalfromtheright(a, n, n, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Generation of random NxN symmetric matrix with given condition number and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void smatrixrndcond(ae_int_t n, + double c, + /* Real */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double l1; + double l2; + + ae_matrix_clear(a); + + ae_assert(n>=1&&ae_fp_greater_eq(c,1), "SMatrixRndCond: N<1 or C<1!", _state); + ae_matrix_set_length(a, n, n, _state); + if( n==1 ) + { + + /* + * special case + */ + a->ptr.pp_double[0][0] = 2*ae_randominteger(2, _state)-1; + return; + } + + /* + * Prepare matrix + */ + l1 = 0; + l2 = ae_log(1/c, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + a->ptr.pp_double[0][0] = ae_exp(l1, _state); + for(i=1; i<=n-2; i++) + { + a->ptr.pp_double[i][i] = (2*ae_randominteger(2, _state)-1)*ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state); + } + a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state); + + /* + * Multiply + */ + smatrixrndmultiply(a, n, _state); +} + + +/************************************************************************* +Generation of random NxN symmetric positive definite matrix with given +condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random SPD matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixrndcond(ae_int_t n, + double c, + /* Real */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double l1; + double l2; + + ae_matrix_clear(a); + + + /* + * Special cases + */ + if( n<=0||ae_fp_less(c,1) ) + { + return; + } + ae_matrix_set_length(a, n, n, _state); + if( n==1 ) + { + a->ptr.pp_double[0][0] = 1; + return; + } + + /* + * Prepare matrix + */ + l1 = 0; + l2 = ae_log(1/c, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + a->ptr.pp_double[0][0] = ae_exp(l1, _state); + for(i=1; i<=n-2; i++) + { + a->ptr.pp_double[i][i] = ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state); + } + a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state); + + /* + * Multiply + */ + smatrixrndmultiply(a, n, _state); +} + + +/************************************************************************* +Generation of random NxN Hermitian matrix with given condition number and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hmatrixrndcond(ae_int_t n, + double c, + /* Complex */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double l1; + double l2; + + ae_matrix_clear(a); + + ae_assert(n>=1&&ae_fp_greater_eq(c,1), "HMatrixRndCond: N<1 or C<1!", _state); + ae_matrix_set_length(a, n, n, _state); + if( n==1 ) + { + + /* + * special case + */ + a->ptr.pp_complex[0][0] = ae_complex_from_d(2*ae_randominteger(2, _state)-1); + return; + } + + /* + * Prepare matrix + */ + l1 = 0; + l2 = ae_log(1/c, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state)); + for(i=1; i<=n-2; i++) + { + a->ptr.pp_complex[i][i] = ae_complex_from_d((2*ae_randominteger(2, _state)-1)*ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state)); + } + a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state)); + + /* + * Multiply + */ + hmatrixrndmultiply(a, n, _state); + + /* + * post-process to ensure that matrix diagonal is real + */ + for(i=0; i<=n-1; i++) + { + a->ptr.pp_complex[i][i].y = 0; + } +} + + +/************************************************************************* +Generation of random NxN Hermitian positive definite matrix with given +condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random HPD matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixrndcond(ae_int_t n, + double c, + /* Complex */ ae_matrix* a, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double l1; + double l2; + + ae_matrix_clear(a); + + + /* + * Special cases + */ + if( n<=0||ae_fp_less(c,1) ) + { + return; + } + ae_matrix_set_length(a, n, n, _state); + if( n==1 ) + { + a->ptr.pp_complex[0][0] = ae_complex_from_d(1); + return; + } + + /* + * Prepare matrix + */ + l1 = 0; + l2 = ae_log(1/c, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state)); + for(i=1; i<=n-2; i++) + { + a->ptr.pp_complex[i][i] = ae_complex_from_d(ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state)); + } + a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state)); + + /* + * Multiply + */ + hmatrixrndmultiply(a, n, _state); + + /* + * post-process to ensure that matrix diagonal is real + */ + for(i=0; i<=n-1; i++) + { + a->ptr.pp_complex[i][i].y = 0; + } +} + + +/************************************************************************* +Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonalfromtheright(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + double tau; + double lambdav; + ae_int_t s; + ae_int_t i; + double u1; + double u2; + ae_vector w; + ae_vector v; + hqrndstate state; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&w, 0, DT_REAL, _state, ae_true); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + _hqrndstate_init(&state, _state, ae_true); + + ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state); + if( n==1 ) + { + + /* + * Special case + */ + tau = 2*ae_randominteger(2, _state)-1; + for(i=0; i<=m-1; i++) + { + a->ptr.pp_double[i][0] = a->ptr.pp_double[i][0]*tau; + } + ae_frame_leave(_state); + return; + } + + /* + * General case. + * First pass. + */ + ae_vector_set_length(&w, m, _state); + ae_vector_set_length(&v, n+1, _state); + hqrndrandomize(&state, _state); + for(s=2; s<=n; s++) + { + + /* + * Prepare random normal v + */ + do + { + i = 1; + while(i<=s) + { + hqrndnormal2(&state, &u1, &u2, _state); + v.ptr.p_double[i] = u1; + if( i+1<=s ) + { + v.ptr.p_double[i+1] = u2; + } + i = i+2; + } + lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s)); + } + while(ae_fp_eq(lambdav,0)); + + /* + * Prepare and apply reflection + */ + generatereflection(&v, s, &tau, _state); + v.ptr.p_double[1] = 1; + applyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state); + } + + /* + * Second pass. + */ + for(i=0; i<=n-1; i++) + { + tau = 2*ae_randominteger(2, _state)-1; + ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,m-1), tau); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - Q*A, where Q is random MxM orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonalfromtheleft(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + double tau; + double lambdav; + ae_int_t s; + ae_int_t i; + ae_int_t j; + double u1; + double u2; + ae_vector w; + ae_vector v; + hqrndstate state; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&w, 0, DT_REAL, _state, ae_true); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + _hqrndstate_init(&state, _state, ae_true); + + ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state); + if( m==1 ) + { + + /* + * special case + */ + tau = 2*ae_randominteger(2, _state)-1; + for(j=0; j<=n-1; j++) + { + a->ptr.pp_double[0][j] = a->ptr.pp_double[0][j]*tau; + } + ae_frame_leave(_state); + return; + } + + /* + * General case. + * First pass. + */ + ae_vector_set_length(&w, n, _state); + ae_vector_set_length(&v, m+1, _state); + hqrndrandomize(&state, _state); + for(s=2; s<=m; s++) + { + + /* + * Prepare random normal v + */ + do + { + i = 1; + while(i<=s) + { + hqrndnormal2(&state, &u1, &u2, _state); + v.ptr.p_double[i] = u1; + if( i+1<=s ) + { + v.ptr.p_double[i+1] = u2; + } + i = i+2; + } + lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s)); + } + while(ae_fp_eq(lambdav,0)); + + /* + * Prepare and apply reflection + */ + generatereflection(&v, s, &tau, _state); + v.ptr.p_double[1] = 1; + applyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state); + } + + /* + * Second pass. + */ + for(i=0; i<=m-1; i++) + { + tau = 2*ae_randominteger(2, _state)-1; + ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Multiplication of MxN complex matrix by NxN random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonalfromtheright(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_complex lambdav; + ae_complex tau; + ae_int_t s; + ae_int_t i; + ae_vector w; + ae_vector v; + hqrndstate state; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true); + _hqrndstate_init(&state, _state, ae_true); + + ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state); + if( n==1 ) + { + + /* + * Special case + */ + hqrndrandomize(&state, _state); + hqrndunit2(&state, &tau.x, &tau.y, _state); + for(i=0; i<=m-1; i++) + { + a->ptr.pp_complex[i][0] = ae_c_mul(a->ptr.pp_complex[i][0],tau); + } + ae_frame_leave(_state); + return; + } + + /* + * General case. + * First pass. + */ + ae_vector_set_length(&w, m, _state); + ae_vector_set_length(&v, n+1, _state); + hqrndrandomize(&state, _state); + for(s=2; s<=n; s++) + { + + /* + * Prepare random normal v + */ + do + { + for(i=1; i<=s; i++) + { + hqrndnormal2(&state, &tau.x, &tau.y, _state); + v.ptr.p_complex[i] = tau; + } + lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s)); + } + while(ae_c_eq_d(lambdav,0)); + + /* + * Prepare and apply reflection + */ + complexgeneratereflection(&v, s, &tau, _state); + v.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state); + } + + /* + * Second pass. + */ + for(i=0; i<=n-1; i++) + { + hqrndunit2(&state, &tau.x, &tau.y, _state); + ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,m-1), tau); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Multiplication of MxN complex matrix by MxM random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - Q*A, where Q is random MxM orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonalfromtheleft(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_complex tau; + ae_complex lambdav; + ae_int_t s; + ae_int_t i; + ae_int_t j; + ae_vector w; + ae_vector v; + hqrndstate state; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true); + _hqrndstate_init(&state, _state, ae_true); + + ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state); + if( m==1 ) + { + + /* + * special case + */ + hqrndrandomize(&state, _state); + hqrndunit2(&state, &tau.x, &tau.y, _state); + for(j=0; j<=n-1; j++) + { + a->ptr.pp_complex[0][j] = ae_c_mul(a->ptr.pp_complex[0][j],tau); + } + ae_frame_leave(_state); + return; + } + + /* + * General case. + * First pass. + */ + ae_vector_set_length(&w, n, _state); + ae_vector_set_length(&v, m+1, _state); + hqrndrandomize(&state, _state); + for(s=2; s<=m; s++) + { + + /* + * Prepare random normal v + */ + do + { + for(i=1; i<=s; i++) + { + hqrndnormal2(&state, &tau.x, &tau.y, _state); + v.ptr.p_complex[i] = tau; + } + lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s)); + } + while(ae_c_eq_d(lambdav,0)); + + /* + * Prepare and apply reflection + */ + complexgeneratereflection(&v, s, &tau, _state); + v.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state); + } + + /* + * Second pass. + */ + for(i=0; i<=m-1; i++) + { + hqrndunit2(&state, &tau.x, &tau.y, _state); + ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Symmetric multiplication of NxN matrix by random Haar distributed +orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..N-1, 0..N-1] + N - matrix size + +OUTPUT PARAMETERS: + A - Q'*A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void smatrixrndmultiply(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + double tau; + double lambdav; + ae_int_t s; + ae_int_t i; + double u1; + double u2; + ae_vector w; + ae_vector v; + hqrndstate state; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&w, 0, DT_REAL, _state, ae_true); + ae_vector_init(&v, 0, DT_REAL, _state, ae_true); + _hqrndstate_init(&state, _state, ae_true); + + + /* + * General case. + */ + ae_vector_set_length(&w, n, _state); + ae_vector_set_length(&v, n+1, _state); + hqrndrandomize(&state, _state); + for(s=2; s<=n; s++) + { + + /* + * Prepare random normal v + */ + do + { + i = 1; + while(i<=s) + { + hqrndnormal2(&state, &u1, &u2, _state); + v.ptr.p_double[i] = u1; + if( i+1<=s ) + { + v.ptr.p_double[i+1] = u2; + } + i = i+2; + } + lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s)); + } + while(ae_fp_eq(lambdav,0)); + + /* + * Prepare and apply reflection + */ + generatereflection(&v, s, &tau, _state); + v.ptr.p_double[1] = 1; + applyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state); + applyreflectionfromtheleft(a, tau, &v, n-s, n-1, 0, n-1, &w, _state); + } + + /* + * Second pass. + */ + for(i=0; i<=n-1; i++) + { + tau = 2*ae_randominteger(2, _state)-1; + ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,n-1), tau); + ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Hermitian multiplication of NxN matrix by random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..N-1, 0..N-1] + N - matrix size + +OUTPUT PARAMETERS: + A - Q^H*A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hmatrixrndmultiply(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_complex tau; + ae_complex lambdav; + ae_int_t s; + ae_int_t i; + ae_vector w; + ae_vector v; + hqrndstate state; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true); + _hqrndstate_init(&state, _state, ae_true); + + + /* + * General case. + */ + ae_vector_set_length(&w, n, _state); + ae_vector_set_length(&v, n+1, _state); + hqrndrandomize(&state, _state); + for(s=2; s<=n; s++) + { + + /* + * Prepare random normal v + */ + do + { + for(i=1; i<=s; i++) + { + hqrndnormal2(&state, &tau.x, &tau.y, _state); + v.ptr.p_complex[i] = tau; + } + lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s)); + } + while(ae_c_eq_d(lambdav,0)); + + /* + * Prepare and apply reflection + */ + complexgeneratereflection(&v, s, &tau, _state); + v.ptr.p_complex[1] = ae_complex_from_d(1); + complexapplyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state); + complexapplyreflectionfromtheleft(a, ae_c_conj(tau, _state), &v, n-s, n-1, 0, n-1, &w, _state); + } + + /* + * Second pass. + */ + for(i=0; i<=n-1; i++) + { + hqrndunit2(&state, &tau.x, &tau.y, _state); + ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,n-1), tau); + tau = ae_c_conj(tau, _state); + ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau); + } + ae_frame_leave(_state); +} + + + + +/************************************************************************* +LU decomposition of a general real matrix with row pivoting + +A is represented as A = P*L*U, where: +* L is lower unitriangular matrix +* U is upper triangular matrix +* P = P0*P1*...*PK, K=min(M,N)-1, + Pi - permutation matrix for I and Pivots[I] + +This is cache-oblivous implementation of LU decomposition. +It is optimized for square matrices. As for rectangular matrices: +* best case - M>>N +* worst case - N>>M, small M, large N, matrix does not fit in CPU cache + +INPUT PARAMETERS: + A - array[0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + + +OUTPUT PARAMETERS: + A - matrices L and U in compact form: + * L is stored under main diagonal + * U is stored on and above main diagonal + Pivots - permutation matrix in compact form. + array[0..Min(M-1,N-1)]. + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlu(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state) +{ + + ae_vector_clear(pivots); + + ae_assert(m>0, "RMatrixLU: incorrect M!", _state); + ae_assert(n>0, "RMatrixLU: incorrect N!", _state); + rmatrixplu(a, m, n, pivots, _state); +} + + +/************************************************************************* +LU decomposition of a general complex matrix with row pivoting + +A is represented as A = P*L*U, where: +* L is lower unitriangular matrix +* U is upper triangular matrix +* P = P0*P1*...*PK, K=min(M,N)-1, + Pi - permutation matrix for I and Pivots[I] + +This is cache-oblivous implementation of LU decomposition. It is optimized +for square matrices. As for rectangular matrices: +* best case - M>>N +* worst case - N>>M, small M, large N, matrix does not fit in CPU cache + +INPUT PARAMETERS: + A - array[0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + + +OUTPUT PARAMETERS: + A - matrices L and U in compact form: + * L is stored under main diagonal + * U is stored on and above main diagonal + Pivots - permutation matrix in compact form. + array[0..Min(M-1,N-1)]. + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlu(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state) +{ + + ae_vector_clear(pivots); + + ae_assert(m>0, "CMatrixLU: incorrect M!", _state); + ae_assert(n>0, "CMatrixLU: incorrect N!", _state); + cmatrixplu(a, m, n, pivots, _state); +} + + +/************************************************************************* +Cache-oblivious Cholesky decomposition + +The algorithm computes Cholesky decomposition of a Hermitian positive- +definite matrix. The result of an algorithm is a representation of A as +A=U'*U or A=L*L' (here X' detones conj(X^T)). + +INPUT PARAMETERS: + A - upper or lower triangle of a factorized matrix. + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - if IsUpper=True, then A contains an upper triangle of + a symmetric matrix, otherwise A contains a lower one. + +OUTPUT PARAMETERS: + A - the result of factorization. If IsUpper=True, then + the upper triangle contains matrix U, so that A = U'*U, + and the elements below the main diagonal are not modified. + Similarly, if IsUpper = False. + +RESULT: + If the matrix is positive-definite, the function returns True. + Otherwise, the function returns False. Contents of A is not determined + in such case. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +ae_bool hpdmatrixcholesky(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + if( n<1 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + result = trfac_hpdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Cache-oblivious Cholesky decomposition + +The algorithm computes Cholesky decomposition of a symmetric positive- +definite matrix. The result of an algorithm is a representation of A as +A=U^T*U or A=L*L^T + +INPUT PARAMETERS: + A - upper or lower triangle of a factorized matrix. + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - if IsUpper=True, then A contains an upper triangle of + a symmetric matrix, otherwise A contains a lower one. + +OUTPUT PARAMETERS: + A - the result of factorization. If IsUpper=True, then + the upper triangle contains matrix U, so that A = U^T*U, + and the elements below the main diagonal are not modified. + Similarly, if IsUpper = False. + +RESULT: + If the matrix is positive-definite, the function returns True. + Otherwise, the function returns False. Contents of A is not determined + in such case. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +ae_bool spdmatrixcholesky(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + if( n<1 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + result = spdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state); + ae_frame_leave(_state); + return result; +} + + +void rmatrixlup(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_int_t i; + ae_int_t j; + double mx; + double v; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(pivots); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + + /* + * Internal LU decomposition subroutine. + * Never call it directly. + */ + ae_assert(m>0, "RMatrixLUP: incorrect M!", _state); + ae_assert(n>0, "RMatrixLUP: incorrect N!", _state); + + /* + * Scale matrix to avoid overflows, + * decompose it, then scale back. + */ + mx = 0; + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state); + } + } + if( ae_fp_neq(mx,0) ) + { + v = 1/mx; + for(i=0; i<=m-1; i++) + { + ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + } + } + ae_vector_set_length(pivots, ae_minint(m, n, _state), _state); + ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state); + trfac_rmatrixluprec(a, 0, m, n, pivots, &tmp, _state); + if( ae_fp_neq(mx,0) ) + { + v = mx; + for(i=0; i<=m-1; i++) + { + ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v); + } + } + ae_frame_leave(_state); +} + + +void cmatrixlup(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_int_t i; + ae_int_t j; + double mx; + double v; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(pivots); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + + /* + * Internal LU decomposition subroutine. + * Never call it directly. + */ + ae_assert(m>0, "CMatrixLUP: incorrect M!", _state); + ae_assert(n>0, "CMatrixLUP: incorrect N!", _state); + + /* + * Scale matrix to avoid overflows, + * decompose it, then scale back. + */ + mx = 0; + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state); + } + } + if( ae_fp_neq(mx,0) ) + { + v = 1/mx; + for(i=0; i<=m-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v); + } + } + ae_vector_set_length(pivots, ae_minint(m, n, _state), _state); + ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state); + trfac_cmatrixluprec(a, 0, m, n, pivots, &tmp, _state); + if( ae_fp_neq(mx,0) ) + { + v = mx; + for(i=0; i<=m-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v); + } + } + ae_frame_leave(_state); +} + + +void rmatrixplu(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_int_t i; + ae_int_t j; + double mx; + double v; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(pivots); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + + /* + * Internal LU decomposition subroutine. + * Never call it directly. + */ + ae_assert(m>0, "RMatrixPLU: incorrect M!", _state); + ae_assert(n>0, "RMatrixPLU: incorrect N!", _state); + ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state); + ae_vector_set_length(pivots, ae_minint(m, n, _state), _state); + + /* + * Scale matrix to avoid overflows, + * decompose it, then scale back. + */ + mx = 0; + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state); + } + } + if( ae_fp_neq(mx,0) ) + { + v = 1/mx; + for(i=0; i<=m-1; i++) + { + ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + } + } + trfac_rmatrixplurec(a, 0, m, n, pivots, &tmp, _state); + if( ae_fp_neq(mx,0) ) + { + v = mx; + for(i=0; i<=ae_minint(m, n, _state)-1; i++) + { + ae_v_muld(&a->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v); + } + } + ae_frame_leave(_state); +} + + +void cmatrixplu(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tmp; + ae_int_t i; + ae_int_t j; + double mx; + ae_complex v; + + ae_frame_make(_state, &_frame_block); + ae_vector_clear(pivots); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + + /* + * Internal LU decomposition subroutine. + * Never call it directly. + */ + ae_assert(m>0, "CMatrixPLU: incorrect M!", _state); + ae_assert(n>0, "CMatrixPLU: incorrect N!", _state); + ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state); + ae_vector_set_length(pivots, ae_minint(m, n, _state), _state); + + /* + * Scale matrix to avoid overflows, + * decompose it, then scale back. + */ + mx = 0; + for(i=0; i<=m-1; i++) + { + for(j=0; j<=n-1; j++) + { + mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state); + } + } + if( ae_fp_neq(mx,0) ) + { + v = ae_complex_from_d(1/mx); + for(i=0; i<=m-1; i++) + { + ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v); + } + } + trfac_cmatrixplurec(a, 0, m, n, pivots, &tmp, _state); + if( ae_fp_neq(mx,0) ) + { + v = ae_complex_from_d(mx); + for(i=0; i<=ae_minint(m, n, _state)-1; i++) + { + ae_v_cmulc(&a->ptr.pp_complex[i][i], 1, ae_v_len(i,n-1), v); + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Recursive computational subroutine for SPDMatrixCholesky. + +INPUT PARAMETERS: + A - matrix given by upper or lower triangle + Offs - offset of diagonal block to decompose + N - diagonal block size + IsUpper - what half is given + Tmp - temporary array; allocated by function, if its size is too + small; can be reused on subsequent calls. + +OUTPUT PARAMETERS: + A - upper (or lower) triangle contains Cholesky decomposition + +RESULT: + True, on success + False, on failure + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +ae_bool spdmatrixcholeskyrec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t n1; + ae_int_t n2; + ae_bool result; + + + + /* + * check N + */ + if( n<1 ) + { + result = ae_false; + return result; + } + + /* + * Prepare buffer + */ + if( tmp->cnt<2*n ) + { + ae_vector_set_length(tmp, 2*n, _state); + } + + /* + * special cases + */ + if( n==1 ) + { + if( ae_fp_greater(a->ptr.pp_double[offs][offs],0) ) + { + a->ptr.pp_double[offs][offs] = ae_sqrt(a->ptr.pp_double[offs][offs], _state); + result = ae_true; + } + else + { + result = ae_false; + } + return result; + } + if( n<=ablasblocksize(a, _state) ) + { + result = trfac_spdmatrixcholesky2(a, offs, n, isupper, tmp, _state); + return result; + } + + /* + * general case: split task in cache-oblivious manner + */ + result = ae_true; + ablassplitlength(a, n, &n1, &n2, _state); + result = spdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state); + if( !result ) + { + return result; + } + if( n2>0 ) + { + if( isupper ) + { + rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 1, a, offs, offs+n1, _state); + rmatrixsyrk(n2, n1, -1.0, a, offs, offs+n1, 1, 1.0, a, offs+n1, offs+n1, isupper, _state); + } + else + { + rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 1, a, offs+n1, offs, _state); + rmatrixsyrk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state); + } + result = spdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state); + if( !result ) + { + return result; + } + } + return result; +} + + +/************************************************************************* +Recurrent complex LU subroutine. +Never call it directly. + + -- ALGLIB routine -- + 04.01.2010 + Bochkanov Sergey +*************************************************************************/ +static void trfac_cmatrixluprec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t m1; + ae_int_t m2; + + + + /* + * Kernel case + */ + if( ae_minint(m, n, _state)<=ablascomplexblocksize(a, _state) ) + { + trfac_cmatrixlup2(a, offs, m, n, pivots, tmp, _state); + return; + } + + /* + * Preliminary step, make N>=M + * + * ( A1 ) + * A = ( ), where A1 is square + * ( A2 ) + * + * Factorize A1, update A2 + */ + if( m>n ) + { + trfac_cmatrixluprec(a, offs, n, n, pivots, tmp, _state); + for(i=0; i<=n-1; i++) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n][offs+i], a->stride, "N", ae_v_len(0,m-n-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+n][offs+i], a->stride, &a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+n,offs+m-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n,offs+m-1)); + } + cmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state); + return; + } + + /* + * Non-kernel case + */ + ablascomplexsplitlength(a, m, &m1, &m2, _state); + trfac_cmatrixluprec(a, offs, m1, n, pivots, tmp, _state); + if( m2>0 ) + { + for(i=0; i<=m1-1; i++) + { + if( offs+i!=pivots->ptr.p_int[offs+i] ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+m1][offs+i], a->stride, "N", ae_v_len(0,m2-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+m1][offs+i], a->stride, &a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+m1,offs+m-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m1,offs+m-1)); + } + } + cmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state); + cmatrixgemm(m-m1, n-m1, m1, ae_complex_from_d(-1.0), a, offs+m1, offs, 0, a, offs, offs+m1, 0, ae_complex_from_d(1.0), a, offs+m1, offs+m1, _state); + trfac_cmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state); + for(i=0; i<=m2-1; i++) + { + if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+m1+i], a->stride, "N", ae_v_len(0,m1-1)); + ae_v_cmove(&a->ptr.pp_complex[offs][offs+m1+i], a->stride, &a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, "N", ae_v_len(offs,offs+m1-1)); + ae_v_cmove(&a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m1-1)); + } + } + } +} + + +/************************************************************************* +Recurrent real LU subroutine. +Never call it directly. + + -- ALGLIB routine -- + 04.01.2010 + Bochkanov Sergey +*************************************************************************/ +static void trfac_rmatrixluprec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t m1; + ae_int_t m2; + + + + /* + * Kernel case + */ + if( ae_minint(m, n, _state)<=ablasblocksize(a, _state) ) + { + trfac_rmatrixlup2(a, offs, m, n, pivots, tmp, _state); + return; + } + + /* + * Preliminary step, make N>=M + * + * ( A1 ) + * A = ( ), where A1 is square + * ( A2 ) + * + * Factorize A1, update A2 + */ + if( m>n ) + { + trfac_rmatrixluprec(a, offs, n, n, pivots, tmp, _state); + for(i=0; i<=n-1; i++) + { + if( offs+i!=pivots->ptr.p_int[offs+i] ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n][offs+i], a->stride, ae_v_len(0,m-n-1)); + ae_v_move(&a->ptr.pp_double[offs+n][offs+i], a->stride, &a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+n,offs+m-1)); + ae_v_move(&a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n,offs+m-1)); + } + } + rmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state); + return; + } + + /* + * Non-kernel case + */ + ablassplitlength(a, m, &m1, &m2, _state); + trfac_rmatrixluprec(a, offs, m1, n, pivots, tmp, _state); + if( m2>0 ) + { + for(i=0; i<=m1-1; i++) + { + if( offs+i!=pivots->ptr.p_int[offs+i] ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+m1][offs+i], a->stride, ae_v_len(0,m2-1)); + ae_v_move(&a->ptr.pp_double[offs+m1][offs+i], a->stride, &a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+m1,offs+m-1)); + ae_v_move(&a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m1,offs+m-1)); + } + } + rmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state); + rmatrixgemm(m-m1, n-m1, m1, -1.0, a, offs+m1, offs, 0, a, offs, offs+m1, 0, 1.0, a, offs+m1, offs+m1, _state); + trfac_rmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state); + for(i=0; i<=m2-1; i++) + { + if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+m1+i], a->stride, ae_v_len(0,m1-1)); + ae_v_move(&a->ptr.pp_double[offs][offs+m1+i], a->stride, &a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, ae_v_len(offs,offs+m1-1)); + ae_v_move(&a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m1-1)); + } + } + } +} + + +/************************************************************************* +Recurrent complex LU subroutine. +Never call it directly. + + -- ALGLIB routine -- + 04.01.2010 + Bochkanov Sergey +*************************************************************************/ +static void trfac_cmatrixplurec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t n1; + ae_int_t n2; + + + + /* + * Kernel case + */ + if( ae_minint(m, n, _state)<=ablascomplexblocksize(a, _state) ) + { + trfac_cmatrixplu2(a, offs, m, n, pivots, tmp, _state); + return; + } + + /* + * Preliminary step, make M>=N. + * + * A = (A1 A2), where A1 is square + * Factorize A1, update A2 + */ + if( n>m ) + { + trfac_cmatrixplurec(a, offs, m, m, pivots, tmp, _state); + for(i=0; i<=m-1; i++) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+m], 1, "N", ae_v_len(0,n-m-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+m], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, "N", ae_v_len(offs+m,offs+n-1)); + ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m,offs+n-1)); + } + cmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state); + return; + } + + /* + * Non-kernel case + */ + ablascomplexsplitlength(a, n, &n1, &n2, _state); + trfac_cmatrixplurec(a, offs, m, n1, pivots, tmp, _state); + if( n2>0 ) + { + for(i=0; i<=n1-1; i++) + { + if( offs+i!=pivots->ptr.p_int[offs+i] ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+n1], 1, "N", ae_v_len(0,n2-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+n1], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, "N", ae_v_len(offs+n1,offs+n-1)); + ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n1,offs+n-1)); + } + } + cmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state); + cmatrixgemm(m-n1, n-n1, n1, ae_complex_from_d(-1.0), a, offs+n1, offs, 0, a, offs, offs+n1, 0, ae_complex_from_d(1.0), a, offs+n1, offs+n1, _state); + trfac_cmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state); + for(i=0; i<=n2-1; i++) + { + if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n1+i][offs], 1, "N", ae_v_len(0,n1-1)); + ae_v_cmove(&a->ptr.pp_complex[offs+n1+i][offs], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, "N", ae_v_len(offs,offs+n1-1)); + ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+n1-1)); + } + } + } +} + + +/************************************************************************* +Recurrent real LU subroutine. +Never call it directly. + + -- ALGLIB routine -- + 04.01.2010 + Bochkanov Sergey +*************************************************************************/ +static void trfac_rmatrixplurec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t n1; + ae_int_t n2; + + + + /* + * Kernel case + */ + if( ae_minint(m, n, _state)<=ablasblocksize(a, _state) ) + { + trfac_rmatrixplu2(a, offs, m, n, pivots, tmp, _state); + return; + } + + /* + * Preliminary step, make M>=N. + * + * A = (A1 A2), where A1 is square + * Factorize A1, update A2 + */ + if( n>m ) + { + trfac_rmatrixplurec(a, offs, m, m, pivots, tmp, _state); + for(i=0; i<=m-1; i++) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+m], 1, ae_v_len(0,n-m-1)); + ae_v_move(&a->ptr.pp_double[offs+i][offs+m], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, ae_v_len(offs+m,offs+n-1)); + ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m,offs+n-1)); + } + rmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state); + return; + } + + /* + * Non-kernel case + */ + ablassplitlength(a, n, &n1, &n2, _state); + trfac_rmatrixplurec(a, offs, m, n1, pivots, tmp, _state); + if( n2>0 ) + { + for(i=0; i<=n1-1; i++) + { + if( offs+i!=pivots->ptr.p_int[offs+i] ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(0,n2-1)); + ae_v_move(&a->ptr.pp_double[offs+i][offs+n1], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, ae_v_len(offs+n1,offs+n-1)); + ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n1,offs+n-1)); + } + } + rmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state); + rmatrixgemm(m-n1, n-n1, n1, -1.0, a, offs+n1, offs, 0, a, offs, offs+n1, 0, 1.0, a, offs+n1, offs+n1, _state); + trfac_rmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state); + for(i=0; i<=n2-1; i++) + { + if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(0,n1-1)); + ae_v_move(&a->ptr.pp_double[offs+n1+i][offs], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, ae_v_len(offs,offs+n1-1)); + ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+n1-1)); + } + } + } +} + + +/************************************************************************* +Complex LUP kernel + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +static void trfac_cmatrixlup2(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t jp; + ae_complex s; + + + + /* + * Quick return if possible + */ + if( m==0||n==0 ) + { + return; + } + + /* + * main cycle + */ + for(j=0; j<=ae_minint(m-1, n-1, _state); j++) + { + + /* + * Find pivot, swap columns + */ + jp = j; + for(i=j+1; i<=n-1; i++) + { + if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+j][offs+i], _state),ae_c_abs(a->ptr.pp_complex[offs+j][offs+jp], _state)) ) + { + jp = i; + } + } + pivots->ptr.p_int[offs+j] = offs+jp; + if( jp!=j ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+j], a->stride, "N", ae_v_len(0,m-1)); + ae_v_cmove(&a->ptr.pp_complex[offs][offs+j], a->stride, &a->ptr.pp_complex[offs][offs+jp], a->stride, "N", ae_v_len(offs,offs+m-1)); + ae_v_cmove(&a->ptr.pp_complex[offs][offs+jp], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m-1)); + } + + /* + * LU decomposition of 1x(N-J) matrix + */ + if( ae_c_neq_d(a->ptr.pp_complex[offs+j][offs+j],0)&&j+1<=n-1 ) + { + s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]); + ae_v_cmulc(&a->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s); + } + + /* + * Update trailing (M-J-1)x(N-J-1) matrix + */ + if( j<ae_minint(m-1, n-1, _state) ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2)); + ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2)); + cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state); + } + } +} + + +/************************************************************************* +Real LUP kernel + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +static void trfac_rmatrixlup2(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t jp; + double s; + + + + /* + * Quick return if possible + */ + if( m==0||n==0 ) + { + return; + } + + /* + * main cycle + */ + for(j=0; j<=ae_minint(m-1, n-1, _state); j++) + { + + /* + * Find pivot, swap columns + */ + jp = j; + for(i=j+1; i<=n-1; i++) + { + if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+j][offs+i], _state),ae_fabs(a->ptr.pp_double[offs+j][offs+jp], _state)) ) + { + jp = i; + } + } + pivots->ptr.p_int[offs+j] = offs+jp; + if( jp!=j ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+j], a->stride, ae_v_len(0,m-1)); + ae_v_move(&a->ptr.pp_double[offs][offs+j], a->stride, &a->ptr.pp_double[offs][offs+jp], a->stride, ae_v_len(offs,offs+m-1)); + ae_v_move(&a->ptr.pp_double[offs][offs+jp], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m-1)); + } + + /* + * LU decomposition of 1x(N-J) matrix + */ + if( ae_fp_neq(a->ptr.pp_double[offs+j][offs+j],0)&&j+1<=n-1 ) + { + s = 1/a->ptr.pp_double[offs+j][offs+j]; + ae_v_muld(&a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s); + } + + /* + * Update trailing (M-J-1)x(N-J-1) matrix + */ + if( j<ae_minint(m-1, n-1, _state) ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2)); + ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2)); + rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state); + } + } +} + + +/************************************************************************* +Complex PLU kernel + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 +*************************************************************************/ +static void trfac_cmatrixplu2(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t jp; + ae_complex s; + + + + /* + * Quick return if possible + */ + if( m==0||n==0 ) + { + return; + } + for(j=0; j<=ae_minint(m-1, n-1, _state); j++) + { + + /* + * Find pivot and test for singularity. + */ + jp = j; + for(i=j+1; i<=m-1; i++) + { + if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+i][offs+j], _state),ae_c_abs(a->ptr.pp_complex[offs+jp][offs+j], _state)) ) + { + jp = i; + } + } + pivots->ptr.p_int[offs+j] = offs+jp; + if( ae_c_neq_d(a->ptr.pp_complex[offs+jp][offs+j],0) ) + { + + /* + *Apply the interchange to rows + */ + if( jp!=j ) + { + for(i=0; i<=n-1; i++) + { + s = a->ptr.pp_complex[offs+j][offs+i]; + a->ptr.pp_complex[offs+j][offs+i] = a->ptr.pp_complex[offs+jp][offs+i]; + a->ptr.pp_complex[offs+jp][offs+i] = s; + } + } + + /* + *Compute elements J+1:M of J-th column. + */ + if( j+1<=m-1 ) + { + s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]); + ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s); + } + } + if( j<ae_minint(m, n, _state)-1 ) + { + + /* + *Update trailing submatrix. + */ + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2)); + ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2)); + cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state); + } + } +} + + +/************************************************************************* +Real PLU kernel + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 +*************************************************************************/ +static void trfac_rmatrixplu2(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_int_t jp; + double s; + + + + /* + * Quick return if possible + */ + if( m==0||n==0 ) + { + return; + } + for(j=0; j<=ae_minint(m-1, n-1, _state); j++) + { + + /* + * Find pivot and test for singularity. + */ + jp = j; + for(i=j+1; i<=m-1; i++) + { + if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+i][offs+j], _state),ae_fabs(a->ptr.pp_double[offs+jp][offs+j], _state)) ) + { + jp = i; + } + } + pivots->ptr.p_int[offs+j] = offs+jp; + if( ae_fp_neq(a->ptr.pp_double[offs+jp][offs+j],0) ) + { + + /* + *Apply the interchange to rows + */ + if( jp!=j ) + { + for(i=0; i<=n-1; i++) + { + s = a->ptr.pp_double[offs+j][offs+i]; + a->ptr.pp_double[offs+j][offs+i] = a->ptr.pp_double[offs+jp][offs+i]; + a->ptr.pp_double[offs+jp][offs+i] = s; + } + } + + /* + *Compute elements J+1:M of J-th column. + */ + if( j+1<=m-1 ) + { + s = 1/a->ptr.pp_double[offs+j][offs+j]; + ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s); + } + } + if( j<ae_minint(m, n, _state)-1 ) + { + + /* + *Update trailing submatrix. + */ + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2)); + ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2)); + rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state); + } + } +} + + +/************************************************************************* +Recursive computational subroutine for HPDMatrixCholesky + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t n1; + ae_int_t n2; + ae_bool result; + + + + /* + * check N + */ + if( n<1 ) + { + result = ae_false; + return result; + } + + /* + * Prepare buffer + */ + if( tmp->cnt<2*n ) + { + ae_vector_set_length(tmp, 2*n, _state); + } + + /* + * special cases + */ + if( n==1 ) + { + if( ae_fp_greater(a->ptr.pp_complex[offs][offs].x,0) ) + { + a->ptr.pp_complex[offs][offs] = ae_complex_from_d(ae_sqrt(a->ptr.pp_complex[offs][offs].x, _state)); + result = ae_true; + } + else + { + result = ae_false; + } + return result; + } + if( n<=ablascomplexblocksize(a, _state) ) + { + result = trfac_hpdmatrixcholesky2(a, offs, n, isupper, tmp, _state); + return result; + } + + /* + * general case: split task in cache-oblivious manner + */ + result = ae_true; + ablascomplexsplitlength(a, n, &n1, &n2, _state); + result = trfac_hpdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state); + if( !result ) + { + return result; + } + if( n2>0 ) + { + if( isupper ) + { + cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 2, a, offs, offs+n1, _state); + cmatrixsyrk(n2, n1, -1.0, a, offs, offs+n1, 2, 1.0, a, offs+n1, offs+n1, isupper, _state); + } + else + { + cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 2, a, offs+n1, offs, _state); + cmatrixsyrk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state); + } + result = trfac_hpdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state); + if( !result ) + { + return result; + } + } + return result; +} + + +/************************************************************************* +Level-2 Hermitian Cholesky subroutine. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double ajj; + ae_complex v; + double r; + ae_bool result; + + + result = ae_true; + if( n<0 ) + { + result = ae_false; + return result; + } + + /* + * Quick return if possible + */ + if( n==0 ) + { + return result; + } + if( isupper ) + { + + /* + * Compute the Cholesky factorization A = U'*U. + */ + for(j=0; j<=n-1; j++) + { + + /* + * Compute U(J,J) and test for non-positive-definiteness. + */ + v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "N", ae_v_len(offs,offs+j-1)); + ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x; + if( ae_fp_less_eq(ajj,0) ) + { + aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj); + result = ae_false; + return result; + } + ajj = ae_sqrt(ajj, _state); + aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj); + + /* + * Compute elements J+1:N-1 of row J. + */ + if( j<n-1 ) + { + if( j>0 ) + { + ae_v_cmoveneg(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", ae_v_len(0,j-1)); + cmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state); + ae_v_cadd(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, &tmp->ptr.p_complex[n], 1, "N", ae_v_len(offs+j+1,offs+n-1)); + } + r = 1/ajj; + ae_v_cmuld(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r); + } + } + } + else + { + + /* + * Compute the Cholesky factorization A = L*L'. + */ + for(j=0; j<=n-1; j++) + { + + /* + * Compute L(J+1,J+1) and test for non-positive-definiteness. + */ + v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", &aaa->ptr.pp_complex[offs+j][offs], 1, "N", ae_v_len(offs,offs+j-1)); + ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x; + if( ae_fp_less_eq(ajj,0) ) + { + aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj); + result = ae_false; + return result; + } + ajj = ae_sqrt(ajj, _state); + aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj); + + /* + * Compute elements J+1:N of column J. + */ + if( j<n-1 ) + { + if( j>0 ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", ae_v_len(0,j-1)); + cmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state); + for(i=0; i<=n-j-2; i++) + { + aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_div_d(ae_c_sub(aaa->ptr.pp_complex[offs+j+1+i][offs+j],tmp->ptr.p_complex[n+i]),ajj); + } + } + else + { + for(i=0; i<=n-j-2; i++) + { + aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_div_d(aaa->ptr.pp_complex[offs+j+1+i][offs+j],ajj); + } + } + } + } + } + return result; +} + + +/************************************************************************* +Level-2 Cholesky subroutine + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static ae_bool trfac_spdmatrixcholesky2(/* Real */ ae_matrix* aaa, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double ajj; + double v; + double r; + ae_bool result; + + + result = ae_true; + if( n<0 ) + { + result = ae_false; + return result; + } + + /* + * Quick return if possible + */ + if( n==0 ) + { + return result; + } + if( isupper ) + { + + /* + * Compute the Cholesky factorization A = U'*U. + */ + for(j=0; j<=n-1; j++) + { + + /* + * Compute U(J,J) and test for non-positive-definiteness. + */ + v = ae_v_dotproduct(&aaa->ptr.pp_double[offs][offs+j], aaa->stride, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(offs,offs+j-1)); + ajj = aaa->ptr.pp_double[offs+j][offs+j]-v; + if( ae_fp_less_eq(ajj,0) ) + { + aaa->ptr.pp_double[offs+j][offs+j] = ajj; + result = ae_false; + return result; + } + ajj = ae_sqrt(ajj, _state); + aaa->ptr.pp_double[offs+j][offs+j] = ajj; + + /* + * Compute elements J+1:N-1 of row J. + */ + if( j<n-1 ) + { + if( j>0 ) + { + ae_v_moveneg(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(0,j-1)); + rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state); + ae_v_add(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, &tmp->ptr.p_double[n], 1, ae_v_len(offs+j+1,offs+n-1)); + } + r = 1/ajj; + ae_v_muld(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r); + } + } + } + else + { + + /* + * Compute the Cholesky factorization A = L*L'. + */ + for(j=0; j<=n-1; j++) + { + + /* + * Compute L(J+1,J+1) and test for non-positive-definiteness. + */ + v = ae_v_dotproduct(&aaa->ptr.pp_double[offs+j][offs], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(offs,offs+j-1)); + ajj = aaa->ptr.pp_double[offs+j][offs+j]-v; + if( ae_fp_less_eq(ajj,0) ) + { + aaa->ptr.pp_double[offs+j][offs+j] = ajj; + result = ae_false; + return result; + } + ajj = ae_sqrt(ajj, _state); + aaa->ptr.pp_double[offs+j][offs+j] = ajj; + + /* + * Compute elements J+1:N of column J. + */ + if( j<n-1 ) + { + if( j>0 ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(0,j-1)); + rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state); + for(i=0; i<=n-j-2; i++) + { + aaa->ptr.pp_double[offs+j+1+i][offs+j] = (aaa->ptr.pp_double[offs+j+1+i][offs+j]-tmp->ptr.p_double[n+i])/ajj; + } + } + else + { + for(i=0; i<=n-j-2; i++) + { + aaa->ptr.pp_double[offs+j+1+i][offs+j] = aaa->ptr.pp_double[offs+j+1+i][offs+j]/ajj; + } + } + } + } + } + return result; +} + + + + +/************************************************************************* +Estimate of a matrix condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixrcond1(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + ae_vector t; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + ae_assert(n>=1, "RMatrixRCond1: N<1!", _state); + ae_vector_set_length(&t, n, _state); + for(i=0; i<=n-1; i++) + { + t.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state); + } + } + nrm = 0; + for(i=0; i<=n-1; i++) + { + nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state); + } + rmatrixlu(a, n, n, &pivots, _state); + rcond_rmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixrcondinf(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "RMatrixRCondInf: N<1!", _state); + nrm = 0; + for(i=0; i<=n-1; i++) + { + v = 0; + for(j=0; j<=n-1; j++) + { + v = v+ae_fabs(a->ptr.pp_double[i][j], _state); + } + nrm = ae_maxreal(nrm, v, _state); + } + rmatrixlu(a, n, n, &pivots, _state); + rcond_rmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Condition number estimate of a symmetric positive definite matrix. + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm of condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + A - symmetric positive definite matrix which is given by its + upper or lower triangle depending on the value of + IsUpper. Array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + +Result: + 1/LowerBound(cond(A)), if matrix A is positive definite, + -1, if matrix A is not positive definite, and its condition number + could not be found by this algorithm. + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double spdmatrixrcond(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_int_t i; + ae_int_t j; + ae_int_t j1; + ae_int_t j2; + double v; + double nrm; + ae_vector t; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + ae_vector_set_length(&t, n, _state); + for(i=0; i<=n-1; i++) + { + t.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + for(j=j1; j<=j2; j++) + { + if( i==j ) + { + t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][i], _state); + } + else + { + t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][j], _state); + t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state); + } + } + } + nrm = 0; + for(i=0; i<=n-1; i++) + { + nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state); + } + if( spdmatrixcholesky(a, n, isupper, _state) ) + { + rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state); + result = v; + } + else + { + result = -1; + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Triangular matrix: estimate of a condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array[0..N-1, 0..N-1]. + N - size of A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixtrrcond1(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + ae_vector t; + ae_int_t j1; + ae_int_t j2; + double result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state); + ae_vector_set_length(&t, n, _state); + for(i=0; i<=n-1; i++) + { + t.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i+1; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i-1; + } + for(j=j1; j<=j2; j++) + { + t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state); + } + if( isunit ) + { + t.ptr.p_double[i] = t.ptr.p_double[i]+1; + } + else + { + t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][i], _state); + } + } + nrm = 0; + for(i=0; i<=n-1; i++) + { + nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state); + } + rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Triangular matrix: estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixtrrcondinf(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + ae_int_t j1; + ae_int_t j2; + double result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state); + nrm = 0; + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i+1; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i-1; + } + v = 0; + for(j=j1; j<=j2; j++) + { + v = v+ae_fabs(a->ptr.pp_double[i][j], _state); + } + if( isunit ) + { + v = v+1; + } + else + { + v = v+ae_fabs(a->ptr.pp_double[i][i], _state); + } + nrm = ae_maxreal(nrm, v, _state); + } + rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Condition number estimate of a Hermitian positive definite matrix. + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm of condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + A - Hermitian positive definite matrix which is given by its + upper or lower triangle depending on the value of + IsUpper. Array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + +Result: + 1/LowerBound(cond(A)), if matrix A is positive definite, + -1, if matrix A is not positive definite, and its condition number + could not be found by this algorithm. + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double hpdmatrixrcond(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_int_t i; + ae_int_t j; + ae_int_t j1; + ae_int_t j2; + double v; + double nrm; + ae_vector t; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + ae_vector_set_length(&t, n, _state); + for(i=0; i<=n-1; i++) + { + t.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i; + } + for(j=j1; j<=j2; j++) + { + if( i==j ) + { + t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][i], _state); + } + else + { + t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][j], _state); + t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state); + } + } + } + nrm = 0; + for(i=0; i<=n-1; i++) + { + nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state); + } + if( hpdmatrixcholesky(a, n, isupper, _state) ) + { + rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state); + result = v; + } + else + { + result = -1; + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Estimate of a matrix condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixrcond1(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + ae_vector t; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + ae_assert(n>=1, "CMatrixRCond1: N<1!", _state); + ae_vector_set_length(&t, n, _state); + for(i=0; i<=n-1; i++) + { + t.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state); + } + } + nrm = 0; + for(i=0; i<=n-1; i++) + { + nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state); + } + cmatrixlu(a, n, n, &pivots, _state); + rcond_cmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixrcondinf(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "CMatrixRCondInf: N<1!", _state); + nrm = 0; + for(i=0; i<=n-1; i++) + { + v = 0; + for(j=0; j<=n-1; j++) + { + v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state); + } + nrm = ae_maxreal(nrm, v, _state); + } + cmatrixlu(a, n, n, &pivots, _state); + rcond_cmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the RMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixlurcond1(/* Real */ ae_matrix* lua, + ae_int_t n, + ae_state *_state) +{ + double v; + double result; + + + rcond_rmatrixrcondluinternal(lua, n, ae_true, ae_false, 0, &v, _state); + result = v; + return result; +} + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition +(infinity norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the RMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixlurcondinf(/* Real */ ae_matrix* lua, + ae_int_t n, + ae_state *_state) +{ + double v; + double result; + + + rcond_rmatrixrcondluinternal(lua, n, ae_false, ae_false, 0, &v, _state); + result = v; + return result; +} + + +/************************************************************************* +Condition number estimate of a symmetric positive definite matrix given by +Cholesky decomposition. + +The algorithm calculates a lower bound of the condition number. In this +case, the algorithm does not return a lower bound of the condition number, +but an inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + CD - Cholesky decomposition of matrix A, + output of SMatrixCholesky subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double spdmatrixcholeskyrcond(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + double v; + double result; + + + rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, 0, &v, _state); + result = v; + return result; +} + + +/************************************************************************* +Condition number estimate of a Hermitian positive definite matrix given by +Cholesky decomposition. + +The algorithm calculates a lower bound of the condition number. In this +case, the algorithm does not return a lower bound of the condition number, +but an inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + CD - Cholesky decomposition of matrix A, + output of SMatrixCholesky subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double hpdmatrixcholeskyrcond(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + double v; + double result; + + + rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, 0, &v, _state); + result = v; + return result; +} + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the CMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixlurcond1(/* Complex */ ae_matrix* lua, + ae_int_t n, + ae_state *_state) +{ + double v; + double result; + + + ae_assert(n>=1, "CMatrixLURCond1: N<1!", _state); + rcond_cmatrixrcondluinternal(lua, n, ae_true, ae_false, 0.0, &v, _state); + result = v; + return result; +} + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition +(infinity norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the CMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixlurcondinf(/* Complex */ ae_matrix* lua, + ae_int_t n, + ae_state *_state) +{ + double v; + double result; + + + ae_assert(n>=1, "CMatrixLURCondInf: N<1!", _state); + rcond_cmatrixrcondluinternal(lua, n, ae_false, ae_false, 0.0, &v, _state); + result = v; + return result; +} + + +/************************************************************************* +Triangular matrix: estimate of a condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array[0..N-1, 0..N-1]. + N - size of A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixtrrcond1(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + ae_vector t; + ae_int_t j1; + ae_int_t j2; + double result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + ae_vector_init(&t, 0, DT_REAL, _state, ae_true); + + ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state); + ae_vector_set_length(&t, n, _state); + for(i=0; i<=n-1; i++) + { + t.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i+1; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i-1; + } + for(j=j1; j<=j2; j++) + { + t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state); + } + if( isunit ) + { + t.ptr.p_double[i] = t.ptr.p_double[i]+1; + } + else + { + t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][i], _state); + } + } + nrm = 0; + for(i=0; i<=n-1; i++) + { + nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state); + } + rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Triangular matrix: estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixtrrcondinf(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + double v; + double nrm; + ae_vector pivots; + ae_int_t j1; + ae_int_t j2; + double result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state); + nrm = 0; + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i+1; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i-1; + } + v = 0; + for(j=j1; j<=j2; j++) + { + v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state); + } + if( isunit ) + { + v = v+1; + } + else + { + v = v+ae_c_abs(a->ptr.pp_complex[i][i], _state); + } + nrm = ae_maxreal(nrm, v, _state); + } + rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state); + result = v; + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Threshold for rcond: matrices with condition number beyond this threshold +are considered singular. + +Threshold must be far enough from underflow, at least Sqr(Threshold) must +be greater than underflow. +*************************************************************************/ +double rcondthreshold(ae_state *_state) +{ + double result; + + + result = ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state); + return result; +} + + +/************************************************************************* +Internal subroutine for condition number estimation + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static void rcond_rmatrixrcondtrinternal(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_bool onenorm, + double anorm, + double* rc, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector ex; + ae_vector ev; + ae_vector iwork; + ae_vector tmp; + ae_int_t i; + ae_int_t j; + ae_int_t kase; + ae_int_t kase1; + ae_int_t j1; + ae_int_t j2; + double ainvnm; + double maxgrowth; + double s; + ae_bool mupper; + ae_bool mtrans; + ae_bool munit; + + ae_frame_make(_state, &_frame_block); + *rc = 0; + ae_vector_init(&ex, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ev, 0, DT_REAL, _state, ae_true); + ae_vector_init(&iwork, 0, DT_INT, _state, ae_true); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + + /* + * RC=0 if something happens + */ + *rc = 0; + + /* + * init + */ + if( onenorm ) + { + kase1 = 1; + } + else + { + kase1 = 2; + } + mupper = ae_true; + mtrans = ae_true; + munit = ae_true; + ae_vector_set_length(&iwork, n+1, _state); + ae_vector_set_length(&tmp, n, _state); + + /* + * prepare parameters for triangular solver + */ + maxgrowth = 1/rcondthreshold(_state); + s = 0; + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i+1; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i-1; + } + for(j=j1; j<=j2; j++) + { + s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][j], _state), _state); + } + if( isunit ) + { + s = ae_maxreal(s, 1, _state); + } + else + { + s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][i], _state), _state); + } + } + if( ae_fp_eq(s,0) ) + { + s = 1; + } + s = 1/s; + + /* + * Scale according to S + */ + anorm = anorm*s; + + /* + * Quick return if possible + * We assume that ANORM<>0 after this block + */ + if( ae_fp_eq(anorm,0) ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + *rc = 1; + ae_frame_leave(_state); + return; + } + + /* + * Estimate the norm of inv(A). + */ + ainvnm = 0; + kase = 0; + for(;;) + { + rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state); + if( kase==0 ) + { + break; + } + + /* + * from 1-based array to 0-based + */ + for(i=0; i<=n-1; i++) + { + ex.ptr.p_double[i] = ex.ptr.p_double[i+1]; + } + + /* + * multiply by inv(A) or inv(A') + */ + if( kase==kase1 ) + { + + /* + * multiply by inv(A) + */ + if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + else + { + + /* + * multiply by inv(A') + */ + if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 1, isunit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + + /* + * from 0-based array to 1-based + */ + for(i=n-1; i>=0; i--) + { + ex.ptr.p_double[i+1] = ex.ptr.p_double[i]; + } + } + + /* + * Compute the estimate of the reciprocal condition number. + */ + if( ae_fp_neq(ainvnm,0) ) + { + *rc = 1/ainvnm; + *rc = *rc/anorm; + if( ae_fp_less(*rc,rcondthreshold(_state)) ) + { + *rc = 0; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Condition number estimation + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 +*************************************************************************/ +static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_bool onenorm, + double anorm, + double* rc, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector ex; + ae_vector cwork2; + ae_vector cwork3; + ae_vector cwork4; + ae_vector isave; + ae_vector rsave; + ae_int_t kase; + ae_int_t kase1; + double ainvnm; + ae_int_t i; + ae_int_t j; + ae_int_t j1; + ae_int_t j2; + double s; + double maxgrowth; + + ae_frame_make(_state, &_frame_block); + *rc = 0; + ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&isave, 0, DT_INT, _state, ae_true); + ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true); + + + /* + * RC=0 if something happens + */ + *rc = 0; + + /* + * init + */ + if( n<=0 ) + { + ae_frame_leave(_state); + return; + } + if( n==0 ) + { + *rc = 1; + ae_frame_leave(_state); + return; + } + ae_vector_set_length(&cwork2, n+1, _state); + + /* + * prepare parameters for triangular solver + */ + maxgrowth = 1/rcondthreshold(_state); + s = 0; + for(i=0; i<=n-1; i++) + { + if( isupper ) + { + j1 = i+1; + j2 = n-1; + } + else + { + j1 = 0; + j2 = i-1; + } + for(j=j1; j<=j2; j++) + { + s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state); + } + if( isunit ) + { + s = ae_maxreal(s, 1, _state); + } + else + { + s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][i], _state), _state); + } + } + if( ae_fp_eq(s,0) ) + { + s = 1; + } + s = 1/s; + + /* + * Scale according to S + */ + anorm = anorm*s; + + /* + * Quick return if possible + */ + if( ae_fp_eq(anorm,0) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Estimate the norm of inv(A). + */ + ainvnm = 0; + if( onenorm ) + { + kase1 = 1; + } + else + { + kase1 = 2; + } + kase = 0; + for(;;) + { + rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state); + if( kase==0 ) + { + break; + } + + /* + * From 1-based to 0-based + */ + for(i=0; i<=n-1; i++) + { + ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1]; + } + + /* + * multiply by inv(A) or inv(A') + */ + if( kase==kase1 ) + { + + /* + * multiply by inv(A) + */ + if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + else + { + + /* + * multiply by inv(A') + */ + if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 2, isunit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + + /* + * from 0-based to 1-based + */ + for(i=n-1; i>=0; i--) + { + ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i]; + } + } + + /* + * Compute the estimate of the reciprocal condition number. + */ + if( ae_fp_neq(ainvnm,0) ) + { + *rc = 1/ainvnm; + *rc = *rc/anorm; + if( ae_fp_less(*rc,rcondthreshold(_state)) ) + { + *rc = 0; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Internal subroutine for condition number estimation + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static void rcond_spdmatrixrcondcholeskyinternal(/* Real */ ae_matrix* cha, + ae_int_t n, + ae_bool isupper, + ae_bool isnormprovided, + double anorm, + double* rc, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_int_t kase; + double ainvnm; + ae_vector ex; + ae_vector ev; + ae_vector tmp; + ae_vector iwork; + double sa; + double v; + double maxgrowth; + + ae_frame_make(_state, &_frame_block); + *rc = 0; + ae_vector_init(&ex, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ev, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + ae_vector_init(&iwork, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "Assertion failed", _state); + ae_vector_set_length(&tmp, n, _state); + + /* + * RC=0 if something happens + */ + *rc = 0; + + /* + * prepare parameters for triangular solver + */ + maxgrowth = 1/rcondthreshold(_state); + sa = 0; + if( isupper ) + { + for(i=0; i<=n-1; i++) + { + for(j=i; j<=n-1; j++) + { + sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state); + } + } + } + else + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i; j++) + { + sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state); + } + } + } + if( ae_fp_eq(sa,0) ) + { + sa = 1; + } + sa = 1/sa; + + /* + * Estimate the norm of A. + */ + if( !isnormprovided ) + { + kase = 0; + anorm = 0; + for(;;) + { + rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state); + if( kase==0 ) + { + break; + } + if( isupper ) + { + + /* + * Multiply by U + */ + for(i=1; i<=n; i++) + { + v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1)); + ex.ptr.p_double[i] = v; + } + ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa); + + /* + * Multiply by U' + */ + for(i=0; i<=n-1; i++) + { + tmp.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + v = ex.ptr.p_double[i+1]; + ae_v_addd(&tmp.ptr.p_double[i], 1, &cha->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v); + } + ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n)); + ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa); + } + else + { + + /* + * Multiply by L' + */ + for(i=0; i<=n-1; i++) + { + tmp.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + v = ex.ptr.p_double[i+1]; + ae_v_addd(&tmp.ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i), v); + } + ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n)); + ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa); + + /* + * Multiply by L + */ + for(i=n; i>=1; i--) + { + v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-1)); + ex.ptr.p_double[i] = v; + } + ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa); + } + } + } + + /* + * Quick return if possible + */ + if( ae_fp_eq(anorm,0) ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + *rc = 1; + ae_frame_leave(_state); + return; + } + + /* + * Estimate the 1-norm of inv(A). + */ + kase = 0; + for(;;) + { + rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state); + if( kase==0 ) + { + break; + } + for(i=0; i<=n-1; i++) + { + ex.ptr.p_double[i] = ex.ptr.p_double[i+1]; + } + if( isupper ) + { + + /* + * Multiply by inv(U'). + */ + if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(U). + */ + if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + else + { + + /* + * Multiply by inv(L). + */ + if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(L'). + */ + if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + for(i=n-1; i>=0; i--) + { + ex.ptr.p_double[i+1] = ex.ptr.p_double[i]; + } + } + + /* + * Compute the estimate of the reciprocal condition number. + */ + if( ae_fp_neq(ainvnm,0) ) + { + v = 1/ainvnm; + *rc = v/anorm; + if( ae_fp_less(*rc,rcondthreshold(_state)) ) + { + *rc = 0; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Internal subroutine for condition number estimation + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha, + ae_int_t n, + ae_bool isupper, + ae_bool isnormprovided, + double anorm, + double* rc, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector isave; + ae_vector rsave; + ae_vector ex; + ae_vector ev; + ae_vector tmp; + ae_int_t kase; + double ainvnm; + ae_complex v; + ae_int_t i; + ae_int_t j; + double sa; + double maxgrowth; + + ae_frame_make(_state, &_frame_block); + *rc = 0; + ae_vector_init(&isave, 0, DT_INT, _state, ae_true); + ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&ev, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(n>=1, "Assertion failed", _state); + ae_vector_set_length(&tmp, n, _state); + + /* + * RC=0 if something happens + */ + *rc = 0; + + /* + * prepare parameters for triangular solver + */ + maxgrowth = 1/rcondthreshold(_state); + sa = 0; + if( isupper ) + { + for(i=0; i<=n-1; i++) + { + for(j=i; j<=n-1; j++) + { + sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state); + } + } + } + else + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i; j++) + { + sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state); + } + } + } + if( ae_fp_eq(sa,0) ) + { + sa = 1; + } + sa = 1/sa; + + /* + * Estimate the norm of A + */ + if( !isnormprovided ) + { + anorm = 0; + kase = 0; + for(;;) + { + rcond_cmatrixestimatenorm(n, &ev, &ex, &anorm, &kase, &isave, &rsave, _state); + if( kase==0 ) + { + break; + } + if( isupper ) + { + + /* + * Multiply by U + */ + for(i=1; i<=n; i++) + { + v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1)); + ex.ptr.p_complex[i] = v; + } + ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa); + + /* + * Multiply by U' + */ + for(i=0; i<=n-1; i++) + { + tmp.ptr.p_complex[i] = ae_complex_from_d(0); + } + for(i=0; i<=n-1; i++) + { + v = ex.ptr.p_complex[i+1]; + ae_v_caddc(&tmp.ptr.p_complex[i], 1, &cha->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(i,n-1), v); + } + ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n)); + ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa); + } + else + { + + /* + * Multiply by L' + */ + for(i=0; i<=n-1; i++) + { + tmp.ptr.p_complex[i] = ae_complex_from_d(0); + } + for(i=0; i<=n-1; i++) + { + v = ex.ptr.p_complex[i+1]; + ae_v_caddc(&tmp.ptr.p_complex[0], 1, &cha->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i), v); + } + ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n)); + ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa); + + /* + * Multiply by L + */ + for(i=n; i>=1; i--) + { + v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-1)); + ex.ptr.p_complex[i] = v; + } + ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa); + } + } + } + + /* + * Quick return if possible + * After this block we assume that ANORM<>0 + */ + if( ae_fp_eq(anorm,0) ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + *rc = 1; + ae_frame_leave(_state); + return; + } + + /* + * Estimate the norm of inv(A). + */ + ainvnm = 0; + kase = 0; + for(;;) + { + rcond_cmatrixestimatenorm(n, &ev, &ex, &ainvnm, &kase, &isave, &rsave, _state); + if( kase==0 ) + { + break; + } + for(i=0; i<=n-1; i++) + { + ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1]; + } + if( isupper ) + { + + /* + * Multiply by inv(U'). + */ + if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(U). + */ + if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + else + { + + /* + * Multiply by inv(L). + */ + if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(L'). + */ + if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + for(i=n-1; i>=0; i--) + { + ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i]; + } + } + + /* + * Compute the estimate of the reciprocal condition number. + */ + if( ae_fp_neq(ainvnm,0) ) + { + *rc = 1/ainvnm; + *rc = *rc/anorm; + if( ae_fp_less(*rc,rcondthreshold(_state)) ) + { + *rc = 0; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Internal subroutine for condition number estimation + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static void rcond_rmatrixrcondluinternal(/* Real */ ae_matrix* lua, + ae_int_t n, + ae_bool onenorm, + ae_bool isanormprovided, + double anorm, + double* rc, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector ex; + ae_vector ev; + ae_vector iwork; + ae_vector tmp; + double v; + ae_int_t i; + ae_int_t j; + ae_int_t kase; + ae_int_t kase1; + double ainvnm; + double maxgrowth; + double su; + double sl; + ae_bool mupper; + ae_bool mtrans; + ae_bool munit; + + ae_frame_make(_state, &_frame_block); + *rc = 0; + ae_vector_init(&ex, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ev, 0, DT_REAL, _state, ae_true); + ae_vector_init(&iwork, 0, DT_INT, _state, ae_true); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + + /* + * RC=0 if something happens + */ + *rc = 0; + + /* + * init + */ + if( onenorm ) + { + kase1 = 1; + } + else + { + kase1 = 2; + } + mupper = ae_true; + mtrans = ae_true; + munit = ae_true; + ae_vector_set_length(&iwork, n+1, _state); + ae_vector_set_length(&tmp, n, _state); + + /* + * prepare parameters for triangular solver + */ + maxgrowth = 1/rcondthreshold(_state); + su = 0; + sl = 1; + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-1; j++) + { + sl = ae_maxreal(sl, ae_fabs(lua->ptr.pp_double[i][j], _state), _state); + } + for(j=i; j<=n-1; j++) + { + su = ae_maxreal(su, ae_fabs(lua->ptr.pp_double[i][j], _state), _state); + } + } + if( ae_fp_eq(su,0) ) + { + su = 1; + } + su = 1/su; + sl = 1/sl; + + /* + * Estimate the norm of A. + */ + if( !isanormprovided ) + { + kase = 0; + anorm = 0; + for(;;) + { + rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state); + if( kase==0 ) + { + break; + } + if( kase==kase1 ) + { + + /* + * Multiply by U + */ + for(i=1; i<=n; i++) + { + v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1)); + ex.ptr.p_double[i] = v; + } + + /* + * Multiply by L + */ + for(i=n; i>=1; i--) + { + if( i>1 ) + { + v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-2)); + } + else + { + v = 0; + } + ex.ptr.p_double[i] = ex.ptr.p_double[i]+v; + } + } + else + { + + /* + * Multiply by L' + */ + for(i=0; i<=n-1; i++) + { + tmp.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + v = ex.ptr.p_double[i+1]; + if( i>=1 ) + { + ae_v_addd(&tmp.ptr.p_double[0], 1, &lua->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), v); + } + tmp.ptr.p_double[i] = tmp.ptr.p_double[i]+v; + } + ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n)); + + /* + * Multiply by U' + */ + for(i=0; i<=n-1; i++) + { + tmp.ptr.p_double[i] = 0; + } + for(i=0; i<=n-1; i++) + { + v = ex.ptr.p_double[i+1]; + ae_v_addd(&tmp.ptr.p_double[i], 1, &lua->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v); + } + ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n)); + } + } + } + + /* + * Scale according to SU/SL + */ + anorm = anorm*su*sl; + + /* + * Quick return if possible + * We assume that ANORM<>0 after this block + */ + if( ae_fp_eq(anorm,0) ) + { + ae_frame_leave(_state); + return; + } + if( n==1 ) + { + *rc = 1; + ae_frame_leave(_state); + return; + } + + /* + * Estimate the norm of inv(A). + */ + ainvnm = 0; + kase = 0; + for(;;) + { + rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state); + if( kase==0 ) + { + break; + } + + /* + * from 1-based array to 0-based + */ + for(i=0; i<=n-1; i++) + { + ex.ptr.p_double[i] = ex.ptr.p_double[i+1]; + } + + /* + * multiply by inv(A) or inv(A') + */ + if( kase==kase1 ) + { + + /* + * Multiply by inv(L). + */ + if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 0, munit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(U). + */ + if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 0, !munit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + else + { + + /* + * Multiply by inv(U'). + */ + if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 1, !munit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(L'). + */ + if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 1, munit, maxgrowth, _state) ) + { + ae_frame_leave(_state); + return; + } + } + + /* + * from 0-based array to 1-based + */ + for(i=n-1; i>=0; i--) + { + ex.ptr.p_double[i+1] = ex.ptr.p_double[i]; + } + } + + /* + * Compute the estimate of the reciprocal condition number. + */ + if( ae_fp_neq(ainvnm,0) ) + { + *rc = 1/ainvnm; + *rc = *rc/anorm; + if( ae_fp_less(*rc,rcondthreshold(_state)) ) + { + *rc = 0; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Condition number estimation + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 +*************************************************************************/ +static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua, + ae_int_t n, + ae_bool onenorm, + ae_bool isanormprovided, + double anorm, + double* rc, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector ex; + ae_vector cwork2; + ae_vector cwork3; + ae_vector cwork4; + ae_vector isave; + ae_vector rsave; + ae_int_t kase; + ae_int_t kase1; + double ainvnm; + ae_complex v; + ae_int_t i; + ae_int_t j; + double su; + double sl; + double maxgrowth; + + ae_frame_make(_state, &_frame_block); + *rc = 0; + ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true); + ae_vector_init(&isave, 0, DT_INT, _state, ae_true); + ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true); + + if( n<=0 ) + { + ae_frame_leave(_state); + return; + } + ae_vector_set_length(&cwork2, n+1, _state); + *rc = 0; + if( n==0 ) + { + *rc = 1; + ae_frame_leave(_state); + return; + } + + /* + * prepare parameters for triangular solver + */ + maxgrowth = 1/rcondthreshold(_state); + su = 0; + sl = 1; + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-1; j++) + { + sl = ae_maxreal(sl, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state); + } + for(j=i; j<=n-1; j++) + { + su = ae_maxreal(su, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state); + } + } + if( ae_fp_eq(su,0) ) + { + su = 1; + } + su = 1/su; + sl = 1/sl; + + /* + * Estimate the norm of SU*SL*A. + */ + if( !isanormprovided ) + { + anorm = 0; + if( onenorm ) + { + kase1 = 1; + } + else + { + kase1 = 2; + } + kase = 0; + do + { + rcond_cmatrixestimatenorm(n, &cwork4, &ex, &anorm, &kase, &isave, &rsave, _state); + if( kase!=0 ) + { + if( kase==kase1 ) + { + + /* + * Multiply by U + */ + for(i=1; i<=n; i++) + { + v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1)); + ex.ptr.p_complex[i] = v; + } + + /* + * Multiply by L + */ + for(i=n; i>=1; i--) + { + v = ae_complex_from_d(0); + if( i>1 ) + { + v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-2)); + } + ex.ptr.p_complex[i] = ae_c_add(v,ex.ptr.p_complex[i]); + } + } + else + { + + /* + * Multiply by L' + */ + for(i=1; i<=n; i++) + { + cwork2.ptr.p_complex[i] = ae_complex_from_d(0); + } + for(i=1; i<=n; i++) + { + v = ex.ptr.p_complex[i]; + if( i>1 ) + { + ae_v_caddc(&cwork2.ptr.p_complex[1], 1, &lua->ptr.pp_complex[i-1][0], 1, "Conj", ae_v_len(1,i-1), v); + } + cwork2.ptr.p_complex[i] = ae_c_add(cwork2.ptr.p_complex[i],v); + } + + /* + * Multiply by U' + */ + for(i=1; i<=n; i++) + { + ex.ptr.p_complex[i] = ae_complex_from_d(0); + } + for(i=1; i<=n; i++) + { + v = cwork2.ptr.p_complex[i]; + ae_v_caddc(&ex.ptr.p_complex[i], 1, &lua->ptr.pp_complex[i-1][i-1], 1, "Conj", ae_v_len(i,n), v); + } + } + } + } + while(kase!=0); + } + + /* + * Scale according to SU/SL + */ + anorm = anorm*su*sl; + + /* + * Quick return if possible + */ + if( ae_fp_eq(anorm,0) ) + { + ae_frame_leave(_state); + return; + } + + /* + * Estimate the norm of inv(A). + */ + ainvnm = 0; + if( onenorm ) + { + kase1 = 1; + } + else + { + kase1 = 2; + } + kase = 0; + for(;;) + { + rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state); + if( kase==0 ) + { + break; + } + + /* + * From 1-based to 0-based + */ + for(i=0; i<=n-1; i++) + { + ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1]; + } + + /* + * multiply by inv(A) or inv(A') + */ + if( kase==kase1 ) + { + + /* + * Multiply by inv(L). + */ + if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 0, ae_true, maxgrowth, _state) ) + { + *rc = 0; + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(U). + */ + if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 0, ae_false, maxgrowth, _state) ) + { + *rc = 0; + ae_frame_leave(_state); + return; + } + } + else + { + + /* + * Multiply by inv(U'). + */ + if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 2, ae_false, maxgrowth, _state) ) + { + *rc = 0; + ae_frame_leave(_state); + return; + } + + /* + * Multiply by inv(L'). + */ + if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 2, ae_true, maxgrowth, _state) ) + { + *rc = 0; + ae_frame_leave(_state); + return; + } + } + + /* + * from 0-based to 1-based + */ + for(i=n-1; i>=0; i--) + { + ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i]; + } + } + + /* + * Compute the estimate of the reciprocal condition number. + */ + if( ae_fp_neq(ainvnm,0) ) + { + *rc = 1/ainvnm; + *rc = *rc/anorm; + if( ae_fp_less(*rc,rcondthreshold(_state)) ) + { + *rc = 0; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Internal subroutine for matrix norm estimation + + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 +*************************************************************************/ +static void rcond_rmatrixestimatenorm(ae_int_t n, + /* Real */ ae_vector* v, + /* Real */ ae_vector* x, + /* Integer */ ae_vector* isgn, + double* est, + ae_int_t* kase, + ae_state *_state) +{ + ae_int_t itmax; + ae_int_t i; + double t; + ae_bool flg; + ae_int_t positer; + ae_int_t posj; + ae_int_t posjlast; + ae_int_t posjump; + ae_int_t posaltsgn; + ae_int_t posestold; + ae_int_t postemp; + + + itmax = 5; + posaltsgn = n+1; + posestold = n+2; + postemp = n+3; + positer = n+1; + posj = n+2; + posjlast = n+3; + posjump = n+4; + if( *kase==0 ) + { + ae_vector_set_length(v, n+4, _state); + ae_vector_set_length(x, n+1, _state); + ae_vector_set_length(isgn, n+5, _state); + t = (double)1/(double)n; + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = t; + } + *kase = 1; + isgn->ptr.p_int[posjump] = 1; + return; + } + + /* + * ................ ENTRY (JUMP = 1) + * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. + */ + if( isgn->ptr.p_int[posjump]==1 ) + { + if( n==1 ) + { + v->ptr.p_double[1] = x->ptr.p_double[1]; + *est = ae_fabs(v->ptr.p_double[1], _state); + *kase = 0; + return; + } + *est = 0; + for(i=1; i<=n; i++) + { + *est = *est+ae_fabs(x->ptr.p_double[i], _state); + } + for(i=1; i<=n; i++) + { + if( ae_fp_greater_eq(x->ptr.p_double[i],0) ) + { + x->ptr.p_double[i] = 1; + } + else + { + x->ptr.p_double[i] = -1; + } + isgn->ptr.p_int[i] = ae_sign(x->ptr.p_double[i], _state); + } + *kase = 2; + isgn->ptr.p_int[posjump] = 2; + return; + } + + /* + * ................ ENTRY (JUMP = 2) + * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. + */ + if( isgn->ptr.p_int[posjump]==2 ) + { + isgn->ptr.p_int[posj] = 1; + for(i=2; i<=n; i++) + { + if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) ) + { + isgn->ptr.p_int[posj] = i; + } + } + isgn->ptr.p_int[positer] = 2; + + /* + * MAIN LOOP - ITERATIONS 2,3,...,ITMAX. + */ + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = 0; + } + x->ptr.p_double[isgn->ptr.p_int[posj]] = 1; + *kase = 1; + isgn->ptr.p_int[posjump] = 3; + return; + } + + /* + * ................ ENTRY (JUMP = 3) + * X HAS BEEN OVERWRITTEN BY A*X. + */ + if( isgn->ptr.p_int[posjump]==3 ) + { + ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n)); + v->ptr.p_double[posestold] = *est; + *est = 0; + for(i=1; i<=n; i++) + { + *est = *est+ae_fabs(v->ptr.p_double[i], _state); + } + flg = ae_false; + for(i=1; i<=n; i++) + { + if( (ae_fp_greater_eq(x->ptr.p_double[i],0)&&isgn->ptr.p_int[i]<0)||(ae_fp_less(x->ptr.p_double[i],0)&&isgn->ptr.p_int[i]>=0) ) + { + flg = ae_true; + } + } + + /* + * REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + * OR MAY BE CYCLING. + */ + if( !flg||ae_fp_less_eq(*est,v->ptr.p_double[posestold]) ) + { + v->ptr.p_double[posaltsgn] = 1; + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1)); + v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn]; + } + *kase = 1; + isgn->ptr.p_int[posjump] = 5; + return; + } + for(i=1; i<=n; i++) + { + if( ae_fp_greater_eq(x->ptr.p_double[i],0) ) + { + x->ptr.p_double[i] = 1; + isgn->ptr.p_int[i] = 1; + } + else + { + x->ptr.p_double[i] = -1; + isgn->ptr.p_int[i] = -1; + } + } + *kase = 2; + isgn->ptr.p_int[posjump] = 4; + return; + } + + /* + * ................ ENTRY (JUMP = 4) + * X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. + */ + if( isgn->ptr.p_int[posjump]==4 ) + { + isgn->ptr.p_int[posjlast] = isgn->ptr.p_int[posj]; + isgn->ptr.p_int[posj] = 1; + for(i=2; i<=n; i++) + { + if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) ) + { + isgn->ptr.p_int[posj] = i; + } + } + if( ae_fp_neq(x->ptr.p_double[isgn->ptr.p_int[posjlast]],ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state))&&isgn->ptr.p_int[positer]<itmax ) + { + isgn->ptr.p_int[positer] = isgn->ptr.p_int[positer]+1; + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = 0; + } + x->ptr.p_double[isgn->ptr.p_int[posj]] = 1; + *kase = 1; + isgn->ptr.p_int[posjump] = 3; + return; + } + + /* + * ITERATION COMPLETE. FINAL STAGE. + */ + v->ptr.p_double[posaltsgn] = 1; + for(i=1; i<=n; i++) + { + x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1)); + v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn]; + } + *kase = 1; + isgn->ptr.p_int[posjump] = 5; + return; + } + + /* + * ................ ENTRY (JUMP = 5) + * X HAS BEEN OVERWRITTEN BY A*X. + */ + if( isgn->ptr.p_int[posjump]==5 ) + { + v->ptr.p_double[postemp] = 0; + for(i=1; i<=n; i++) + { + v->ptr.p_double[postemp] = v->ptr.p_double[postemp]+ae_fabs(x->ptr.p_double[i], _state); + } + v->ptr.p_double[postemp] = 2*v->ptr.p_double[postemp]/(3*n); + if( ae_fp_greater(v->ptr.p_double[postemp],*est) ) + { + ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n)); + *est = v->ptr.p_double[postemp]; + } + *kase = 0; + return; + } +} + + +static void rcond_cmatrixestimatenorm(ae_int_t n, + /* Complex */ ae_vector* v, + /* Complex */ ae_vector* x, + double* est, + ae_int_t* kase, + /* Integer */ ae_vector* isave, + /* Real */ ae_vector* rsave, + ae_state *_state) +{ + ae_int_t itmax; + ae_int_t i; + ae_int_t iter; + ae_int_t j; + ae_int_t jlast; + ae_int_t jump; + double absxi; + double altsgn; + double estold; + double safmin; + double temp; + + + + /* + *Executable Statements .. + */ + itmax = 5; + safmin = ae_minrealnumber; + if( *kase==0 ) + { + ae_vector_set_length(v, n+1, _state); + ae_vector_set_length(x, n+1, _state); + ae_vector_set_length(isave, 5, _state); + ae_vector_set_length(rsave, 4, _state); + for(i=1; i<=n; i++) + { + x->ptr.p_complex[i] = ae_complex_from_d((double)1/(double)n); + } + *kase = 1; + jump = 1; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + rcond_internalcomplexrcondloadall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + + /* + * ENTRY (JUMP = 1) + * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. + */ + if( jump==1 ) + { + if( n==1 ) + { + v->ptr.p_complex[1] = x->ptr.p_complex[1]; + *est = ae_c_abs(v->ptr.p_complex[1], _state); + *kase = 0; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + *est = rcond_internalcomplexrcondscsum1(x, n, _state); + for(i=1; i<=n; i++) + { + absxi = ae_c_abs(x->ptr.p_complex[i], _state); + if( ae_fp_greater(absxi,safmin) ) + { + x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi); + } + else + { + x->ptr.p_complex[i] = ae_complex_from_d(1); + } + } + *kase = 2; + jump = 2; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + + /* + * ENTRY (JUMP = 2) + * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. + */ + if( jump==2 ) + { + j = rcond_internalcomplexrcondicmax1(x, n, _state); + iter = 2; + + /* + * MAIN LOOP - ITERATIONS 2,3,...,ITMAX. + */ + for(i=1; i<=n; i++) + { + x->ptr.p_complex[i] = ae_complex_from_d(0); + } + x->ptr.p_complex[j] = ae_complex_from_d(1); + *kase = 1; + jump = 3; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + + /* + * ENTRY (JUMP = 3) + * X HAS BEEN OVERWRITTEN BY A*X. + */ + if( jump==3 ) + { + ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n)); + estold = *est; + *est = rcond_internalcomplexrcondscsum1(v, n, _state); + + /* + * TEST FOR CYCLING. + */ + if( ae_fp_less_eq(*est,estold) ) + { + + /* + * ITERATION COMPLETE. FINAL STAGE. + */ + altsgn = 1; + for(i=1; i<=n; i++) + { + x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1))); + altsgn = -altsgn; + } + *kase = 1; + jump = 5; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + for(i=1; i<=n; i++) + { + absxi = ae_c_abs(x->ptr.p_complex[i], _state); + if( ae_fp_greater(absxi,safmin) ) + { + x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi); + } + else + { + x->ptr.p_complex[i] = ae_complex_from_d(1); + } + } + *kase = 2; + jump = 4; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + + /* + * ENTRY (JUMP = 4) + * X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. + */ + if( jump==4 ) + { + jlast = j; + j = rcond_internalcomplexrcondicmax1(x, n, _state); + if( ae_fp_neq(ae_c_abs(x->ptr.p_complex[jlast], _state),ae_c_abs(x->ptr.p_complex[j], _state))&&iter<itmax ) + { + iter = iter+1; + + /* + * MAIN LOOP - ITERATIONS 2,3,...,ITMAX. + */ + for(i=1; i<=n; i++) + { + x->ptr.p_complex[i] = ae_complex_from_d(0); + } + x->ptr.p_complex[j] = ae_complex_from_d(1); + *kase = 1; + jump = 3; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + + /* + * ITERATION COMPLETE. FINAL STAGE. + */ + altsgn = 1; + for(i=1; i<=n; i++) + { + x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1))); + altsgn = -altsgn; + } + *kase = 1; + jump = 5; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } + + /* + * ENTRY (JUMP = 5) + * X HAS BEEN OVERWRITTEN BY A*X. + */ + if( jump==5 ) + { + temp = 2*(rcond_internalcomplexrcondscsum1(x, n, _state)/(3*n)); + if( ae_fp_greater(temp,*est) ) + { + ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n)); + *est = temp; + } + *kase = 0; + rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state); + return; + } +} + + +static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + double result; + + + result = 0; + for(i=1; i<=n; i++) + { + result = result+ae_c_abs(x->ptr.p_complex[i], _state); + } + return result; +} + + +static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + double m; + ae_int_t result; + + + result = 1; + m = ae_c_abs(x->ptr.p_complex[1], _state); + for(i=2; i<=n; i++) + { + if( ae_fp_greater(ae_c_abs(x->ptr.p_complex[i], _state),m) ) + { + result = i; + m = ae_c_abs(x->ptr.p_complex[i], _state); + } + } + return result; +} + + +static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave, + /* Real */ ae_vector* rsave, + ae_int_t* i, + ae_int_t* iter, + ae_int_t* j, + ae_int_t* jlast, + ae_int_t* jump, + double* absxi, + double* altsgn, + double* estold, + double* temp, + ae_state *_state) +{ + + + isave->ptr.p_int[0] = *i; + isave->ptr.p_int[1] = *iter; + isave->ptr.p_int[2] = *j; + isave->ptr.p_int[3] = *jlast; + isave->ptr.p_int[4] = *jump; + rsave->ptr.p_double[0] = *absxi; + rsave->ptr.p_double[1] = *altsgn; + rsave->ptr.p_double[2] = *estold; + rsave->ptr.p_double[3] = *temp; +} + + +static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave, + /* Real */ ae_vector* rsave, + ae_int_t* i, + ae_int_t* iter, + ae_int_t* j, + ae_int_t* jlast, + ae_int_t* jump, + double* absxi, + double* altsgn, + double* estold, + double* temp, + ae_state *_state) +{ + + + *i = isave->ptr.p_int[0]; + *iter = isave->ptr.p_int[1]; + *j = isave->ptr.p_int[2]; + *jlast = isave->ptr.p_int[3]; + *jump = isave->ptr.p_int[4]; + *absxi = rsave->ptr.p_double[0]; + *altsgn = rsave->ptr.p_double[1]; + *estold = rsave->ptr.p_double[2]; + *temp = rsave->ptr.p_double[3]; +} + + + + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of RMatrixLU subroutine). + Pivots - table of permutations + (the output of RMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code: + * -3 A is singular, or VERY close to singular. + it is filled by zeros in such cases. + * 1 task is solved (but matrix A may be ill-conditioned, + check R1/RInf parameters for condition numbers). + Rep - solver report, see below for more info + A - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + +SOLVER REPORT + +Subroutine sets following fields of the Rep structure: +* R1 reciprocal of condition number: 1/cond(A), 1-norm. +* RInf reciprocal of condition number: 1/cond(A), inf-norm. + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixluinverse(/* Real */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_int_t i; + ae_int_t j; + ae_int_t k; + double v; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + + ae_assert(n>0, "RMatrixLUInverse: N<=0!", _state); + ae_assert(a->cols>=n, "RMatrixLUInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "RMatrixLUInverse: rows(A)<N!", _state); + ae_assert(pivots->cnt>=n, "RMatrixLUInverse: len(Pivots)<N!", _state); + ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUInverse: A contains infinite or NaN values!", _state); + *info = 1; + for(i=0; i<=n-1; i++) + { + if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i ) + { + *info = -1; + } + } + ae_assert(*info>0, "RMatrixLUInverse: incorrect Pivots array!", _state); + + /* + * calculate condition numbers + */ + rep->r1 = rmatrixlurcond1(a, n, _state); + rep->rinf = rmatrixlurcondinf(a, n, _state); + if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) ) + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + rep->r1 = 0; + rep->rinf = 0; + *info = -3; + ae_frame_leave(_state); + return; + } + + /* + * Call cache-oblivious code + */ + ae_vector_set_length(&work, n, _state); + matinv_rmatrixluinverserec(a, 0, n, &work, info, rep, _state); + + /* + * apply permutations + */ + for(i=0; i<=n-1; i++) + { + for(j=n-2; j>=0; j--) + { + k = pivots->ptr.p_int[j]; + v = a->ptr.pp_double[i][j]; + a->ptr.pp_double[i][j] = a->ptr.pp_double[i][k]; + a->ptr.pp_double[i][k] = v; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + +Result: + True, if the matrix is not singular. + False, if the matrix is singular. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector pivots; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>0, "RMatrixInverse: N<=0!", _state); + ae_assert(a->cols>=n, "RMatrixInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "RMatrixInverse: rows(A)<N!", _state); + ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixInverse: A contains infinite or NaN values!", _state); + rmatrixlu(a, n, n, &pivots, _state); + rmatrixluinverse(a, &pivots, n, info, rep, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of CMatrixLU subroutine). + Pivots - table of permutations + (the output of CMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixluinverse(/* Complex */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector work; + ae_int_t i; + ae_int_t j; + ae_int_t k; + ae_complex v; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(n>0, "CMatrixLUInverse: N<=0!", _state); + ae_assert(a->cols>=n, "CMatrixLUInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "CMatrixLUInverse: rows(A)<N!", _state); + ae_assert(pivots->cnt>=n, "CMatrixLUInverse: len(Pivots)<N!", _state); + ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUInverse: A contains infinite or NaN values!", _state); + *info = 1; + for(i=0; i<=n-1; i++) + { + if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i ) + { + *info = -1; + } + } + ae_assert(*info>0, "CMatrixLUInverse: incorrect Pivots array!", _state); + + /* + * calculate condition numbers + */ + rep->r1 = cmatrixlurcond1(a, n, _state); + rep->rinf = cmatrixlurcondinf(a, n, _state); + if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) ) + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + rep->r1 = 0; + rep->rinf = 0; + *info = -3; + ae_frame_leave(_state); + return; + } + + /* + * Call cache-oblivious code + */ + ae_vector_set_length(&work, n, _state); + matinv_cmatrixluinverserec(a, 0, n, &work, info, rep, _state); + + /* + * apply permutations + */ + for(i=0; i<=n-1; i++) + { + for(j=n-2; j>=0; j--) + { + k = pivots->ptr.p_int[j]; + v = a->ptr.pp_complex[i][j]; + a->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][k]; + a->ptr.pp_complex[i][k] = v; + } + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void cmatrixinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector pivots; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>0, "CRMatrixInverse: N<=0!", _state); + ae_assert(a->cols>=n, "CRMatrixInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "CRMatrixInverse: rows(A)<N!", _state); + ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixInverse: A contains infinite or NaN values!", _state); + cmatrixlu(a, n, n, &pivots, _state); + cmatrixluinverse(a, &pivots, n, info, rep, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Inversion of a symmetric positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of SPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixcholeskyinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector tmp; + matinvreport rep2; + ae_bool f; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + _matinvreport_init(&rep2, _state, ae_true); + + ae_assert(n>0, "SPDMatrixCholeskyInverse: N<=0!", _state); + ae_assert(a->cols>=n, "SPDMatrixCholeskyInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "SPDMatrixCholeskyInverse: rows(A)<N!", _state); + *info = 1; + f = ae_true; + for(i=0; i<=n-1; i++) + { + f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state); + } + ae_assert(f, "SPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state); + + /* + * calculate condition numbers + */ + rep->r1 = spdmatrixcholeskyrcond(a, n, isupper, _state); + rep->rinf = rep->r1; + if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) ) + { + if( isupper ) + { + for(i=0; i<=n-1; i++) + { + for(j=i; j<=n-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + } + else + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + } + rep->r1 = 0; + rep->rinf = 0; + *info = -3; + ae_frame_leave(_state); + return; + } + + /* + * Inverse + */ + ae_vector_set_length(&tmp, n, _state); + matinv_spdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Inversion of a symmetric positive definite matrix. + +Given an upper or lower triangle of a symmetric positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + + *info = 0; + _matinvreport_clear(rep); + + ae_assert(n>0, "SPDMatrixInverse: N<=0!", _state); + ae_assert(a->cols>=n, "SPDMatrixInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "SPDMatrixInverse: rows(A)<N!", _state); + ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixInverse: A contains infinite or NaN values!", _state); + *info = 1; + if( spdmatrixcholesky(a, n, isupper, _state) ) + { + spdmatrixcholeskyinverse(a, n, isupper, info, rep, _state); + } + else + { + *info = -3; + } +} + + +/************************************************************************* +Inversion of a Hermitian positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of HPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixcholeskyinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + matinvreport rep2; + ae_vector tmp; + ae_bool f; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + _matinvreport_init(&rep2, _state, ae_true); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(n>0, "HPDMatrixCholeskyInverse: N<=0!", _state); + ae_assert(a->cols>=n, "HPDMatrixCholeskyInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "HPDMatrixCholeskyInverse: rows(A)<N!", _state); + f = ae_true; + for(i=0; i<=n-1; i++) + { + f = (f&&ae_isfinite(a->ptr.pp_complex[i][i].x, _state))&&ae_isfinite(a->ptr.pp_complex[i][i].y, _state); + } + ae_assert(f, "HPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state); + *info = 1; + + /* + * calculate condition numbers + */ + rep->r1 = hpdmatrixcholeskyrcond(a, n, isupper, _state); + rep->rinf = rep->r1; + if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) ) + { + if( isupper ) + { + for(i=0; i<=n-1; i++) + { + for(j=i; j<=n-1; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + } + else + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + } + rep->r1 = 0; + rep->rinf = 0; + *info = -3; + ae_frame_leave(_state); + return; + } + + /* + * Inverse + */ + ae_vector_set_length(&tmp, n, _state); + matinv_hpdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Inversion of a Hermitian positive definite matrix. + +Given an upper or lower triangle of a Hermitian positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + + *info = 0; + _matinvreport_clear(rep); + + ae_assert(n>0, "HPDMatrixInverse: N<=0!", _state); + ae_assert(a->cols>=n, "HPDMatrixInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "HPDMatrixInverse: rows(A)<N!", _state); + ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "HPDMatrixInverse: A contains infinite or NaN values!", _state); + *info = 1; + if( hpdmatrixcholesky(a, n, isupper, _state) ) + { + hpdmatrixcholeskyinverse(a, n, isupper, info, rep, _state); + } + else + { + *info = -3; + } +} + + +/************************************************************************* +Triangular matrix inverse (real) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixtrinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector tmp; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true); + + ae_assert(n>0, "RMatrixTRInverse: N<=0!", _state); + ae_assert(a->cols>=n, "RMatrixTRInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "RMatrixTRInverse: rows(A)<N!", _state); + ae_assert(isfinitertrmatrix(a, n, isupper, _state), "RMatrixTRInverse: A contains infinite or NaN values!", _state); + *info = 1; + + /* + * calculate condition numbers + */ + rep->r1 = rmatrixtrrcond1(a, n, isupper, isunit, _state); + rep->rinf = rmatrixtrrcondinf(a, n, isupper, isunit, _state); + if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) ) + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + rep->r1 = 0; + rep->rinf = 0; + *info = -3; + ae_frame_leave(_state); + return; + } + + /* + * Invert + */ + ae_vector_set_length(&tmp, n, _state); + matinv_rmatrixtrinverserec(a, 0, n, isupper, isunit, &tmp, info, rep, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Triangular matrix inverse (complex) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void cmatrixtrinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_vector tmp; + + ae_frame_make(_state, &_frame_block); + *info = 0; + _matinvreport_clear(rep); + ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true); + + ae_assert(n>0, "CMatrixTRInverse: N<=0!", _state); + ae_assert(a->cols>=n, "CMatrixTRInverse: cols(A)<N!", _state); + ae_assert(a->rows>=n, "CMatrixTRInverse: rows(A)<N!", _state); + ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "CMatrixTRInverse: A contains infinite or NaN values!", _state); + *info = 1; + + /* + * calculate condition numbers + */ + rep->r1 = cmatrixtrrcond1(a, n, isupper, isunit, _state); + rep->rinf = cmatrixtrrcondinf(a, n, isupper, isunit, _state); + if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) ) + { + for(i=0; i<=n-1; i++) + { + for(j=0; j<=n-1; j++) + { + a->ptr.pp_complex[i][j] = ae_complex_from_d(0); + } + } + rep->r1 = 0; + rep->rinf = 0; + *info = -3; + ae_frame_leave(_state); + return; + } + + /* + * Invert + */ + ae_vector_set_length(&tmp, n, _state); + matinv_cmatrixtrinverserec(a, 0, n, isupper, isunit, &tmp, info, rep, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Triangular matrix inversion, recursive subroutine + + -- ALGLIB -- + 05.02.2010, Bochkanov Sergey. + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992. +*************************************************************************/ +static void matinv_rmatrixtrinverserec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + /* Real */ ae_vector* tmp, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_int_t n1; + ae_int_t n2; + ae_int_t i; + ae_int_t j; + double v; + double ajj; + + + if( n<1 ) + { + *info = -1; + return; + } + + /* + * Base case + */ + if( n<=ablasblocksize(a, _state) ) + { + if( isupper ) + { + + /* + * Compute inverse of upper triangular matrix. + */ + for(j=0; j<=n-1; j++) + { + if( !isunit ) + { + if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],0) ) + { + *info = -3; + return; + } + a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j]; + ajj = -a->ptr.pp_double[offs+j][offs+j]; + } + else + { + ajj = -1; + } + + /* + * Compute elements 1:j-1 of j-th column. + */ + if( j>0 ) + { + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(0,j-1)); + for(i=0; i<=j-1; i++) + { + if( i<j-1 ) + { + v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+i+1], 1, &tmp->ptr.p_double[i+1], 1, ae_v_len(offs+i+1,offs+j-1)); + } + else + { + v = 0; + } + if( !isunit ) + { + a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[i]; + } + else + { + a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[i]; + } + } + ae_v_muld(&a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj); + } + } + } + else + { + + /* + * Compute inverse of lower triangular matrix. + */ + for(j=n-1; j>=0; j--) + { + if( !isunit ) + { + if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],0) ) + { + *info = -3; + return; + } + a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j]; + ajj = -a->ptr.pp_double[offs+j][offs+j]; + } + else + { + ajj = -1; + } + if( j<n-1 ) + { + + /* + * Compute elements j+1:n of j-th column. + */ + ae_v_move(&tmp->ptr.p_double[j+1], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(j+1,n-1)); + for(i=j+1; i<=n-1; i++) + { + if( i>j+1 ) + { + v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &tmp->ptr.p_double[j+1], 1, ae_v_len(offs+j+1,offs+i-1)); + } + else + { + v = 0; + } + if( !isunit ) + { + a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[i]; + } + else + { + a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[i]; + } + } + ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj); + } + } + } + return; + } + + /* + * Recursive case + */ + ablassplitlength(a, n, &n1, &n2, _state); + if( n2>0 ) + { + if( isupper ) + { + for(i=0; i<=n1-1; i++) + { + ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1); + } + rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state); + rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state); + } + else + { + for(i=0; i<=n2-1; i++) + { + ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1); + } + rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state); + rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state); + } + matinv_rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, rep, _state); + } + matinv_rmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, rep, _state); +} + + +/************************************************************************* +Triangular matrix inversion, recursive subroutine + + -- ALGLIB -- + 05.02.2010, Bochkanov Sergey. + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992. +*************************************************************************/ +static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + /* Complex */ ae_vector* tmp, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_int_t n1; + ae_int_t n2; + ae_int_t i; + ae_int_t j; + ae_complex v; + ae_complex ajj; + + + if( n<1 ) + { + *info = -1; + return; + } + + /* + * Base case + */ + if( n<=ablascomplexblocksize(a, _state) ) + { + if( isupper ) + { + + /* + * Compute inverse of upper triangular matrix. + */ + for(j=0; j<=n-1; j++) + { + if( !isunit ) + { + if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],0) ) + { + *info = -3; + return; + } + a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]); + ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]); + } + else + { + ajj = ae_complex_from_d(-1); + } + + /* + * Compute elements 1:j-1 of j-th column. + */ + if( j>0 ) + { + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+0][offs+j], a->stride, "N", ae_v_len(0,j-1)); + for(i=0; i<=j-1; i++) + { + if( i<j-1 ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+i+1], 1, "N", &tmp->ptr.p_complex[i+1], 1, "N", ae_v_len(offs+i+1,offs+j-1)); + } + else + { + v = ae_complex_from_d(0); + } + if( !isunit ) + { + a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[i])); + } + else + { + a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[i]); + } + } + ae_v_cmulc(&a->ptr.pp_complex[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj); + } + } + } + else + { + + /* + * Compute inverse of lower triangular matrix. + */ + for(j=n-1; j>=0; j--) + { + if( !isunit ) + { + if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],0) ) + { + *info = -3; + return; + } + a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]); + ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]); + } + else + { + ajj = ae_complex_from_d(-1); + } + if( j<n-1 ) + { + + /* + * Compute elements j+1:n of j-th column. + */ + ae_v_cmove(&tmp->ptr.p_complex[j+1], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(j+1,n-1)); + for(i=j+1; i<=n-1; i++) + { + if( i>j+1 ) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &tmp->ptr.p_complex[j+1], 1, "N", ae_v_len(offs+j+1,offs+i-1)); + } + else + { + v = ae_complex_from_d(0); + } + if( !isunit ) + { + a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[i])); + } + else + { + a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[i]); + } + } + ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj); + } + } + } + return; + } + + /* + * Recursive case + */ + ablascomplexsplitlength(a, n, &n1, &n2, _state); + if( n2>0 ) + { + if( isupper ) + { + for(i=0; i<=n1-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1); + } + cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state); + cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state); + } + else + { + for(i=0; i<=n2-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1); + } + cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state); + cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state); + } + matinv_cmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, rep, _state); + } + matinv_cmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, rep, _state); +} + + +static void matinv_rmatrixluinverserec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + /* Real */ ae_vector* work, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + double v; + ae_int_t n1; + ae_int_t n2; + + + if( n<1 ) + { + *info = -1; + return; + } + + /* + * Base case + */ + if( n<=ablasblocksize(a, _state) ) + { + + /* + * Form inv(U) + */ + matinv_rmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, info, rep, _state); + if( *info<=0 ) + { + return; + } + + /* + * Solve the equation inv(A)*L = inv(U) for inv(A). + */ + for(j=n-1; j>=0; j--) + { + + /* + * Copy current column of L to WORK and replace with zeros. + */ + for(i=j+1; i<=n-1; i++) + { + work->ptr.p_double[i] = a->ptr.pp_double[offs+i][offs+j]; + a->ptr.pp_double[offs+i][offs+j] = 0; + } + + /* + * Compute current column of inv(A). + */ + if( j<n-1 ) + { + for(i=0; i<=n-1; i++) + { + v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &work->ptr.p_double[j+1], 1, ae_v_len(offs+j+1,offs+n-1)); + a->ptr.pp_double[offs+i][offs+j] = a->ptr.pp_double[offs+i][offs+j]-v; + } + } + } + return; + } + + /* + * Recursive code: + * + * ( L1 ) ( U1 U12 ) + * A = ( ) * ( ) + * ( L12 L2 ) ( U2 ) + * + * ( W X ) + * A^-1 = ( ) + * ( Y Z ) + */ + ablassplitlength(a, n, &n1, &n2, _state); + ae_assert(n2>0, "LUInverseRec: internal error!", _state); + + /* + * X := inv(U1)*U12*inv(U2) + */ + rmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state); + rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state); + + /* + * Y := inv(L2)*L12*inv(L1) + */ + rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state); + rmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state); + + /* + * W := inv(L1*U1)+X*Y + */ + matinv_rmatrixluinverserec(a, offs, n1, work, info, rep, _state); + if( *info<=0 ) + { + return; + } + rmatrixgemm(n1, n1, n2, 1.0, a, offs, offs+n1, 0, a, offs+n1, offs, 0, 1.0, a, offs, offs, _state); + + /* + * X := -X*inv(L2) + * Y := -inv(U2)*Y + */ + rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state); + for(i=0; i<=n1-1; i++) + { + ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1); + } + rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state); + for(i=0; i<=n2-1; i++) + { + ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1); + } + + /* + * Z := inv(L2*U2) + */ + matinv_rmatrixluinverserec(a, offs+n1, n2, work, info, rep, _state); +} + + +static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + /* Complex */ ae_vector* work, + ae_int_t* info, + matinvreport* rep, + ae_state *_state) +{ + ae_int_t i; + ae_int_t j; + ae_complex v; + ae_int_t n1; + ae_int_t n2; + + + if( n<1 ) + { + *info = -1; + return; + } + + /* + * Base case + */ + if( n<=ablascomplexblocksize(a, _state) ) + { + + /* + * Form inv(U) + */ + matinv_cmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, info, rep, _state); + if( *info<=0 ) + { + return; + } + + /* + * Solve the equation inv(A)*L = inv(U) for inv(A). + */ + for(j=n-1; j>=0; j--) + { + + /* + * Copy current column of L to WORK and replace with zeros. + */ + for(i=j+1; i<=n-1; i++) + { + work->ptr.p_complex[i] = a->ptr.pp_complex[offs+i][offs+j]; + a->ptr.pp_complex[offs+i][offs+j] = ae_complex_from_d(0); + } + + /* + * Compute current column of inv(A). + */ + if( j<n-1 ) + { + for(i=0; i<=n-1; i++) + { + v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &work->ptr.p_complex[j+1], 1, "N", ae_v_len(offs+j+1,offs+n-1)); + a->ptr.pp_complex[offs+i][offs+j] = ae_c_sub(a->ptr.pp_complex[offs+i][offs+j],v); + } + } + } + return; + } + + /* + * Recursive code: + * + * ( L1 ) ( U1 U12 ) + * A = ( ) * ( ) + * ( L12 L2 ) ( U2 ) + * + * ( W X ) + * A^-1 = ( ) + * ( Y Z ) + */ + ablascomplexsplitlength(a, n, &n1, &n2, _state); + ae_assert(n2>0, "LUInverseRec: internal error!", _state); + + /* + * X := inv(U1)*U12*inv(U2) + */ + cmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state); + cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state); + + /* + * Y := inv(L2)*L12*inv(L1) + */ + cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state); + cmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state); + + /* + * W := inv(L1*U1)+X*Y + */ + matinv_cmatrixluinverserec(a, offs, n1, work, info, rep, _state); + if( *info<=0 ) + { + return; + } + cmatrixgemm(n1, n1, n2, ae_complex_from_d(1.0), a, offs, offs+n1, 0, a, offs+n1, offs, 0, ae_complex_from_d(1.0), a, offs, offs, _state); + + /* + * X := -X*inv(L2) + * Y := -inv(U2)*Y + */ + cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state); + for(i=0; i<=n1-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1); + } + cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state); + for(i=0; i<=n2-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1); + } + + /* + * Z := inv(L2*U2) + */ + matinv_cmatrixluinverserec(a, offs+n1, n2, work, info, rep, _state); +} + + +/************************************************************************* +Recursive subroutine for SPD inversion. + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +static void matinv_spdmatrixcholeskyinverserec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + double v; + ae_int_t n1; + ae_int_t n2; + ae_int_t info2; + matinvreport rep2; + + ae_frame_make(_state, &_frame_block); + _matinvreport_init(&rep2, _state, ae_true); + + if( n<1 ) + { + ae_frame_leave(_state); + return; + } + + /* + * Base case + */ + if( n<=ablasblocksize(a, _state) ) + { + matinv_rmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &info2, &rep2, _state); + if( isupper ) + { + + /* + * Compute the product U * U'. + * NOTE: we never assume that diagonal of U is real + */ + for(i=0; i<=n-1; i++) + { + if( i==0 ) + { + + /* + * 1x1 matrix + */ + a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state); + } + else + { + + /* + * (I+1)x(I+1) matrix, + * + * ( A11 A12 ) ( A11^H ) ( A11*A11^H+A12*A12^H A12*A22^H ) + * ( ) * ( ) = ( ) + * ( A22 ) ( A12^H A22^H ) ( A22*A12^H A22*A22^H ) + * + * A11 is IxI, A22 is 1x1. + */ + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(0,i-1)); + for(j=0; j<=i-1; j++) + { + v = a->ptr.pp_double[offs+j][offs+i]; + ae_v_addd(&a->ptr.pp_double[offs+j][offs+j], 1, &tmp->ptr.p_double[j], 1, ae_v_len(offs+j,offs+i-1), v); + } + v = a->ptr.pp_double[offs+i][offs+i]; + ae_v_muld(&a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v); + a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state); + } + } + } + else + { + + /* + * Compute the product L' * L + * NOTE: we never assume that diagonal of L is real + */ + for(i=0; i<=n-1; i++) + { + if( i==0 ) + { + + /* + * 1x1 matrix + */ + a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state); + } + else + { + + /* + * (I+1)x(I+1) matrix, + * + * ( A11^H A21^H ) ( A11 ) ( A11^H*A11+A21^H*A21 A21^H*A22 ) + * ( ) * ( ) = ( ) + * ( A22^H ) ( A21 A22 ) ( A22^H*A21 A22^H*A22 ) + * + * A11 is IxI, A22 is 1x1. + */ + ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs], 1, ae_v_len(0,i-1)); + for(j=0; j<=i-1; j++) + { + v = a->ptr.pp_double[offs+i][offs+j]; + ae_v_addd(&a->ptr.pp_double[offs+j][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+j), v); + } + v = a->ptr.pp_double[offs+i][offs+i]; + ae_v_muld(&a->ptr.pp_double[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v); + a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state); + } + } + } + ae_frame_leave(_state); + return; + } + + /* + * Recursive code: triangular factor inversion merged with + * UU' or L'L multiplication + */ + ablassplitlength(a, n, &n1, &n2, _state); + + /* + * form off-diagonal block of trangular inverse + */ + if( isupper ) + { + for(i=0; i<=n1-1; i++) + { + ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1); + } + rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state); + rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state); + } + else + { + for(i=0; i<=n2-1; i++) + { + ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1); + } + rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state); + rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state); + } + + /* + * invert first diagonal block + */ + matinv_spdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state); + + /* + * update first diagonal block with off-diagonal block, + * update off-diagonal block + */ + if( isupper ) + { + rmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state); + rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs, offs+n1, _state); + } + else + { + rmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 1, 1.0, a, offs, offs, isupper, _state); + rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs+n1, offs, _state); + } + + /* + * invert second diagonal block + */ + matinv_spdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +Recursive subroutine for HPD inversion. + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tmp, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_int_t j; + ae_complex v; + ae_int_t n1; + ae_int_t n2; + ae_int_t info2; + matinvreport rep2; + + ae_frame_make(_state, &_frame_block); + _matinvreport_init(&rep2, _state, ae_true); + + if( n<1 ) + { + ae_frame_leave(_state); + return; + } + + /* + * Base case + */ + if( n<=ablascomplexblocksize(a, _state) ) + { + matinv_cmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &info2, &rep2, _state); + if( isupper ) + { + + /* + * Compute the product U * U'. + * NOTE: we never assume that diagonal of U is real + */ + for(i=0; i<=n-1; i++) + { + if( i==0 ) + { + + /* + * 1x1 matrix + */ + a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state)); + } + else + { + + /* + * (I+1)x(I+1) matrix, + * + * ( A11 A12 ) ( A11^H ) ( A11*A11^H+A12*A12^H A12*A22^H ) + * ( ) * ( ) = ( ) + * ( A22 ) ( A12^H A22^H ) ( A22*A12^H A22*A22^H ) + * + * A11 is IxI, A22 is 1x1. + */ + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+i], a->stride, "Conj", ae_v_len(0,i-1)); + for(j=0; j<=i-1; j++) + { + v = a->ptr.pp_complex[offs+j][offs+i]; + ae_v_caddc(&a->ptr.pp_complex[offs+j][offs+j], 1, &tmp->ptr.p_complex[j], 1, "N", ae_v_len(offs+j,offs+i-1), v); + } + v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state); + ae_v_cmulc(&a->ptr.pp_complex[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v); + a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state)); + } + } + } + else + { + + /* + * Compute the product L' * L + * NOTE: we never assume that diagonal of L is real + */ + for(i=0; i<=n-1; i++) + { + if( i==0 ) + { + + /* + * 1x1 matrix + */ + a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state)); + } + else + { + + /* + * (I+1)x(I+1) matrix, + * + * ( A11^H A21^H ) ( A11 ) ( A11^H*A11+A21^H*A21 A21^H*A22 ) + * ( ) * ( ) = ( ) + * ( A22^H ) ( A21 A22 ) ( A22^H*A21 A22^H*A22 ) + * + * A11 is IxI, A22 is 1x1. + */ + ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs], 1, "N", ae_v_len(0,i-1)); + for(j=0; j<=i-1; j++) + { + v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+j], _state); + ae_v_caddc(&a->ptr.pp_complex[offs+j][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+j), v); + } + v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state); + ae_v_cmulc(&a->ptr.pp_complex[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v); + a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state)); + } + } + } + ae_frame_leave(_state); + return; + } + + /* + * Recursive code: triangular factor inversion merged with + * UU' or L'L multiplication + */ + ablascomplexsplitlength(a, n, &n1, &n2, _state); + + /* + * form off-diagonal block of trangular inverse + */ + if( isupper ) + { + for(i=0; i<=n1-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1); + } + cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state); + cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state); + } + else + { + for(i=0; i<=n2-1; i++) + { + ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1); + } + cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state); + cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state); + } + + /* + * invert first diagonal block + */ + matinv_hpdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state); + + /* + * update first diagonal block with off-diagonal block, + * update off-diagonal block + */ + if( isupper ) + { + cmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state); + cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs, offs+n1, _state); + } + else + { + cmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 2, 1.0, a, offs, offs, isupper, _state); + cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs+n1, offs, _state); + } + + /* + * invert second diagonal block + */ + matinv_hpdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state); + ae_frame_leave(_state); +} + + +ae_bool _matinvreport_init(matinvreport* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _matinvreport_init_copy(matinvreport* dst, matinvreport* src, ae_state *_state, ae_bool make_automatic) +{ + dst->r1 = src->r1; + dst->rinf = src->rinf; + return ae_true; +} + + +void _matinvreport_clear(matinvreport* p) +{ +} + + + + +/************************************************************************* +Singular value decomposition of a bidiagonal matrix (extended algorithm) + +The algorithm performs the singular value decomposition of a bidiagonal +matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and P - +orthogonal matrices, S - diagonal matrix with non-negative elements on the +main diagonal, in descending order. + +The algorithm finds singular values. In addition, the algorithm can +calculate matrices Q and P (more precisely, not the matrices, but their +product with given matrices U and VT - U*Q and (P^T)*VT)). Of course, +matrices U and VT can be of any type, including identity. Furthermore, the +algorithm can calculate Q'*C (this product is calculated more effectively +than U*Q, because this calculation operates with rows instead of matrix +columns). + +The feature of the algorithm is its ability to find all singular values +including those which are arbitrarily close to 0 with relative accuracy +close to machine precision. If the parameter IsFractionalAccuracyRequired +is set to True, all singular values will have high relative accuracy close +to machine precision. If the parameter is set to False, only the biggest +singular value will have relative accuracy close to machine precision. +The absolute error of other singular values is equal to the absolute error +of the biggest singular value. + +Input parameters: + D - main diagonal of matrix B. + Array whose index ranges within [0..N-1]. + E - superdiagonal (or subdiagonal) of matrix B. + Array whose index ranges within [0..N-2]. + N - size of matrix B. + IsUpper - True, if the matrix is upper bidiagonal. + IsFractionalAccuracyRequired - + accuracy to search singular values with. + U - matrix to be multiplied by Q. + Array whose indexes range within [0..NRU-1, 0..N-1]. + The matrix can be bigger, in that case only the submatrix + [0..NRU-1, 0..N-1] will be multiplied by Q. + NRU - number of rows in matrix U. + C - matrix to be multiplied by Q'. + Array whose indexes range within [0..N-1, 0..NCC-1]. + The matrix can be bigger, in that case only the submatrix + [0..N-1, 0..NCC-1] will be multiplied by Q'. + NCC - number of columns in matrix C. + VT - matrix to be multiplied by P^T. + Array whose indexes range within [0..N-1, 0..NCVT-1]. + The matrix can be bigger, in that case only the submatrix + [0..N-1, 0..NCVT-1] will be multiplied by P^T. + NCVT - number of columns in matrix VT. + +Output parameters: + D - singular values of matrix B in descending order. + U - if NRU>0, contains matrix U*Q. + VT - if NCVT>0, contains matrix (P^T)*VT. + C - if NCC>0, contains matrix Q'*C. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + +Additional information: + The type of convergence is controlled by the internal parameter TOL. + If the parameter is greater than 0, the singular values will have + relative accuracy TOL. If TOL<0, the singular values will have + absolute accuracy ABS(TOL)*norm(B). + By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon, + where Epsilon is the machine precision. It is not recommended to use + TOL less than 10*Epsilon since this will considerably slow down the + algorithm and may not lead to error decreasing. +History: + * 31 March, 2007. + changed MAXITR from 6 to 12. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999. +*************************************************************************/ +ae_bool rmatrixbdsvd(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_bool isupper, + ae_bool isfractionalaccuracyrequired, + /* Real */ ae_matrix* u, + ae_int_t nru, + /* Real */ ae_matrix* c, + ae_int_t ncc, + /* Real */ ae_matrix* vt, + ae_int_t ncvt, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _e; + ae_vector d1; + ae_vector e1; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + ae_vector_init(&d1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e1, 0, DT_REAL, _state, ae_true); + + ae_vector_set_length(&d1, n+1, _state); + ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n)); + if( n>1 ) + { + ae_vector_set_length(&e1, n-1+1, _state); + ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1)); + } + result = bdsvd_bidiagonalsvddecompositioninternal(&d1, &e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt, _state); + ae_v_move(&d->ptr.p_double[0], 1, &d1.ptr.p_double[1], 1, ae_v_len(0,n-1)); + ae_frame_leave(_state); + return result; +} + + +ae_bool bidiagonalsvddecomposition(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_bool isupper, + ae_bool isfractionalaccuracyrequired, + /* Real */ ae_matrix* u, + ae_int_t nru, + /* Real */ ae_matrix* c, + ae_int_t ncc, + /* Real */ ae_matrix* vt, + ae_int_t ncvt, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _e; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + + result = bdsvd_bidiagonalsvddecompositioninternal(d, e, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Internal working subroutine for bidiagonal decomposition +*************************************************************************/ +static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_bool isupper, + ae_bool isfractionalaccuracyrequired, + /* Real */ ae_matrix* u, + ae_int_t ustart, + ae_int_t nru, + /* Real */ ae_matrix* c, + ae_int_t cstart, + ae_int_t ncc, + /* Real */ ae_matrix* vt, + ae_int_t vstart, + ae_int_t ncvt, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector _e; + ae_int_t i; + ae_int_t idir; + ae_int_t isub; + ae_int_t iter; + ae_int_t j; + ae_int_t ll; + ae_int_t lll; + ae_int_t m; + ae_int_t maxit; + ae_int_t oldll; + ae_int_t oldm; + double abse; + double abss; + double cosl; + double cosr; + double cs; + double eps; + double f; + double g; + double h; + double mu; + double oldcs; + double oldsn; + double r; + double shift; + double sigmn; + double sigmx; + double sinl; + double sinr; + double sll; + double smax; + double smin; + double sminl; + double sminlo; + double sminoa; + double sn; + double thresh; + double tol; + double tolmul; + double unfl; + ae_vector work0; + ae_vector work1; + ae_vector work2; + ae_vector work3; + ae_int_t maxitr; + ae_bool matrixsplitflag; + ae_bool iterflag; + ae_vector utemp; + ae_vector vttemp; + ae_vector ctemp; + ae_vector etemp; + ae_bool rightside; + ae_bool fwddir; + double tmp; + ae_int_t mm1; + ae_int_t mm0; + ae_bool bchangedir; + ae_int_t uend; + ae_int_t cend; + ae_int_t vend; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_vector_init_copy(&_e, e, _state, ae_true); + e = &_e; + ae_vector_init(&work0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work2, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work3, 0, DT_REAL, _state, ae_true); + ae_vector_init(&utemp, 0, DT_REAL, _state, ae_true); + ae_vector_init(&vttemp, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ctemp, 0, DT_REAL, _state, ae_true); + ae_vector_init(&etemp, 0, DT_REAL, _state, ae_true); + + result = ae_true; + if( n==0 ) + { + ae_frame_leave(_state); + return result; + } + if( n==1 ) + { + if( ae_fp_less(d->ptr.p_double[1],0) ) + { + d->ptr.p_double[1] = -d->ptr.p_double[1]; + if( ncvt>0 ) + { + ae_v_muld(&vt->ptr.pp_double[vstart][vstart], 1, ae_v_len(vstart,vstart+ncvt-1), -1); + } + } + ae_frame_leave(_state); + return result; + } + + /* + * these initializers are not really necessary, + * but without them compiler complains about uninitialized locals + */ + ll = 0; + oldsn = 0; + + /* + * init + */ + ae_vector_set_length(&work0, n-1+1, _state); + ae_vector_set_length(&work1, n-1+1, _state); + ae_vector_set_length(&work2, n-1+1, _state); + ae_vector_set_length(&work3, n-1+1, _state); + uend = ustart+ae_maxint(nru-1, 0, _state); + vend = vstart+ae_maxint(ncvt-1, 0, _state); + cend = cstart+ae_maxint(ncc-1, 0, _state); + ae_vector_set_length(&utemp, uend+1, _state); + ae_vector_set_length(&vttemp, vend+1, _state); + ae_vector_set_length(&ctemp, cend+1, _state); + maxitr = 12; + rightside = ae_true; + fwddir = ae_true; + + /* + * resize E from N-1 to N + */ + ae_vector_set_length(&etemp, n+1, _state); + for(i=1; i<=n-1; i++) + { + etemp.ptr.p_double[i] = e->ptr.p_double[i]; + } + ae_vector_set_length(e, n+1, _state); + for(i=1; i<=n-1; i++) + { + e->ptr.p_double[i] = etemp.ptr.p_double[i]; + } + e->ptr.p_double[n] = 0; + idir = 0; + + /* + * Get machine constants + */ + eps = ae_machineepsilon; + unfl = ae_minrealnumber; + + /* + * If matrix lower bidiagonal, rotate to be upper bidiagonal + * by applying Givens rotations on the left + */ + if( !isupper ) + { + for(i=1; i<=n-1; i++) + { + generaterotation(d->ptr.p_double[i], e->ptr.p_double[i], &cs, &sn, &r, _state); + d->ptr.p_double[i] = r; + e->ptr.p_double[i] = sn*d->ptr.p_double[i+1]; + d->ptr.p_double[i+1] = cs*d->ptr.p_double[i+1]; + work0.ptr.p_double[i] = cs; + work1.ptr.p_double[i] = sn; + } + + /* + * Update singular vectors if desired + */ + if( nru>0 ) + { + applyrotationsfromtheright(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, &work0, &work1, u, &utemp, _state); + } + if( ncc>0 ) + { + applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state); + } + } + + /* + * Compute singular values to relative accuracy TOL + * (By setting TOL to be negative, algorithm will compute + * singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + */ + tolmul = ae_maxreal(10, ae_minreal(100, ae_pow(eps, -0.125, _state), _state), _state); + tol = tolmul*eps; + if( !isfractionalaccuracyrequired ) + { + tol = -tol; + } + + /* + * Compute approximate maximum, minimum singular values + */ + smax = 0; + for(i=1; i<=n; i++) + { + smax = ae_maxreal(smax, ae_fabs(d->ptr.p_double[i], _state), _state); + } + for(i=1; i<=n-1; i++) + { + smax = ae_maxreal(smax, ae_fabs(e->ptr.p_double[i], _state), _state); + } + sminl = 0; + if( ae_fp_greater_eq(tol,0) ) + { + + /* + * Relative accuracy desired + */ + sminoa = ae_fabs(d->ptr.p_double[1], _state); + if( ae_fp_neq(sminoa,0) ) + { + mu = sminoa; + for(i=2; i<=n; i++) + { + mu = ae_fabs(d->ptr.p_double[i], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[i-1], _state))); + sminoa = ae_minreal(sminoa, mu, _state); + if( ae_fp_eq(sminoa,0) ) + { + break; + } + } + } + sminoa = sminoa/ae_sqrt(n, _state); + thresh = ae_maxreal(tol*sminoa, maxitr*n*n*unfl, _state); + } + else + { + + /* + * Absolute accuracy desired + */ + thresh = ae_maxreal(ae_fabs(tol, _state)*smax, maxitr*n*n*unfl, _state); + } + + /* + * Prepare for main iteration loop for the singular values + * (MAXIT is the maximum number of passes through the inner + * loop permitted before nonconvergence signalled.) + */ + maxit = maxitr*n*n; + iter = 0; + oldll = -1; + oldm = -1; + + /* + * M points to last element of unconverged part of matrix + */ + m = n; + + /* + * Begin main iteration loop + */ + for(;;) + { + + /* + * Check for convergence or exceeding iteration count + */ + if( m<=1 ) + { + break; + } + if( iter>maxit ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Find diagonal block of matrix to work on + */ + if( ae_fp_less(tol,0)&&ae_fp_less_eq(ae_fabs(d->ptr.p_double[m], _state),thresh) ) + { + d->ptr.p_double[m] = 0; + } + smax = ae_fabs(d->ptr.p_double[m], _state); + smin = smax; + matrixsplitflag = ae_false; + for(lll=1; lll<=m-1; lll++) + { + ll = m-lll; + abss = ae_fabs(d->ptr.p_double[ll], _state); + abse = ae_fabs(e->ptr.p_double[ll], _state); + if( ae_fp_less(tol,0)&&ae_fp_less_eq(abss,thresh) ) + { + d->ptr.p_double[ll] = 0; + } + if( ae_fp_less_eq(abse,thresh) ) + { + matrixsplitflag = ae_true; + break; + } + smin = ae_minreal(smin, abss, _state); + smax = ae_maxreal(smax, ae_maxreal(abss, abse, _state), _state); + } + if( !matrixsplitflag ) + { + ll = 0; + } + else + { + + /* + * Matrix splits since E(LL) = 0 + */ + e->ptr.p_double[ll] = 0; + if( ll==m-1 ) + { + + /* + * Convergence of bottom singular value, return to top of loop + */ + m = m-1; + continue; + } + } + ll = ll+1; + + /* + * E(LL) through E(M-1) are nonzero, E(LL-1) is zero + */ + if( ll==m-1 ) + { + + /* + * 2 by 2 block, handle separately + */ + bdsvd_svdv2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl, _state); + d->ptr.p_double[m-1] = sigmx; + e->ptr.p_double[m-1] = 0; + d->ptr.p_double[m] = sigmn; + + /* + * Compute singular vectors, if desired + */ + if( ncvt>0 ) + { + mm0 = m+(vstart-1); + mm1 = m-1+(vstart-1); + ae_v_moved(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), cosr); + ae_v_addd(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), sinr); + ae_v_muld(&vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), cosr); + ae_v_subd(&vt->ptr.pp_double[mm0][vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), sinr); + ae_v_move(&vt->ptr.pp_double[mm1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend)); + } + if( nru>0 ) + { + mm0 = m+ustart-1; + mm1 = m-1+ustart-1; + ae_v_moved(&utemp.ptr.p_double[ustart], 1, &u->ptr.pp_double[ustart][mm1], u->stride, ae_v_len(ustart,uend), cosl); + ae_v_addd(&utemp.ptr.p_double[ustart], 1, &u->ptr.pp_double[ustart][mm0], u->stride, ae_v_len(ustart,uend), sinl); + ae_v_muld(&u->ptr.pp_double[ustart][mm0], u->stride, ae_v_len(ustart,uend), cosl); + ae_v_subd(&u->ptr.pp_double[ustart][mm0], u->stride, &u->ptr.pp_double[ustart][mm1], u->stride, ae_v_len(ustart,uend), sinl); + ae_v_move(&u->ptr.pp_double[ustart][mm1], u->stride, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend)); + } + if( ncc>0 ) + { + mm0 = m+cstart-1; + mm1 = m-1+cstart-1; + ae_v_moved(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), cosl); + ae_v_addd(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), sinl); + ae_v_muld(&c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), cosl); + ae_v_subd(&c->ptr.pp_double[mm0][cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), sinl); + ae_v_move(&c->ptr.pp_double[mm1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend)); + } + m = m-2; + continue; + } + + /* + * If working on new submatrix, choose shift direction + * (from larger end diagonal element towards smaller) + * + * Previously was + * "if (LL>OLDM) or (M<OLDLL) then" + * fixed thanks to Michael Rolle < m@rolle.name > + * Very strange that LAPACK still contains it. + */ + bchangedir = ae_false; + if( idir==1&&ae_fp_less(ae_fabs(d->ptr.p_double[ll], _state),1.0E-3*ae_fabs(d->ptr.p_double[m], _state)) ) + { + bchangedir = ae_true; + } + if( idir==2&&ae_fp_less(ae_fabs(d->ptr.p_double[m], _state),1.0E-3*ae_fabs(d->ptr.p_double[ll], _state)) ) + { + bchangedir = ae_true; + } + if( (ll!=oldll||m!=oldm)||bchangedir ) + { + if( ae_fp_greater_eq(ae_fabs(d->ptr.p_double[ll], _state),ae_fabs(d->ptr.p_double[m], _state)) ) + { + + /* + * Chase bulge from top (big end) to bottom (small end) + */ + idir = 1; + } + else + { + + /* + * Chase bulge from bottom (big end) to top (small end) + */ + idir = 2; + } + } + + /* + * Apply convergence tests + */ + if( idir==1 ) + { + + /* + * Run convergence test in forward direction + * First apply standard test to bottom of matrix + */ + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[m], _state))||(ae_fp_less(tol,0)&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh)) ) + { + e->ptr.p_double[m-1] = 0; + continue; + } + if( ae_fp_greater_eq(tol,0) ) + { + + /* + * If relative accuracy desired, + * apply convergence criterion forward + */ + mu = ae_fabs(d->ptr.p_double[ll], _state); + sminl = mu; + iterflag = ae_false; + for(lll=ll; lll<=m-1; lll++) + { + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) ) + { + e->ptr.p_double[lll] = 0; + iterflag = ae_true; + break; + } + sminlo = sminl; + mu = ae_fabs(d->ptr.p_double[lll+1], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state))); + sminl = ae_minreal(sminl, mu, _state); + } + if( iterflag ) + { + continue; + } + } + } + else + { + + /* + * Run convergence test in backward direction + * First apply standard test to top of matrix + */ + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[ll], _state))||(ae_fp_less(tol,0)&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh)) ) + { + e->ptr.p_double[ll] = 0; + continue; + } + if( ae_fp_greater_eq(tol,0) ) + { + + /* + * If relative accuracy desired, + * apply convergence criterion backward + */ + mu = ae_fabs(d->ptr.p_double[m], _state); + sminl = mu; + iterflag = ae_false; + for(lll=m-1; lll>=ll; lll--) + { + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) ) + { + e->ptr.p_double[lll] = 0; + iterflag = ae_true; + break; + } + sminlo = sminl; + mu = ae_fabs(d->ptr.p_double[lll], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state))); + sminl = ae_minreal(sminl, mu, _state); + } + if( iterflag ) + { + continue; + } + } + } + oldll = ll; + oldm = m; + + /* + * Compute shift. First, test if shifting would ruin relative + * accuracy, and if so set the shift to zero. + */ + if( ae_fp_greater_eq(tol,0)&&ae_fp_less_eq(n*tol*(sminl/smax),ae_maxreal(eps, 0.01*tol, _state)) ) + { + + /* + * Use a zero shift to avoid loss of relative accuracy + */ + shift = 0; + } + else + { + + /* + * Compute the shift from 2-by-2 block at end of matrix + */ + if( idir==1 ) + { + sll = ae_fabs(d->ptr.p_double[ll], _state); + bdsvd_svd2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &shift, &r, _state); + } + else + { + sll = ae_fabs(d->ptr.p_double[m], _state); + bdsvd_svd2x2(d->ptr.p_double[ll], e->ptr.p_double[ll], d->ptr.p_double[ll+1], &shift, &r, _state); + } + + /* + * Test if shift negligible, and if so set to zero + */ + if( ae_fp_greater(sll,0) ) + { + if( ae_fp_less(ae_sqr(shift/sll, _state),eps) ) + { + shift = 0; + } + } + } + + /* + * Increment iteration count + */ + iter = iter+m-ll; + + /* + * If SHIFT = 0, do simplified QR iteration + */ + if( ae_fp_eq(shift,0) ) + { + if( idir==1 ) + { + + /* + * Chase bulge from top to bottom + * Save cosines and sines for later singular vector updates + */ + cs = 1; + oldcs = 1; + for(i=ll; i<=m-1; i++) + { + generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i], &cs, &sn, &r, _state); + if( i>ll ) + { + e->ptr.p_double[i-1] = oldsn*r; + } + generaterotation(oldcs*r, d->ptr.p_double[i+1]*sn, &oldcs, &oldsn, &tmp, _state); + d->ptr.p_double[i] = tmp; + work0.ptr.p_double[i-ll+1] = cs; + work1.ptr.p_double[i-ll+1] = sn; + work2.ptr.p_double[i-ll+1] = oldcs; + work3.ptr.p_double[i-ll+1] = oldsn; + } + h = d->ptr.p_double[m]*cs; + d->ptr.p_double[m] = h*oldcs; + e->ptr.p_double[m-1] = h*oldsn; + + /* + * Update singular vectors + */ + if( ncvt>0 ) + { + applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state); + } + if( nru>0 ) + { + applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work2, &work3, u, &utemp, _state); + } + if( ncc>0 ) + { + applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state); + } + + /* + * Test convergence + */ + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) ) + { + e->ptr.p_double[m-1] = 0; + } + } + else + { + + /* + * Chase bulge from bottom to top + * Save cosines and sines for later singular vector updates + */ + cs = 1; + oldcs = 1; + for(i=m; i>=ll+1; i--) + { + generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i-1], &cs, &sn, &r, _state); + if( i<m ) + { + e->ptr.p_double[i] = oldsn*r; + } + generaterotation(oldcs*r, d->ptr.p_double[i-1]*sn, &oldcs, &oldsn, &tmp, _state); + d->ptr.p_double[i] = tmp; + work0.ptr.p_double[i-ll] = cs; + work1.ptr.p_double[i-ll] = -sn; + work2.ptr.p_double[i-ll] = oldcs; + work3.ptr.p_double[i-ll] = -oldsn; + } + h = d->ptr.p_double[ll]*cs; + d->ptr.p_double[ll] = h*oldcs; + e->ptr.p_double[ll] = h*oldsn; + + /* + * Update singular vectors + */ + if( ncvt>0 ) + { + applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state); + } + if( nru>0 ) + { + applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work0, &work1, u, &utemp, _state); + } + if( ncc>0 ) + { + applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state); + } + + /* + * Test convergence + */ + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) ) + { + e->ptr.p_double[ll] = 0; + } + } + } + else + { + + /* + * Use nonzero shift + */ + if( idir==1 ) + { + + /* + * Chase bulge from top to bottom + * Save cosines and sines for later singular vector updates + */ + f = (ae_fabs(d->ptr.p_double[ll], _state)-shift)*(bdsvd_extsignbdsqr(1, d->ptr.p_double[ll], _state)+shift/d->ptr.p_double[ll]); + g = e->ptr.p_double[ll]; + for(i=ll; i<=m-1; i++) + { + generaterotation(f, g, &cosr, &sinr, &r, _state); + if( i>ll ) + { + e->ptr.p_double[i-1] = r; + } + f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i]; + e->ptr.p_double[i] = cosr*e->ptr.p_double[i]-sinr*d->ptr.p_double[i]; + g = sinr*d->ptr.p_double[i+1]; + d->ptr.p_double[i+1] = cosr*d->ptr.p_double[i+1]; + generaterotation(f, g, &cosl, &sinl, &r, _state); + d->ptr.p_double[i] = r; + f = cosl*e->ptr.p_double[i]+sinl*d->ptr.p_double[i+1]; + d->ptr.p_double[i+1] = cosl*d->ptr.p_double[i+1]-sinl*e->ptr.p_double[i]; + if( i<m-1 ) + { + g = sinl*e->ptr.p_double[i+1]; + e->ptr.p_double[i+1] = cosl*e->ptr.p_double[i+1]; + } + work0.ptr.p_double[i-ll+1] = cosr; + work1.ptr.p_double[i-ll+1] = sinr; + work2.ptr.p_double[i-ll+1] = cosl; + work3.ptr.p_double[i-ll+1] = sinl; + } + e->ptr.p_double[m-1] = f; + + /* + * Update singular vectors + */ + if( ncvt>0 ) + { + applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state); + } + if( nru>0 ) + { + applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work2, &work3, u, &utemp, _state); + } + if( ncc>0 ) + { + applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state); + } + + /* + * Test convergence + */ + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) ) + { + e->ptr.p_double[m-1] = 0; + } + } + else + { + + /* + * Chase bulge from bottom to top + * Save cosines and sines for later singular vector updates + */ + f = (ae_fabs(d->ptr.p_double[m], _state)-shift)*(bdsvd_extsignbdsqr(1, d->ptr.p_double[m], _state)+shift/d->ptr.p_double[m]); + g = e->ptr.p_double[m-1]; + for(i=m; i>=ll+1; i--) + { + generaterotation(f, g, &cosr, &sinr, &r, _state); + if( i<m ) + { + e->ptr.p_double[i] = r; + } + f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i-1]; + e->ptr.p_double[i-1] = cosr*e->ptr.p_double[i-1]-sinr*d->ptr.p_double[i]; + g = sinr*d->ptr.p_double[i-1]; + d->ptr.p_double[i-1] = cosr*d->ptr.p_double[i-1]; + generaterotation(f, g, &cosl, &sinl, &r, _state); + d->ptr.p_double[i] = r; + f = cosl*e->ptr.p_double[i-1]+sinl*d->ptr.p_double[i-1]; + d->ptr.p_double[i-1] = cosl*d->ptr.p_double[i-1]-sinl*e->ptr.p_double[i-1]; + if( i>ll+1 ) + { + g = sinl*e->ptr.p_double[i-2]; + e->ptr.p_double[i-2] = cosl*e->ptr.p_double[i-2]; + } + work0.ptr.p_double[i-ll] = cosr; + work1.ptr.p_double[i-ll] = -sinr; + work2.ptr.p_double[i-ll] = cosl; + work3.ptr.p_double[i-ll] = -sinl; + } + e->ptr.p_double[ll] = f; + + /* + * Test convergence + */ + if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) ) + { + e->ptr.p_double[ll] = 0; + } + + /* + * Update singular vectors if desired + */ + if( ncvt>0 ) + { + applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state); + } + if( nru>0 ) + { + applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work0, &work1, u, &utemp, _state); + } + if( ncc>0 ) + { + applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state); + } + } + } + + /* + * QR iteration finished, go back and check convergence + */ + continue; + } + + /* + * All singular values converged, so make them positive + */ + for(i=1; i<=n; i++) + { + if( ae_fp_less(d->ptr.p_double[i],0) ) + { + d->ptr.p_double[i] = -d->ptr.p_double[i]; + + /* + * Change sign of singular vectors, if desired + */ + if( ncvt>0 ) + { + ae_v_muld(&vt->ptr.pp_double[i+vstart-1][vstart], 1, ae_v_len(vstart,vend), -1); + } + } + } + + /* + * Sort the singular values into decreasing order (insertion sort on + * singular values, but only one transposition per singular vector) + */ + for(i=1; i<=n-1; i++) + { + + /* + * Scan for smallest D(I) + */ + isub = 1; + smin = d->ptr.p_double[1]; + for(j=2; j<=n+1-i; j++) + { + if( ae_fp_less_eq(d->ptr.p_double[j],smin) ) + { + isub = j; + smin = d->ptr.p_double[j]; + } + } + if( isub!=n+1-i ) + { + + /* + * Swap singular values and vectors + */ + d->ptr.p_double[isub] = d->ptr.p_double[n+1-i]; + d->ptr.p_double[n+1-i] = smin; + if( ncvt>0 ) + { + j = n+1-i; + ae_v_move(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[isub+vstart-1][vstart], 1, ae_v_len(vstart,vend)); + ae_v_move(&vt->ptr.pp_double[isub+vstart-1][vstart], 1, &vt->ptr.pp_double[j+vstart-1][vstart], 1, ae_v_len(vstart,vend)); + ae_v_move(&vt->ptr.pp_double[j+vstart-1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend)); + } + if( nru>0 ) + { + j = n+1-i; + ae_v_move(&utemp.ptr.p_double[ustart], 1, &u->ptr.pp_double[ustart][isub+ustart-1], u->stride, ae_v_len(ustart,uend)); + ae_v_move(&u->ptr.pp_double[ustart][isub+ustart-1], u->stride, &u->ptr.pp_double[ustart][j+ustart-1], u->stride, ae_v_len(ustart,uend)); + ae_v_move(&u->ptr.pp_double[ustart][j+ustart-1], u->stride, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend)); + } + if( ncc>0 ) + { + j = n+1-i; + ae_v_move(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[isub+cstart-1][cstart], 1, ae_v_len(cstart,cend)); + ae_v_move(&c->ptr.pp_double[isub+cstart-1][cstart], 1, &c->ptr.pp_double[j+cstart-1][cstart], 1, ae_v_len(cstart,cend)); + ae_v_move(&c->ptr.pp_double[j+cstart-1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend)); + } + } + } + ae_frame_leave(_state); + return result; +} + + +static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state) +{ + double result; + + + if( ae_fp_greater_eq(b,0) ) + { + result = ae_fabs(a, _state); + } + else + { + result = -ae_fabs(a, _state); + } + return result; +} + + +static void bdsvd_svd2x2(double f, + double g, + double h, + double* ssmin, + double* ssmax, + ae_state *_state) +{ + double aas; + double at; + double au; + double c; + double fa; + double fhmn; + double fhmx; + double ga; + double ha; + + *ssmin = 0; + *ssmax = 0; + + fa = ae_fabs(f, _state); + ga = ae_fabs(g, _state); + ha = ae_fabs(h, _state); + fhmn = ae_minreal(fa, ha, _state); + fhmx = ae_maxreal(fa, ha, _state); + if( ae_fp_eq(fhmn,0) ) + { + *ssmin = 0; + if( ae_fp_eq(fhmx,0) ) + { + *ssmax = ga; + } + else + { + *ssmax = ae_maxreal(fhmx, ga, _state)*ae_sqrt(1+ae_sqr(ae_minreal(fhmx, ga, _state)/ae_maxreal(fhmx, ga, _state), _state), _state); + } + } + else + { + if( ae_fp_less(ga,fhmx) ) + { + aas = 1+fhmn/fhmx; + at = (fhmx-fhmn)/fhmx; + au = ae_sqr(ga/fhmx, _state); + c = 2/(ae_sqrt(aas*aas+au, _state)+ae_sqrt(at*at+au, _state)); + *ssmin = fhmn*c; + *ssmax = fhmx/c; + } + else + { + au = fhmx/ga; + if( ae_fp_eq(au,0) ) + { + + /* + * Avoid possible harmful underflow if exponent range + * asymmetric (true SSMIN may not underflow even if + * AU underflows) + */ + *ssmin = fhmn*fhmx/ga; + *ssmax = ga; + } + else + { + aas = 1+fhmn/fhmx; + at = (fhmx-fhmn)/fhmx; + c = 1/(ae_sqrt(1+ae_sqr(aas*au, _state), _state)+ae_sqrt(1+ae_sqr(at*au, _state), _state)); + *ssmin = fhmn*c*au; + *ssmin = *ssmin+(*ssmin); + *ssmax = ga/(c+c); + } + } + } +} + + +static void bdsvd_svdv2x2(double f, + double g, + double h, + double* ssmin, + double* ssmax, + double* snr, + double* csr, + double* snl, + double* csl, + ae_state *_state) +{ + ae_bool gasmal; + ae_bool swp; + ae_int_t pmax; + double a; + double clt; + double crt; + double d; + double fa; + double ft; + double ga; + double gt; + double ha; + double ht; + double l; + double m; + double mm; + double r; + double s; + double slt; + double srt; + double t; + double temp; + double tsign; + double tt; + double v; + + *ssmin = 0; + *ssmax = 0; + *snr = 0; + *csr = 0; + *snl = 0; + *csl = 0; + + ft = f; + fa = ae_fabs(ft, _state); + ht = h; + ha = ae_fabs(h, _state); + + /* + * these initializers are not really necessary, + * but without them compiler complains about uninitialized locals + */ + clt = 0; + crt = 0; + slt = 0; + srt = 0; + tsign = 0; + + /* + * PMAX points to the maximum absolute element of matrix + * PMAX = 1 if F largest in absolute values + * PMAX = 2 if G largest in absolute values + * PMAX = 3 if H largest in absolute values + */ + pmax = 1; + swp = ae_fp_greater(ha,fa); + if( swp ) + { + + /* + * Now FA .ge. HA + */ + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + } + gt = g; + ga = ae_fabs(gt, _state); + if( ae_fp_eq(ga,0) ) + { + + /* + * Diagonal matrix + */ + *ssmin = ha; + *ssmax = fa; + clt = 1; + crt = 1; + slt = 0; + srt = 0; + } + else + { + gasmal = ae_true; + if( ae_fp_greater(ga,fa) ) + { + pmax = 2; + if( ae_fp_less(fa/ga,ae_machineepsilon) ) + { + + /* + * Case of very large GA + */ + gasmal = ae_false; + *ssmax = ga; + if( ae_fp_greater(ha,1) ) + { + v = ga/ha; + *ssmin = fa/v; + } + else + { + v = fa/ga; + *ssmin = v*ha; + } + clt = 1; + slt = ht/gt; + srt = 1; + crt = ft/gt; + } + } + if( gasmal ) + { + + /* + * Normal case + */ + d = fa-ha; + if( ae_fp_eq(d,fa) ) + { + l = 1; + } + else + { + l = d/fa; + } + m = gt/ft; + t = 2-l; + mm = m*m; + tt = t*t; + s = ae_sqrt(tt+mm, _state); + if( ae_fp_eq(l,0) ) + { + r = ae_fabs(m, _state); + } + else + { + r = ae_sqrt(l*l+mm, _state); + } + a = 0.5*(s+r); + *ssmin = ha/a; + *ssmax = fa*a; + if( ae_fp_eq(mm,0) ) + { + + /* + * Note that M is very tiny + */ + if( ae_fp_eq(l,0) ) + { + t = bdsvd_extsignbdsqr(2, ft, _state)*bdsvd_extsignbdsqr(1, gt, _state); + } + else + { + t = gt/bdsvd_extsignbdsqr(d, ft, _state)+m/t; + } + } + else + { + t = (m/(s+t)+m/(r+l))*(1+a); + } + l = ae_sqrt(t*t+4, _state); + crt = 2/l; + srt = t/l; + clt = (crt+srt*m)/a; + v = ht/ft; + slt = v*srt/a; + } + } + if( swp ) + { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } + else + { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + + /* + * Correct signs of SSMAX and SSMIN + */ + if( pmax==1 ) + { + tsign = bdsvd_extsignbdsqr(1, *csr, _state)*bdsvd_extsignbdsqr(1, *csl, _state)*bdsvd_extsignbdsqr(1, f, _state); + } + if( pmax==2 ) + { + tsign = bdsvd_extsignbdsqr(1, *snr, _state)*bdsvd_extsignbdsqr(1, *csl, _state)*bdsvd_extsignbdsqr(1, g, _state); + } + if( pmax==3 ) + { + tsign = bdsvd_extsignbdsqr(1, *snr, _state)*bdsvd_extsignbdsqr(1, *snl, _state)*bdsvd_extsignbdsqr(1, h, _state); + } + *ssmax = bdsvd_extsignbdsqr(*ssmax, tsign, _state); + *ssmin = bdsvd_extsignbdsqr(*ssmin, tsign*bdsvd_extsignbdsqr(1, f, _state)*bdsvd_extsignbdsqr(1, h, _state), _state); +} + + + + +/************************************************************************* +Singular value decomposition of a rectangular matrix. + +The algorithm calculates the singular value decomposition of a matrix of +size MxN: A = U * S * V^T + +The algorithm finds the singular values and, optionally, matrices U and V^T. +The algorithm can find both first min(M,N) columns of matrix U and rows of +matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM +and NxN respectively). + +Take into account that the subroutine does not return matrix V but V^T. + +Input parameters: + A - matrix to be decomposed. + Array whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + UNeeded - 0, 1 or 2. See the description of the parameter U. + VTNeeded - 0, 1 or 2. See the description of the parameter VT. + AdditionalMemory - + If the parameter: + * equals 0, the algorithm doesn’t use additional + memory (lower requirements, lower performance). + * equals 1, the algorithm uses additional + memory of size min(M,N)*min(M,N) of real numbers. + It often speeds up the algorithm. + * equals 2, the algorithm uses additional + memory of size M*min(M,N) of real numbers. + It allows to get a maximum performance. + The recommended value of the parameter is 2. + +Output parameters: + W - contains singular values in descending order. + U - if UNeeded=0, U isn't changed, the left singular vectors + are not calculated. + if Uneeded=1, U contains left singular vectors (first + min(M,N) columns of matrix U). Array whose indexes range + within [0..M-1, 0..Min(M,N)-1]. + if UNeeded=2, U contains matrix U wholly. Array whose + indexes range within [0..M-1, 0..M-1]. + VT - if VTNeeded=0, VT isn’t changed, the right singular vectors + are not calculated. + if VTNeeded=1, VT contains right singular vectors (first + min(M,N) rows of matrix V^T). Array whose indexes range + within [0..min(M,N)-1, 0..N-1]. + if VTNeeded=2, VT contains matrix V^T wholly. Array whose + indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +ae_bool rmatrixsvd(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_int_t uneeded, + ae_int_t vtneeded, + ae_int_t additionalmemory, + /* Real */ ae_vector* w, + /* Real */ ae_matrix* u, + /* Real */ ae_matrix* vt, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector tauq; + ae_vector taup; + ae_vector tau; + ae_vector e; + ae_vector work; + ae_matrix t2; + ae_bool isupper; + ae_int_t minmn; + ae_int_t ncu; + ae_int_t nrvt; + ae_int_t nru; + ae_int_t ncvt; + ae_int_t i; + ae_int_t j; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(w); + ae_matrix_clear(u); + ae_matrix_clear(vt); + ae_vector_init(&tauq, 0, DT_REAL, _state, ae_true); + ae_vector_init(&taup, 0, DT_REAL, _state, ae_true); + ae_vector_init(&tau, 0, DT_REAL, _state, ae_true); + ae_vector_init(&e, 0, DT_REAL, _state, ae_true); + ae_vector_init(&work, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&t2, 0, 0, DT_REAL, _state, ae_true); + + result = ae_true; + if( m==0||n==0 ) + { + ae_frame_leave(_state); + return result; + } + ae_assert(uneeded>=0&&uneeded<=2, "SVDDecomposition: wrong parameters!", _state); + ae_assert(vtneeded>=0&&vtneeded<=2, "SVDDecomposition: wrong parameters!", _state); + ae_assert(additionalmemory>=0&&additionalmemory<=2, "SVDDecomposition: wrong parameters!", _state); + + /* + * initialize + */ + minmn = ae_minint(m, n, _state); + ae_vector_set_length(w, minmn+1, _state); + ncu = 0; + nru = 0; + if( uneeded==1 ) + { + nru = m; + ncu = minmn; + ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state); + } + if( uneeded==2 ) + { + nru = m; + ncu = m; + ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state); + } + nrvt = 0; + ncvt = 0; + if( vtneeded==1 ) + { + nrvt = minmn; + ncvt = n; + ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state); + } + if( vtneeded==2 ) + { + nrvt = n; + ncvt = n; + ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state); + } + + /* + * M much larger than N + * Use bidiagonal reduction with QR-decomposition + */ + if( ae_fp_greater(m,1.6*n) ) + { + if( uneeded==0 ) + { + + /* + * No left singular vectors to be computed + */ + rmatrixqr(a, m, n, &tau, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + rmatrixbd(a, n, n, &tauq, &taup, _state); + rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state); + rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state); + result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, a, 0, vt, ncvt, _state); + ae_frame_leave(_state); + return result; + } + else + { + + /* + * Left singular vectors (may be full matrix U) to be computed + */ + rmatrixqr(a, m, n, &tau, _state); + rmatrixqrunpackq(a, m, n, &tau, ncu, u, _state); + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + rmatrixbd(a, n, n, &tauq, &taup, _state); + rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state); + rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state); + if( additionalmemory<1 ) + { + + /* + * No additional memory can be used + */ + rmatrixbdmultiplybyq(a, n, n, &tauq, u, m, n, ae_true, ae_false, _state); + result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, m, a, 0, vt, ncvt, _state); + } + else + { + + /* + * Large U. Transforming intermediate matrix T2 + */ + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + rmatrixbdunpackq(a, n, n, &tauq, n, &t2, _state); + copymatrix(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state); + inplacetranspose(&t2, 0, n-1, 0, n-1, &work, _state); + result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, &t2, n, vt, ncvt, _state); + matrixmatrixmultiply(a, 0, m-1, 0, n-1, ae_false, &t2, 0, n-1, 0, n-1, ae_true, 1.0, u, 0, m-1, 0, n-1, 0.0, &work, _state); + } + ae_frame_leave(_state); + return result; + } + } + + /* + * N much larger than M + * Use bidiagonal reduction with LQ-decomposition + */ + if( ae_fp_greater(n,1.6*m) ) + { + if( vtneeded==0 ) + { + + /* + * No right singular vectors to be computed + */ + rmatrixlq(a, m, n, &tau, _state); + for(i=0; i<=m-1; i++) + { + for(j=i+1; j<=m-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + rmatrixbd(a, m, m, &tauq, &taup, _state); + rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state); + rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state); + ae_vector_set_length(&work, m+1, _state); + inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state); + result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, 0, _state); + inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state); + ae_frame_leave(_state); + return result; + } + else + { + + /* + * Right singular vectors (may be full matrix VT) to be computed + */ + rmatrixlq(a, m, n, &tau, _state); + rmatrixlqunpackq(a, m, n, &tau, nrvt, vt, _state); + for(i=0; i<=m-1; i++) + { + for(j=i+1; j<=m-1; j++) + { + a->ptr.pp_double[i][j] = 0; + } + } + rmatrixbd(a, m, m, &tauq, &taup, _state); + rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state); + rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state); + ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state); + inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state); + if( additionalmemory<1 ) + { + + /* + * No additional memory available + */ + rmatrixbdmultiplybyp(a, m, m, &taup, vt, m, n, ae_false, ae_true, _state); + result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, n, _state); + } + else + { + + /* + * Large VT. Transforming intermediate matrix T2 + */ + rmatrixbdunpackpt(a, m, m, &taup, m, &t2, _state); + result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, &t2, m, _state); + copymatrix(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state); + matrixmatrixmultiply(&t2, 0, m-1, 0, m-1, ae_false, a, 0, m-1, 0, n-1, ae_false, 1.0, vt, 0, m-1, 0, n-1, 0.0, &work, _state); + } + inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state); + ae_frame_leave(_state); + return result; + } + } + + /* + * M<=N + * We can use inplace transposition of U to get rid of columnwise operations + */ + if( m<=n ) + { + rmatrixbd(a, m, n, &tauq, &taup, _state); + rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state); + rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state); + rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state); + ae_vector_set_length(&work, m+1, _state); + inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state); + result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, a, 0, u, nru, vt, ncvt, _state); + inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state); + ae_frame_leave(_state); + return result; + } + + /* + * Simple bidiagonal reduction + */ + rmatrixbd(a, m, n, &tauq, &taup, _state); + rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state); + rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state); + rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state); + if( additionalmemory<2||uneeded==0 ) + { + + /* + * We cant use additional memory or there is no need in such operations + */ + result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, nru, a, 0, vt, ncvt, _state); + } + else + { + + /* + * We can use additional memory + */ + ae_matrix_set_length(&t2, minmn-1+1, m-1+1, _state); + copyandtranspose(u, 0, m-1, 0, minmn-1, &t2, 0, minmn-1, 0, m-1, _state); + result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, 0, &t2, m, vt, ncvt, _state); + copyandtranspose(&t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1, _state); + } + ae_frame_leave(_state); + return result; +} + + + + +/************************************************************************* +Basic Cholesky solver for ScaleA*Cholesky(A)'*x = y. + +This subroutine assumes that: +* A*ScaleA is well scaled +* A is well-conditioned, so no zero divisions or overflow may occur + +INPUT PARAMETERS: + CHA - Cholesky decomposition of A + SqrtScaleA- square root of scale factor ScaleA + N - matrix size + IsUpper - storage type + XB - right part + Tmp - buffer; function automatically allocates it, if it is too + small. It can be reused if function is called several + times. + +OUTPUT PARAMETERS: + XB - solution + +NOTES: no assertion or tests are done during algorithm operation + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void fblscholeskysolve(/* Real */ ae_matrix* cha, + double sqrtscalea, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* xb, + /* Real */ ae_vector* tmp, + ae_state *_state) +{ + ae_int_t i; + double v; + + + if( tmp->cnt<n ) + { + ae_vector_set_length(tmp, n, _state); + } + + /* + * A = L*L' or A=U'*U + */ + if( isupper ) + { + + /* + * Solve U'*y=b first. + */ + for(i=0; i<=n-1; i++) + { + xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]); + if( i<n-1 ) + { + v = xb->ptr.p_double[i]; + ae_v_moved(&tmp->ptr.p_double[i+1], 1, &cha->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sqrtscalea); + ae_v_subd(&xb->ptr.p_double[i+1], 1, &tmp->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), v); + } + } + + /* + * Solve U*x=y then. + */ + for(i=n-1; i>=0; i--) + { + if( i<n-1 ) + { + ae_v_moved(&tmp->ptr.p_double[i+1], 1, &cha->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sqrtscalea); + v = ae_v_dotproduct(&tmp->ptr.p_double[i+1], 1, &xb->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1)); + xb->ptr.p_double[i] = xb->ptr.p_double[i]-v; + } + xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]); + } + } + else + { + + /* + * Solve L*y=b first + */ + for(i=0; i<=n-1; i++) + { + if( i>0 ) + { + ae_v_moved(&tmp->ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sqrtscalea); + v = ae_v_dotproduct(&tmp->ptr.p_double[0], 1, &xb->ptr.p_double[0], 1, ae_v_len(0,i-1)); + xb->ptr.p_double[i] = xb->ptr.p_double[i]-v; + } + xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]); + } + + /* + * Solve L'*x=y then. + */ + for(i=n-1; i>=0; i--) + { + xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]); + if( i>0 ) + { + v = xb->ptr.p_double[i]; + ae_v_moved(&tmp->ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sqrtscalea); + ae_v_subd(&xb->ptr.p_double[0], 1, &tmp->ptr.p_double[0], 1, ae_v_len(0,i-1), v); + } + } + } +} + + +/************************************************************************* +Fast basic linear solver: linear SPD CG + +Solves (A^T*A + alpha*I)*x = b where: +* A is MxN matrix +* alpha>0 is a scalar +* I is NxN identity matrix +* b is Nx1 vector +* X is Nx1 unknown vector. + +N iterations of linear conjugate gradient are used to solve problem. + +INPUT PARAMETERS: + A - array[M,N], matrix + M - number of rows + N - number of unknowns + B - array[N], right part + X - initial approxumation, array[N] + Buf - buffer; function automatically allocates it, if it is too + small. It can be reused if function is called several times + with same M and N. + +OUTPUT PARAMETERS: + X - improved solution + +NOTES: +* solver checks quality of improved solution. If (because of problem + condition number, numerical noise, etc.) new solution is WORSE than + original approximation, then original approximation is returned. +* solver assumes that both A, B, Alpha are well scaled (i.e. they are + less than sqrt(overflow) and greater than sqrt(underflow)). + + -- ALGLIB -- + Copyright 20.08.2009 by Bochkanov Sergey +*************************************************************************/ +void fblssolvecgx(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + double alpha, + /* Real */ ae_vector* b, + /* Real */ ae_vector* x, + /* Real */ ae_vector* buf, + ae_state *_state) +{ + ae_int_t k; + ae_int_t offsrk; + ae_int_t offsrk1; + ae_int_t offsxk; + ae_int_t offsxk1; + ae_int_t offspk; + ae_int_t offspk1; + ae_int_t offstmp1; + ae_int_t offstmp2; + ae_int_t bs; + double e1; + double e2; + double rk2; + double rk12; + double pap; + double s; + double betak; + double v1; + double v2; + + + + /* + * Test for special case: B=0 + */ + v1 = ae_v_dotproduct(&b->ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_fp_eq(v1,0) ) + { + for(k=0; k<=n-1; k++) + { + x->ptr.p_double[k] = 0; + } + return; + } + + /* + * Offsets inside Buf for: + * * R[K], R[K+1] + * * X[K], X[K+1] + * * P[K], P[K+1] + * * Tmp1 - array[M], Tmp2 - array[N] + */ + offsrk = 0; + offsrk1 = offsrk+n; + offsxk = offsrk1+n; + offsxk1 = offsxk+n; + offspk = offsxk1+n; + offspk1 = offspk+n; + offstmp1 = offspk1+n; + offstmp2 = offstmp1+m; + bs = offstmp2+n; + if( buf->cnt<bs ) + { + ae_vector_set_length(buf, bs, _state); + } + + /* + * x(0) = x + */ + ae_v_move(&buf->ptr.p_double[offsxk], 1, &x->ptr.p_double[0], 1, ae_v_len(offsxk,offsxk+n-1)); + + /* + * r(0) = b-A*x(0) + * RK2 = r(0)'*r(0) + */ + rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state); + rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state); + ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha); + ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1)); + ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1)); + rk2 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1)); + ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offspk,offspk+n-1)); + e1 = ae_sqrt(rk2, _state); + + /* + * Cycle + */ + for(k=0; k<=n-1; k++) + { + + /* + * Calculate A*p(k) - store in Buf[OffsTmp2:OffsTmp2+N-1] + * and p(k)'*A*p(k) - store in PAP + * + * If PAP=0, break (iteration is over) + */ + rmatrixmv(m, n, a, 0, 0, 0, buf, offspk, buf, offstmp1, _state); + v1 = ae_v_dotproduct(&buf->ptr.p_double[offstmp1], 1, &buf->ptr.p_double[offstmp1], 1, ae_v_len(offstmp1,offstmp1+m-1)); + v2 = ae_v_dotproduct(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk,offspk+n-1)); + pap = v1+alpha*v2; + rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state); + ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha); + if( ae_fp_eq(pap,0) ) + { + break; + } + + /* + * S = (r(k)'*r(k))/(p(k)'*A*p(k)) + */ + s = rk2/pap; + + /* + * x(k+1) = x(k) + S*p(k) + */ + ae_v_move(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offsxk1,offsxk1+n-1)); + ae_v_addd(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offsxk1,offsxk1+n-1), s); + + /* + * r(k+1) = r(k) - S*A*p(k) + * RK12 = r(k+1)'*r(k+1) + * + * Break if r(k+1) small enough (when compared to r(k)) + */ + ae_v_move(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk1,offsrk1+n-1)); + ae_v_subd(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk1,offsrk1+n-1), s); + rk12 = ae_v_dotproduct(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk1,offsrk1+n-1)); + if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*ae_sqrt(rk2, _state)) ) + { + + /* + * X(k) = x(k+1) before exit - + * - because we expect to find solution at x(k) + */ + ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1)); + break; + } + + /* + * BetaK = RK12/RK2 + * p(k+1) = r(k+1)+betak*p(k) + */ + betak = rk12/rk2; + ae_v_move(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offspk1,offspk1+n-1)); + ae_v_addd(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk1,offspk1+n-1), betak); + + /* + * r(k) := r(k+1) + * x(k) := x(k+1) + * p(k) := p(k+1) + */ + ae_v_move(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk,offsrk+n-1)); + ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1)); + ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk1], 1, ae_v_len(offspk,offspk+n-1)); + rk2 = rk12; + } + + /* + * Calculate E2 + */ + rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state); + rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state); + ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha); + ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1)); + ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1)); + v1 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1)); + e2 = ae_sqrt(v1, _state); + + /* + * Output result (if it was improved) + */ + if( ae_fp_less(e2,e1) ) + { + ae_v_move(&x->ptr.p_double[0], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(0,n-1)); + } +} + + +/************************************************************************* +Construction of linear conjugate gradient solver. + +State parameter passed using "var" semantics (i.e. previous state is NOT +erased). When it is already initialized, we can reause prevously allocated +memory. + +INPUT PARAMETERS: + X - initial solution + B - right part + N - system size + State - structure; may be preallocated, if we want to reuse memory + +OUTPUT PARAMETERS: + State - structure which is used by FBLSCGIteration() to store + algorithm state between subsequent calls. + +NOTE: no error checking is done; caller must check all parameters, prevent + overflows, and so on. + + -- ALGLIB -- + Copyright 22.10.2009 by Bochkanov Sergey +*************************************************************************/ +void fblscgcreate(/* Real */ ae_vector* x, + /* Real */ ae_vector* b, + ae_int_t n, + fblslincgstate* state, + ae_state *_state) +{ + + + if( state->b.cnt<n ) + { + ae_vector_set_length(&state->b, n, _state); + } + if( state->rk.cnt<n ) + { + ae_vector_set_length(&state->rk, n, _state); + } + if( state->rk1.cnt<n ) + { + ae_vector_set_length(&state->rk1, n, _state); + } + if( state->xk.cnt<n ) + { + ae_vector_set_length(&state->xk, n, _state); + } + if( state->xk1.cnt<n ) + { + ae_vector_set_length(&state->xk1, n, _state); + } + if( state->pk.cnt<n ) + { + ae_vector_set_length(&state->pk, n, _state); + } + if( state->pk1.cnt<n ) + { + ae_vector_set_length(&state->pk1, n, _state); + } + if( state->tmp2.cnt<n ) + { + ae_vector_set_length(&state->tmp2, n, _state); + } + if( state->x.cnt<n ) + { + ae_vector_set_length(&state->x, n, _state); + } + if( state->ax.cnt<n ) + { + ae_vector_set_length(&state->ax, n, _state); + } + state->n = n; + ae_v_move(&state->xk.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->b.ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_vector_set_length(&state->rstate.ia, 1+1, _state); + ae_vector_set_length(&state->rstate.ra, 6+1, _state); + state->rstate.stage = -1; +} + + +/************************************************************************* +Linear CG solver, function relying on reverse communication to calculate +matrix-vector products. + +See comments for FBLSLinCGState structure for more info. + + -- ALGLIB -- + Copyright 22.10.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool fblscgiteration(fblslincgstate* state, ae_state *_state) +{ + ae_int_t n; + ae_int_t k; + double rk2; + double rk12; + double pap; + double s; + double betak; + double v1; + double v2; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + k = state->rstate.ia.ptr.p_int[1]; + rk2 = state->rstate.ra.ptr.p_double[0]; + rk12 = state->rstate.ra.ptr.p_double[1]; + pap = state->rstate.ra.ptr.p_double[2]; + s = state->rstate.ra.ptr.p_double[3]; + betak = state->rstate.ra.ptr.p_double[4]; + v1 = state->rstate.ra.ptr.p_double[5]; + v2 = state->rstate.ra.ptr.p_double[6]; + } + else + { + n = -983; + k = -989; + rk2 = -834; + rk12 = 900; + pap = -287; + s = 364; + betak = 214; + v1 = -338; + v2 = -686; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + + /* + * Routine body + */ + + /* + * prepare locals + */ + n = state->n; + + /* + * Test for special case: B=0 + */ + v1 = ae_v_dotproduct(&state->b.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_fp_eq(v1,0) ) + { + for(k=0; k<=n-1; k++) + { + state->xk.ptr.p_double[k] = 0; + } + result = ae_false; + return result; + } + + /* + * r(0) = b-A*x(0) + * RK2 = r(0)'*r(0) + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + rk2 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->pk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->e1 = ae_sqrt(rk2, _state); + + /* + * Cycle + */ + k = 0; +lbl_3: + if( k>n-1 ) + { + goto lbl_5; + } + + /* + * Calculate A*p(k) - store in State.Tmp2 + * and p(k)'*A*p(k) - store in PAP + * + * If PAP=0, break (iteration is over) + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + ae_v_move(&state->tmp2.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + pap = state->xax; + if( !ae_isfinite(pap, _state) ) + { + goto lbl_5; + } + if( ae_fp_less_eq(pap,0) ) + { + goto lbl_5; + } + + /* + * S = (r(k)'*r(k))/(p(k)'*A*p(k)) + */ + s = rk2/pap; + + /* + * x(k+1) = x(k) + S*p(k) + */ + ae_v_move(&state->xk1.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->xk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), s); + + /* + * r(k+1) = r(k) - S*A*p(k) + * RK12 = r(k+1)'*r(k+1) + * + * Break if r(k+1) small enough (when compared to r(k)) + */ + ae_v_move(&state->rk1.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_subd(&state->rk1.ptr.p_double[0], 1, &state->tmp2.ptr.p_double[0], 1, ae_v_len(0,n-1), s); + rk12 = ae_v_dotproduct(&state->rk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*state->e1) ) + { + + /* + * X(k) = x(k+1) before exit - + * - because we expect to find solution at x(k) + */ + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + goto lbl_5; + } + + /* + * BetaK = RK12/RK2 + * p(k+1) = r(k+1)+betak*p(k) + * + * NOTE: we expect that BetaK won't overflow because of + * "Sqrt(RK12)<=100*MachineEpsilon*E1" test above. + */ + betak = rk12/rk2; + ae_v_move(&state->pk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->pk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak); + + /* + * r(k) := r(k+1) + * x(k) := x(k+1) + * p(k) := p(k+1) + */ + ae_v_move(&state->rk.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->pk.ptr.p_double[0], 1, &state->pk1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + rk2 = rk12; + k = k+1; + goto lbl_3; +lbl_5: + + /* + * Calculate E2 + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v1 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->e2 = ae_sqrt(v1, _state); + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ia.ptr.p_int[1] = k; + state->rstate.ra.ptr.p_double[0] = rk2; + state->rstate.ra.ptr.p_double[1] = rk12; + state->rstate.ra.ptr.p_double[2] = pap; + state->rstate.ra.ptr.p_double[3] = s; + state->rstate.ra.ptr.p_double[4] = betak; + state->rstate.ra.ptr.p_double[5] = v1; + state->rstate.ra.ptr.p_double[6] = v2; + return result; +} + + +ae_bool _fblslincgstate_init(fblslincgstate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ax, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->rk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->rk1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xk1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->pk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->pk1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->b, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->tmp2, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _fblslincgstate_init_copy(fblslincgstate* dst, fblslincgstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->e1 = src->e1; + dst->e2 = src->e2; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->ax, &src->ax, _state, make_automatic) ) + return ae_false; + dst->xax = src->xax; + dst->n = src->n; + if( !ae_vector_init_copy(&dst->rk, &src->rk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->rk1, &src->rk1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xk1, &src->xk1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->pk, &src->pk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->pk1, &src->pk1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->b, &src->b, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->tmp2, &src->tmp2, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _fblslincgstate_clear(fblslincgstate* p) +{ + ae_vector_clear(&p->x); + ae_vector_clear(&p->ax); + ae_vector_clear(&p->rk); + ae_vector_clear(&p->rk1); + ae_vector_clear(&p->xk); + ae_vector_clear(&p->xk1); + ae_vector_clear(&p->pk); + ae_vector_clear(&p->pk1); + ae_vector_clear(&p->b); + _rcommstate_clear(&p->rstate); + ae_vector_clear(&p->tmp2); +} + + + + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixludet(/* Real */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t s; + double result; + + + ae_assert(n>=1, "RMatrixLUDet: N<1!", _state); + ae_assert(pivots->cnt>=n, "RMatrixLUDet: Pivots array is too short!", _state); + ae_assert(a->rows>=n, "RMatrixLUDet: rows(A)<N!", _state); + ae_assert(a->cols>=n, "RMatrixLUDet: cols(A)<N!", _state); + ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUDet: A contains infinite or NaN values!", _state); + result = 1; + s = 1; + for(i=0; i<=n-1; i++) + { + result = result*a->ptr.pp_double[i][i]; + if( pivots->ptr.p_int[i]!=i ) + { + s = -s; + } + } + result = result*s; + return result; +} + + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixdet(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector pivots; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "RMatrixDet: N<1!", _state); + ae_assert(a->rows>=n, "RMatrixDet: rows(A)<N!", _state); + ae_assert(a->cols>=n, "RMatrixDet: cols(A)<N!", _state); + ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixDet: A contains infinite or NaN values!", _state); + rmatrixlu(a, n, n, &pivots, _state); + result = rmatrixludet(a, &pivots, n, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +ae_complex cmatrixludet(/* Complex */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_int_t s; + ae_complex result; + + + ae_assert(n>=1, "CMatrixLUDet: N<1!", _state); + ae_assert(pivots->cnt>=n, "CMatrixLUDet: Pivots array is too short!", _state); + ae_assert(a->rows>=n, "CMatrixLUDet: rows(A)<N!", _state); + ae_assert(a->cols>=n, "CMatrixLUDet: cols(A)<N!", _state); + ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUDet: A contains infinite or NaN values!", _state); + result = ae_complex_from_d(1); + s = 1; + for(i=0; i<=n-1; i++) + { + result = ae_c_mul(result,a->ptr.pp_complex[i][i]); + if( pivots->ptr.p_int[i]!=i ) + { + s = -s; + } + } + result = ae_c_mul_d(result,s); + return result; +} + + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +ae_complex cmatrixdet(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_vector pivots; + ae_complex result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_init(&pivots, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "CMatrixDet: N<1!", _state); + ae_assert(a->rows>=n, "CMatrixDet: rows(A)<N!", _state); + ae_assert(a->cols>=n, "CMatrixDet: cols(A)<N!", _state); + ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixDet: A contains infinite or NaN values!", _state); + cmatrixlu(a, n, n, &pivots, _state); + result = cmatrixludet(a, &pivots, n, _state); + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Determinant calculation of the matrix given by the Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition, + output of SMatrixCholesky subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +As the determinant is equal to the product of squares of diagonal elements, +it’s not necessary to specify which triangle - lower or upper - the matrix +is stored in. + +Result: + matrix determinant. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixcholeskydet(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state) +{ + ae_int_t i; + ae_bool f; + double result; + + + ae_assert(n>=1, "SPDMatrixCholeskyDet: N<1!", _state); + ae_assert(a->rows>=n, "SPDMatrixCholeskyDet: rows(A)<N!", _state); + ae_assert(a->cols>=n, "SPDMatrixCholeskyDet: cols(A)<N!", _state); + f = ae_true; + for(i=0; i<=n-1; i++) + { + f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state); + } + ae_assert(f, "SPDMatrixCholeskyDet: A contains infinite or NaN values!", _state); + result = 1; + for(i=0; i<=n-1; i++) + { + result = result*ae_sqr(a->ptr.pp_double[i][i], _state); + } + return result; +} + + +/************************************************************************* +Determinant calculation of the symmetric positive definite matrix. + +Input parameters: + A - matrix. Array with elements [0..N-1, 0..N-1]. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + IsUpper - (optional) storage type: + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Result: + determinant of matrix A. + If matrix A is not positive definite, exception is thrown. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixdet(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_bool b; + double result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + + ae_assert(n>=1, "SPDMatrixDet: N<1!", _state); + ae_assert(a->rows>=n, "SPDMatrixDet: rows(A)<N!", _state); + ae_assert(a->cols>=n, "SPDMatrixDet: cols(A)<N!", _state); + ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixDet: A contains infinite or NaN values!", _state); + b = spdmatrixcholesky(a, n, isupper, _state); + ae_assert(b, "SPDMatrixDet: A is not SPD!", _state); + result = spdmatrixcholeskydet(a, n, _state); + ae_frame_leave(_state); + return result; +} + + + + +/************************************************************************* +Algorithm for solving the following generalized symmetric positive-definite +eigenproblem: + A*x = lambda*B*x (1) or + A*B*x = lambda*x (2) or + B*A*x = lambda*x (3). +where A is a symmetric matrix, B - symmetric positive-definite matrix. +The problem is solved by reducing it to an ordinary symmetric eigenvalue +problem. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrices A and B. + IsUpperA - storage format of matrix A. + B - symmetric positive-definite matrix which is given by + its upper or lower triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperB - storage format of matrix B. + ZNeeded - if ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + ProblemType - if ProblemType is equal to: + * 1, the following problem is solved: A*x = lambda*B*x; + * 2, the following problem is solved: A*B*x = lambda*x; + * 3, the following problem is solved: B*A*x = lambda*x. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in matrix columns. It should + be noted that the eigenvectors in such problems do not + form an orthogonal system. + +Result: + True, if the problem was solved successfully. + False, if the error occurred during the Cholesky decomposition of matrix + B (the matrix isn’t positive-definite) or during the work of the iterative + algorithm for solving the symmetric eigenproblem. + +See also the GeneralizedSymmetricDefiniteEVDReduce subroutine. + + -- ALGLIB -- + Copyright 1.28.2006 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixgevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isuppera, + /* Real */ ae_matrix* b, + ae_bool isupperb, + ae_int_t zneeded, + ae_int_t problemtype, + /* Real */ ae_vector* d, + /* Real */ ae_matrix* z, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix _a; + ae_matrix r; + ae_matrix t; + ae_bool isupperr; + ae_int_t j1; + ae_int_t j2; + ae_int_t j1inc; + ae_int_t j2inc; + ae_int_t i; + ae_int_t j; + double v; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_init_copy(&_a, a, _state, ae_true); + a = &_a; + ae_vector_clear(d); + ae_matrix_clear(z); + ae_matrix_init(&r, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true); + + + /* + * Reduce and solve + */ + result = smatrixgevdreduce(a, n, isuppera, b, isupperb, problemtype, &r, &isupperr, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + result = smatrixevd(a, n, zneeded, isuppera, d, &t, _state); + if( !result ) + { + ae_frame_leave(_state); + return result; + } + + /* + * Transform eigenvectors if needed + */ + if( zneeded!=0 ) + { + + /* + * fill Z with zeros + */ + ae_matrix_set_length(z, n-1+1, n-1+1, _state); + for(j=0; j<=n-1; j++) + { + z->ptr.pp_double[0][j] = 0.0; + } + for(i=1; i<=n-1; i++) + { + ae_v_move(&z->ptr.pp_double[i][0], 1, &z->ptr.pp_double[0][0], 1, ae_v_len(0,n-1)); + } + + /* + * Setup R properties + */ + if( isupperr ) + { + j1 = 0; + j2 = n-1; + j1inc = 1; + j2inc = 0; + } + else + { + j1 = 0; + j2 = 0; + j1inc = 0; + j2inc = 1; + } + + /* + * Calculate R*Z + */ + for(i=0; i<=n-1; i++) + { + for(j=j1; j<=j2; j++) + { + v = r.ptr.pp_double[i][j]; + ae_v_addd(&z->ptr.pp_double[i][0], 1, &t.ptr.pp_double[j][0], 1, ae_v_len(0,n-1), v); + } + j1 = j1+j1inc; + j2 = j2+j2inc; + } + } + ae_frame_leave(_state); + return result; +} + + +/************************************************************************* +Algorithm for reduction of the following generalized symmetric positive- +definite eigenvalue problem: + A*x = lambda*B*x (1) or + A*B*x = lambda*x (2) or + B*A*x = lambda*x (3) +to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and +the given problems are the same, and the eigenvectors of the given problem +could be obtained by multiplying the obtained eigenvectors by the +transformation matrix x = R*y). + +Here A is a symmetric matrix, B - symmetric positive-definite matrix. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrices A and B. + IsUpperA - storage format of matrix A. + B - symmetric positive-definite matrix which is given by + its upper or lower triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperB - storage format of matrix B. + ProblemType - if ProblemType is equal to: + * 1, the following problem is solved: A*x = lambda*B*x; + * 2, the following problem is solved: A*B*x = lambda*x; + * 3, the following problem is solved: B*A*x = lambda*x. + +Output parameters: + A - symmetric matrix which is given by its upper or lower + triangle depending on IsUpperA. Contains matrix C. + Array whose indexes range within [0..N-1, 0..N-1]. + R - upper triangular or low triangular transformation matrix + which is used to obtain the eigenvectors of a given problem + as the product of eigenvectors of C (from the right) and + matrix R (from the left). If the matrix is upper + triangular, the elements below the main diagonal + are equal to 0 (and vice versa). Thus, we can perform + the multiplication without taking into account the + internal structure (which is an easier though less + effective way). + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperR - type of matrix R (upper or lower triangular). + +Result: + True, if the problem was reduced successfully. + False, if the error occurred during the Cholesky decomposition of + matrix B (the matrix is not positive-definite). + + -- ALGLIB -- + Copyright 1.28.2006 by Bochkanov Sergey +*************************************************************************/ +ae_bool smatrixgevdreduce(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isuppera, + /* Real */ ae_matrix* b, + ae_bool isupperb, + ae_int_t problemtype, + /* Real */ ae_matrix* r, + ae_bool* isupperr, + ae_state *_state) +{ + ae_frame _frame_block; + ae_matrix t; + ae_vector w1; + ae_vector w2; + ae_vector w3; + ae_int_t i; + ae_int_t j; + double v; + matinvreport rep; + ae_int_t info; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(r); + *isupperr = ae_false; + ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&w1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&w2, 0, DT_REAL, _state, ae_true); + ae_vector_init(&w3, 0, DT_REAL, _state, ae_true); + _matinvreport_init(&rep, _state, ae_true); + + ae_assert(n>0, "SMatrixGEVDReduce: N<=0!", _state); + ae_assert((problemtype==1||problemtype==2)||problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!", _state); + result = ae_true; + + /* + * Problem 1: A*x = lambda*B*x + * + * Reducing to: + * C*y = lambda*y + * C = L^(-1) * A * L^(-T) + * x = L^(-T) * y + */ + if( problemtype==1 ) + { + + /* + * Factorize B in T: B = LL' + */ + ae_matrix_set_length(&t, n-1+1, n-1+1, _state); + if( isupperb ) + { + for(i=0; i<=n-1; i++) + { + ae_v_move(&t.ptr.pp_double[i][i], t.stride, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1)); + } + } + else + { + for(i=0; i<=n-1; i++) + { + ae_v_move(&t.ptr.pp_double[i][0], 1, &b->ptr.pp_double[i][0], 1, ae_v_len(0,i)); + } + } + if( !spdmatrixcholesky(&t, n, ae_false, _state) ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Invert L in T + */ + rmatrixtrinverse(&t, n, ae_false, ae_false, &info, &rep, _state); + if( info<=0 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Build L^(-1) * A * L^(-T) in R + */ + ae_vector_set_length(&w1, n+1, _state); + ae_vector_set_length(&w2, n+1, _state); + ae_matrix_set_length(r, n-1+1, n-1+1, _state); + for(j=1; j<=n; j++) + { + + /* + * Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T)) + */ + ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][0], 1, ae_v_len(1,j)); + symmetricmatrixvectormultiply(a, isuppera, 0, j-1, &w1, 1.0, &w2, _state); + if( isuppera ) + { + matrixvectormultiply(a, 0, j-1, j, n-1, ae_true, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state); + } + else + { + matrixvectormultiply(a, j, n-1, 0, j-1, ae_false, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state); + } + + /* + * Form l(i)*w2 (here l(i) is i-th row of L^(-1)) + */ + for(i=1; i<=n; i++) + { + v = ae_v_dotproduct(&t.ptr.pp_double[i-1][0], 1, &w2.ptr.p_double[1], 1, ae_v_len(0,i-1)); + r->ptr.pp_double[i-1][j-1] = v; + } + } + + /* + * Copy R to A + */ + for(i=0; i<=n-1; i++) + { + ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1)); + } + + /* + * Copy L^(-1) from T to R and transpose + */ + *isupperr = ae_true; + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-1; j++) + { + r->ptr.pp_double[i][j] = 0; + } + } + for(i=0; i<=n-1; i++) + { + ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], t.stride, ae_v_len(i,n-1)); + } + ae_frame_leave(_state); + return result; + } + + /* + * Problem 2: A*B*x = lambda*x + * or + * problem 3: B*A*x = lambda*x + * + * Reducing to: + * C*y = lambda*y + * C = U * A * U' + * B = U'* U + */ + if( problemtype==2||problemtype==3 ) + { + + /* + * Factorize B in T: B = U'*U + */ + ae_matrix_set_length(&t, n-1+1, n-1+1, _state); + if( isupperb ) + { + for(i=0; i<=n-1; i++) + { + ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1)); + } + } + else + { + for(i=0; i<=n-1; i++) + { + ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], b->stride, ae_v_len(i,n-1)); + } + } + if( !spdmatrixcholesky(&t, n, ae_true, _state) ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Build U * A * U' in R + */ + ae_vector_set_length(&w1, n+1, _state); + ae_vector_set_length(&w2, n+1, _state); + ae_vector_set_length(&w3, n+1, _state); + ae_matrix_set_length(r, n-1+1, n-1+1, _state); + for(j=1; j<=n; j++) + { + + /* + * Form w2 = A * u'(j) (here u'(j) is j-th column of U') + */ + ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(1,n-j+1)); + symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, &w1, 1.0, &w3, _state); + ae_v_move(&w2.ptr.p_double[j], 1, &w3.ptr.p_double[1], 1, ae_v_len(j,n)); + ae_v_move(&w1.ptr.p_double[j], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(j,n)); + if( isuppera ) + { + matrixvectormultiply(a, 0, j-2, j-1, n-1, ae_false, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state); + } + else + { + matrixvectormultiply(a, j-1, n-1, 0, j-2, ae_true, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state); + } + + /* + * Form u(i)*w2 (here u(i) is i-th row of U) + */ + for(i=1; i<=n; i++) + { + v = ae_v_dotproduct(&t.ptr.pp_double[i-1][i-1], 1, &w2.ptr.p_double[i], 1, ae_v_len(i-1,n-1)); + r->ptr.pp_double[i-1][j-1] = v; + } + } + + /* + * Copy R to A + */ + for(i=0; i<=n-1; i++) + { + ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1)); + } + if( problemtype==2 ) + { + + /* + * Invert U in T + */ + rmatrixtrinverse(&t, n, ae_true, ae_false, &info, &rep, _state); + if( info<=0 ) + { + result = ae_false; + ae_frame_leave(_state); + return result; + } + + /* + * Copy U^-1 from T to R + */ + *isupperr = ae_true; + for(i=0; i<=n-1; i++) + { + for(j=0; j<=i-1; j++) + { + r->ptr.pp_double[i][j] = 0; + } + } + for(i=0; i<=n-1; i++) + { + ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1)); + } + } + else + { + + /* + * Copy U from T to R and transpose + */ + *isupperr = ae_false; + for(i=0; i<=n-1; i++) + { + for(j=i+1; j<=n-1; j++) + { + r->ptr.pp_double[i][j] = 0; + } + } + for(i=0; i<=n-1; i++) + { + ae_v_move(&r->ptr.pp_double[i][i], r->stride, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1)); + } + } + } + ae_frame_leave(_state); + return result; +} + + + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a number to an element +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdRow - row where the element to be updated is stored. + UpdColumn - column where the element to be updated is stored. + UpdVal - a number to be added to the element. + + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdatesimple(/* Real */ ae_matrix* inva, + ae_int_t n, + ae_int_t updrow, + ae_int_t updcolumn, + double updval, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector t1; + ae_vector t2; + ae_int_t i; + double lambdav; + double vt; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&t1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t2, 0, DT_REAL, _state, ae_true); + + ae_assert(updrow>=0&&updrow<n, "RMatrixInvUpdateSimple: incorrect UpdRow!", _state); + ae_assert(updcolumn>=0&&updcolumn<n, "RMatrixInvUpdateSimple: incorrect UpdColumn!", _state); + ae_vector_set_length(&t1, n-1+1, _state); + ae_vector_set_length(&t2, n-1+1, _state); + + /* + * T1 = InvA * U + */ + ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1)); + + /* + * T2 = v*InvA + */ + ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1)); + + /* + * Lambda = v * InvA * U + */ + lambdav = updval*inva->ptr.pp_double[updcolumn][updrow]; + + /* + * InvA = InvA - correction + */ + for(i=0; i<=n-1; i++) + { + vt = updval*t1.ptr.p_double[i]; + vt = vt/(1+lambdav); + ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a vector to a row +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdRow - the row of A whose vector V was added. + 0 <= Row <= N-1 + V - the vector to be added to a row. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdaterow(/* Real */ ae_matrix* inva, + ae_int_t n, + ae_int_t updrow, + /* Real */ ae_vector* v, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector t1; + ae_vector t2; + ae_int_t i; + ae_int_t j; + double lambdav; + double vt; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&t1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t2, 0, DT_REAL, _state, ae_true); + + ae_vector_set_length(&t1, n-1+1, _state); + ae_vector_set_length(&t2, n-1+1, _state); + + /* + * T1 = InvA * U + */ + ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1)); + + /* + * T2 = v*InvA + * Lambda = v * InvA * U + */ + for(j=0; j<=n-1; j++) + { + vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1)); + t2.ptr.p_double[j] = vt; + } + lambdav = t2.ptr.p_double[updrow]; + + /* + * InvA = InvA - correction + */ + for(i=0; i<=n-1; i++) + { + vt = t1.ptr.p_double[i]/(1+lambdav); + ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a vector to a column +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdColumn - the column of A whose vector U was added. + 0 <= UpdColumn <= N-1 + U - the vector to be added to a column. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdatecolumn(/* Real */ ae_matrix* inva, + ae_int_t n, + ae_int_t updcolumn, + /* Real */ ae_vector* u, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector t1; + ae_vector t2; + ae_int_t i; + double lambdav; + double vt; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&t1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t2, 0, DT_REAL, _state, ae_true); + + ae_vector_set_length(&t1, n-1+1, _state); + ae_vector_set_length(&t2, n-1+1, _state); + + /* + * T1 = InvA * U + * Lambda = v * InvA * U + */ + for(i=0; i<=n-1; i++) + { + vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1)); + t1.ptr.p_double[i] = vt; + } + lambdav = t1.ptr.p_double[updcolumn]; + + /* + * T2 = v*InvA + */ + ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1)); + + /* + * InvA = InvA - correction + */ + for(i=0; i<=n-1; i++) + { + vt = t1.ptr.p_double[i]/(1+lambdav); + ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt); + } + ae_frame_leave(_state); +} + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm computes the inverse of matrix A+u*v’ by using the given matrix +A^-1 and the vectors u and v. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + U - the vector modifying the matrix. + Array whose index ranges within [0..N-1]. + V - the vector modifying the matrix. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of matrix A + u*v'. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdateuv(/* Real */ ae_matrix* inva, + ae_int_t n, + /* Real */ ae_vector* u, + /* Real */ ae_vector* v, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector t1; + ae_vector t2; + ae_int_t i; + ae_int_t j; + double lambdav; + double vt; + + ae_frame_make(_state, &_frame_block); + ae_vector_init(&t1, 0, DT_REAL, _state, ae_true); + ae_vector_init(&t2, 0, DT_REAL, _state, ae_true); + + ae_vector_set_length(&t1, n-1+1, _state); + ae_vector_set_length(&t2, n-1+1, _state); + + /* + * T1 = InvA * U + * Lambda = v * T1 + */ + for(i=0; i<=n-1; i++) + { + vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1)); + t1.ptr.p_double[i] = vt; + } + lambdav = ae_v_dotproduct(&v->ptr.p_double[0], 1, &t1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * T2 = v*InvA + */ + for(j=0; j<=n-1; j++) + { + vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1)); + t2.ptr.p_double[j] = vt; + } + + /* + * InvA = InvA - correction + */ + for(i=0; i<=n-1; i++) + { + vt = t1.ptr.p_double[i]/(1+lambdav); + ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt); + } + ae_frame_leave(_state); +} + + + + +/************************************************************************* +Subroutine performing the Schur decomposition of a general matrix by using +the QR algorithm with multiple shifts. + +The source matrix A is represented as S'*A*S = T, where S is an orthogonal +matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of +sizes 1x1 and 2x2 on the main diagonal). + +Input parameters: + A - matrix to be decomposed. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of A, N>=0. + + +Output parameters: + A - contains matrix T. + Array whose indexes range within [0..N-1, 0..N-1]. + S - contains Schur vectors. + Array whose indexes range within [0..N-1, 0..N-1]. + +Note 1: + The block structure of matrix T can be easily recognized: since all + the elements below the blocks are zeros, the elements a[i+1,i] which + are equal to 0 show the block border. + +Note 2: + The algorithm performance depends on the value of the internal parameter + NS of the InternalSchurDecomposition subroutine which defines the number + of shifts in the QR algorithm (similarly to the block width in block-matrix + algorithms in linear algebra). If you require maximum performance on + your machine, it is recommended to adjust this parameter manually. + +Result: + True, + if the algorithm has converged and parameters A and S contain the result. + False, + if the algorithm has not converged. + +Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library). +*************************************************************************/ +ae_bool rmatrixschur(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_matrix* s, + ae_state *_state) +{ + ae_frame _frame_block; + ae_vector tau; + ae_vector wi; + ae_vector wr; + ae_matrix a1; + ae_matrix s1; + ae_int_t info; + ae_int_t i; + ae_int_t j; + ae_bool result; + + ae_frame_make(_state, &_frame_block); + ae_matrix_clear(s); + ae_vector_init(&tau, 0, DT_REAL, _state, ae_true); + ae_vector_init(&wi, 0, DT_REAL, _state, ae_true); + ae_vector_init(&wr, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true); + ae_matrix_init(&s1, 0, 0, DT_REAL, _state, ae_true); + + + /* + * Upper Hessenberg form of the 0-based matrix + */ + rmatrixhessenberg(a, n, &tau, _state); + rmatrixhessenbergunpackq(a, n, &tau, s, _state); + + /* + * Convert from 0-based arrays to 1-based, + * then call InternalSchurDecomposition + * Awkward, of course, but Schur decompisiton subroutine + * is too complex to fix it. + * + */ + ae_matrix_set_length(&a1, n+1, n+1, _state); + ae_matrix_set_length(&s1, n+1, n+1, _state); + for(i=1; i<=n; i++) + { + for(j=1; j<=n; j++) + { + a1.ptr.pp_double[i][j] = a->ptr.pp_double[i-1][j-1]; + s1.ptr.pp_double[i][j] = s->ptr.pp_double[i-1][j-1]; + } + } + internalschurdecomposition(&a1, n, 1, 1, &wr, &wi, &s1, &info, _state); + result = info==0; + + /* + * convert from 1-based arrays to -based + */ + for(i=1; i<=n; i++) + { + for(j=1; j<=n; j++) + { + a->ptr.pp_double[i-1][j-1] = a1.ptr.pp_double[i][j]; + s->ptr.pp_double[i-1][j-1] = s1.ptr.pp_double[i][j]; + } + } + ae_frame_leave(_state); + return result; +} + + + +} + diff --git a/contrib/lbfgs/linalg.h b/contrib/lbfgs/linalg.h new file mode 100755 index 0000000000..38f2def55d --- /dev/null +++ b/contrib/lbfgs/linalg.h @@ -0,0 +1,4101 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#ifndef _linalg_pkg_h +#define _linalg_pkg_h +#include "ap.h" +#include "alglibinternal.h" +#include "alglibmisc.h" + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +typedef struct +{ + double r1; + double rinf; +} matinvreport; +typedef struct +{ + double e1; + double e2; + ae_vector x; + ae_vector ax; + double xax; + ae_int_t n; + ae_vector rk; + ae_vector rk1; + ae_vector xk; + ae_vector xk1; + ae_vector pk; + ae_vector pk1; + ae_vector b; + rcommstate rstate; + ae_vector tmp2; +} fblslincgstate; + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + + + + + + + + + + + + + +/************************************************************************* +Matrix inverse report: +* R1 reciprocal of condition number in 1-norm +* RInf reciprocal of condition number in inf-norm +*************************************************************************/ +class _matinvreport_owner +{ +public: + _matinvreport_owner(); + _matinvreport_owner(const _matinvreport_owner &rhs); + _matinvreport_owner& operator=(const _matinvreport_owner &rhs); + virtual ~_matinvreport_owner(); + alglib_impl::matinvreport* c_ptr(); + alglib_impl::matinvreport* c_ptr() const; +protected: + alglib_impl::matinvreport *p_struct; +}; +class matinvreport : public _matinvreport_owner +{ +public: + matinvreport(); + matinvreport(const matinvreport &rhs); + matinvreport& operator=(const matinvreport &rhs); + virtual ~matinvreport(); + double &r1; + double &rinf; + +}; + +/************************************************************************* +Cache-oblivous complex "copy-and-transpose" + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + A - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void cmatrixtranspose(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb); + + +/************************************************************************* +Cache-oblivous real "copy-and-transpose" + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + A - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void rmatrixtranspose(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb); + + +/************************************************************************* +Copy + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + B - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void cmatrixcopy(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb); + + +/************************************************************************* +Copy + +Input parameters: + M - number of rows + N - number of columns + A - source matrix, MxN submatrix is copied and transposed + IA - submatrix offset (row index) + JA - submatrix offset (column index) + B - destination matrix + IB - submatrix offset (row index) + JB - submatrix offset (column index) +*************************************************************************/ +void rmatrixcopy(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb); + + +/************************************************************************* +Rank-1 correction: A := A + u*v' + +INPUT PARAMETERS: + M - number of rows + N - number of columns + A - target matrix, MxN submatrix is updated + IA - submatrix offset (row index) + JA - submatrix offset (column index) + U - vector #1 + IU - subvector offset + V - vector #2 + IV - subvector offset +*************************************************************************/ +void cmatrixrank1(const ae_int_t m, const ae_int_t n, complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_1d_array &u, const ae_int_t iu, complex_1d_array &v, const ae_int_t iv); + + +/************************************************************************* +Rank-1 correction: A := A + u*v' + +INPUT PARAMETERS: + M - number of rows + N - number of columns + A - target matrix, MxN submatrix is updated + IA - submatrix offset (row index) + JA - submatrix offset (column index) + U - vector #1 + IU - subvector offset + V - vector #2 + IV - subvector offset +*************************************************************************/ +void rmatrixrank1(const ae_int_t m, const ae_int_t n, real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_1d_array &u, const ae_int_t iu, real_1d_array &v, const ae_int_t iv); + + +/************************************************************************* +Matrix-vector product: y := op(A)*x + +INPUT PARAMETERS: + M - number of rows of op(A) + M>=0 + N - number of columns of op(A) + N>=0 + A - target matrix + IA - submatrix offset (row index) + JA - submatrix offset (column index) + OpA - operation type: + * OpA=0 => op(A) = A + * OpA=1 => op(A) = A^T + * OpA=2 => op(A) = A^H + X - input vector + IX - subvector offset + IY - subvector offset + +OUTPUT PARAMETERS: + Y - vector which stores result + +if M=0, then subroutine does nothing. +if N=0, Y is filled by zeros. + + + -- ALGLIB routine -- + + 28.01.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixmv(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const complex_1d_array &x, const ae_int_t ix, complex_1d_array &y, const ae_int_t iy); + + +/************************************************************************* +Matrix-vector product: y := op(A)*x + +INPUT PARAMETERS: + M - number of rows of op(A) + N - number of columns of op(A) + A - target matrix + IA - submatrix offset (row index) + JA - submatrix offset (column index) + OpA - operation type: + * OpA=0 => op(A) = A + * OpA=1 => op(A) = A^T + X - input vector + IX - subvector offset + IY - subvector offset + +OUTPUT PARAMETERS: + Y - vector which stores result + +if M=0, then subroutine does nothing. +if N=0, Y is filled by zeros. + + + -- ALGLIB routine -- + + 28.01.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixmv(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, real_1d_array &y, const ae_int_t iy); + + +/************************************************************************* +This subroutine calculates X*op(A^-1) where: +* X is MxN general matrix +* A is NxN upper/lower triangular/unitriangular matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Multiplication result replaces X. +Cache-oblivious algorithm is used. + +INPUT PARAMETERS + N - matrix size, N>=0 + M - matrix size, N>=0 + A - matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1] + I1 - submatrix offset + J1 - submatrix offset + IsUpper - whether matrix is upper triangular + IsUnit - whether matrix is unitriangular + OpType - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + C - matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1] + I2 - submatrix offset + J2 - submatrix offset + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2); + + +/************************************************************************* +This subroutine calculates op(A^-1)*X where: +* X is MxN general matrix +* A is MxM upper/lower triangular/unitriangular matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Multiplication result replaces X. +Cache-oblivious algorithm is used. + +INPUT PARAMETERS + N - matrix size, N>=0 + M - matrix size, N>=0 + A - matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1] + I1 - submatrix offset + J1 - submatrix offset + IsUpper - whether matrix is upper triangular + IsUnit - whether matrix is unitriangular + OpType - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + C - matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1] + I2 - submatrix offset + J2 - submatrix offset + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2); + + +/************************************************************************* +Same as CMatrixRightTRSM, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2); + + +/************************************************************************* +Same as CMatrixLeftTRSM, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2); + + +/************************************************************************* +This subroutine calculates C=alpha*A*A^H+beta*C or C=alpha*A^H*A+beta*C +where: +* C is NxN Hermitian matrix given by its upper/lower triangle +* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise + +Additional info: +* cache-oblivious algorithm is used. +* multiplication result replaces C. If Beta=0, C elements are not used in + calculations (not multiplied by zero - just not referenced) +* if Alpha=0, A is not used (not multiplied by zero - just not referenced) +* if both Beta and Alpha are zero, C is filled by zeros. + +INPUT PARAMETERS + N - matrix size, N>=0 + K - matrix size, K>=0 + Alpha - coefficient + A - matrix + IA - submatrix offset + JA - submatrix offset + OpTypeA - multiplication type: + * 0 - A*A^H is calculated + * 2 - A^H*A is calculated + Beta - coefficient + C - matrix + IC - submatrix offset + JC - submatrix offset + IsUpper - whether C is upper triangular or lower triangular + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper); + + +/************************************************************************* +Same as CMatrixSYRK, but for real matrices + +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper); + + +/************************************************************************* +This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where: +* C is MxN general matrix +* op1(A) is MxK matrix +* op2(B) is KxN matrix +* "op" may be identity transformation, transposition, conjugate transposition + +Additional info: +* cache-oblivious algorithm is used. +* multiplication result replaces C. If Beta=0, C elements are not used in + calculations (not multiplied by zero - just not referenced) +* if Alpha=0, A is not used (not multiplied by zero - just not referenced) +* if both Beta and Alpha are zero, C is filled by zeros. + +INPUT PARAMETERS + N - matrix size, N>0 + M - matrix size, N>0 + K - matrix size, K>0 + Alpha - coefficient + A - matrix + IA - submatrix offset + JA - submatrix offset + OpTypeA - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + B - matrix + IB - submatrix offset + JB - submatrix offset + OpTypeB - transformation type: + * 0 - no transformation + * 1 - transposition + * 2 - conjugate transposition + Beta - coefficient + C - matrix + IC - submatrix offset + JC - submatrix offset + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const alglib::complex alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const alglib::complex beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc); + + +/************************************************************************* +Same as CMatrixGEMM, but for real numbers. +OpType may be only 0 or 1. + + -- ALGLIB routine -- + 16.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc); + +/************************************************************************* +QR decomposition of a rectangular matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and R in compact form (see below). + Tau - array of scalar factors which are used to form + matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)]. + +Matrix A is represented as A = QR, where Q is an orthogonal matrix of size +MxM, R - upper triangular (or upper trapezoid) matrix of size M x N. + +The elements of matrix R are located on and above the main diagonal of +matrix A. The elements which are located in Tau array and below the main +diagonal of matrix A are used to form matrix Q as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(0)*H(2)*...*H(k-1), + +where k = min(m,n), and each H(i) is in the form + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - real vector, +so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i). + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqr(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau); + + +/************************************************************************* +LQ decomposition of a rectangular matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices L and Q in compact form (see below) + Tau - array of scalar factors which are used to form + matrix Q. Array whose index ranges within [0..Min(M,N)-1]. + +Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size +MxM, L - lower triangular (or lower trapezoid) matrix of size M x N. + +The elements of matrix L are located on and below the main diagonal of +matrix A. The elements which are located in Tau array and above the main +diagonal of matrix A are used to form matrix Q as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(k-1)*H(k-2)*...*H(1)*H(0), + +where k = min(m,n), and each H(i) is of the form + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0, +v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1). + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlq(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau); + + +/************************************************************************* +QR decomposition of a rectangular complex matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and R in compact form + Tau - array of scalar factors which are used to form matrix Q. Array + whose indexes range within [0.. Min(M,N)-1] + +Matrix A is represented as A = QR, where Q is an orthogonal matrix of size +MxM, R - upper triangular (or upper trapezoid) matrix of size MxN. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void cmatrixqr(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau); + + +/************************************************************************* +LQ decomposition of a rectangular complex matrix of size MxN + +Input parameters: + A - matrix A whose indexes range within [0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q and L in compact form + Tau - array of scalar factors which are used to form matrix Q. Array + whose indexes range within [0.. Min(M,N)-1] + +Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size +MxM, L - lower triangular (or lower trapezoid) matrix of size MxN. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +void cmatrixlq(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau); + + +/************************************************************************* +Partial unpacking of matrix Q from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of RMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of the RMatrixQR subroutine. + QColumns - required number of columns of matrix Q. M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array whose indexes range within [0..M-1, 0..QColumns-1]. + If QColumns=0, the array remains unchanged. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqrunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qcolumns, real_2d_array &q); + + +/************************************************************************* +Unpacking of matrix R from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of RMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + R - matrix R, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixqrunpackr(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &r); + + +/************************************************************************* +Partial unpacking of matrix Q from the LQ decomposition of a matrix A + +Input parameters: + A - matrices L and Q in compact form. + Output of RMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of the RMatrixLQ subroutine. + QRows - required number of rows in matrix Q. N>=QRows>=0. + +Output parameters: + Q - first QRows rows of matrix Q. Array whose indexes range + within [0..QRows-1, 0..N-1]. If QRows=0, the array remains + unchanged. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlqunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qrows, real_2d_array &q); + + +/************************************************************************* +Unpacking of matrix L from the LQ decomposition of a matrix A + +Input parameters: + A - matrices Q and L in compact form. + Output of RMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + L - matrix L, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlqunpackl(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &l); + + +/************************************************************************* +Partial unpacking of matrix Q from QR decomposition of a complex matrix A. + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixQR subroutine . + M - number of rows in matrix A. M>=0. + N - number of columns in matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of CMatrixQR subroutine . + QColumns - required number of columns in matrix Q. M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array whose index ranges within [0..M-1, 0..QColumns-1]. + If QColumns=0, array isn't changed. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixqrunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qcolumns, complex_2d_array &q); + + +/************************************************************************* +Unpacking of matrix R from the QR decomposition of a matrix A + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixQR subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + R - matrix R, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixqrunpackr(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &r); + + +/************************************************************************* +Partial unpacking of matrix Q from LQ decomposition of a complex matrix A. + +Input parameters: + A - matrices Q and R in compact form. + Output of CMatrixLQ subroutine . + M - number of rows in matrix A. M>=0. + N - number of columns in matrix A. N>=0. + Tau - scalar factors which are used to form Q. + Output of CMatrixLQ subroutine . + QRows - required number of rows in matrix Q. N>=QColumns>=0. + +Output parameters: + Q - first QRows rows of matrix Q. + Array whose index ranges within [0..QRows-1, 0..N-1]. + If QRows=0, array isn't changed. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlqunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qrows, complex_2d_array &q); + + +/************************************************************************* +Unpacking of matrix L from the LQ decomposition of a matrix A + +Input parameters: + A - matrices Q and L in compact form. + Output of CMatrixLQ subroutine. + M - number of rows in given matrix A. M>=0. + N - number of columns in given matrix A. N>=0. + +Output parameters: + L - matrix L, array[0..M-1, 0..N-1]. + + -- ALGLIB routine -- + 17.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlqunpackl(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &l); + + +/************************************************************************* +Reduction of a rectangular matrix to bidiagonal form + +The algorithm reduces the rectangular matrix A to bidiagonal form by +orthogonal transformations P and Q: A = Q*B*P. + +Input parameters: + A - source matrix. array[0..M-1, 0..N-1] + M - number of rows in matrix A. + N - number of columns in matrix A. + +Output parameters: + A - matrices Q, B, P in compact form (see below). + TauQ - scalar factors which are used to form matrix Q. + TauP - scalar factors which are used to form matrix P. + +The main diagonal and one of the secondary diagonals of matrix A are +replaced with bidiagonal matrix B. Other elements contain elementary +reflections which form MxM matrix Q and NxN matrix P, respectively. + +If M>=N, B is the upper bidiagonal MxN matrix and is stored in the +corresponding elements of matrix A. Matrix Q is represented as a +product of elementary reflections Q = H(0)*H(1)*...*H(n-1), where +H(i) = 1-tau*v*v'. Here tau is a scalar which is stored in TauQ[i], and +vector v has the following structure: v(0:i-1)=0, v(i)=1, v(i+1:m-1) is +stored in elements A(i+1:m-1,i). Matrix P is as follows: P = +G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i], +u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1). + +If M<N, B is the lower bidiagonal MxN matrix and is stored in the +corresponding elements of matrix A. Q = H(0)*H(1)*...*H(m-2), where +H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1) +is stored in elements A(i+2:m-1,i). P = G(0)*G(1)*...*G(m-1), +G(i) = 1-tau*u*u', tau is stored in TauP, u(0:i-1)=0, u(i)=1, u(i+1:n-1) +is stored in A(i,i+1:n-1). + +EXAMPLE: + +m=6, n=5 (m > n): m=5, n=6 (m < n): + +( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +( v1 v2 v3 v4 v5 ) + +Here vi and ui are vectors which form H(i) and G(i), and d and e - +are the diagonal and off-diagonal elements of matrix B. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994. + Sergey Bochkanov, ALGLIB project, translation from FORTRAN to + pseudocode, 2007-2010. +*************************************************************************/ +void rmatrixbd(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tauq, real_1d_array &taup); + + +/************************************************************************* +Unpacking matrix Q which reduces a matrix to bidiagonal form. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUQ - scalar factors which are used to form Q. + Output of ToBidiagonal subroutine. + QColumns - required number of columns in matrix Q. + M>=QColumns>=0. + +Output parameters: + Q - first QColumns columns of matrix Q. + Array[0..M-1, 0..QColumns-1] + If QColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, const ae_int_t qcolumns, real_2d_array &q); + + +/************************************************************************* +Multiplication by matrix Q which reduces matrix A to bidiagonal form. + +The algorithm allows pre- or post-multiply by Q or Q'. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUQ - scalar factors which are used to form Q. + Output of ToBidiagonal subroutine. + Z - multiplied matrix. + array[0..ZRows-1,0..ZColumns-1] + ZRows - number of rows in matrix Z. If FromTheRight=False, + ZRows=M, otherwise ZRows can be arbitrary. + ZColumns - number of columns in matrix Z. If FromTheRight=True, + ZColumns=M, otherwise ZColumns can be arbitrary. + FromTheRight - pre- or post-multiply. + DoTranspose - multiply by Q or Q'. + +Output parameters: + Z - product of Z and Q. + Array[0..ZRows-1,0..ZColumns-1] + If ZRows=0 or ZColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdmultiplybyq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose); + + +/************************************************************************* +Unpacking matrix P which reduces matrix A to bidiagonal form. +The subroutine returns transposed matrix P. + +Input parameters: + QP - matrices Q and P in compact form. + Output of ToBidiagonal subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUP - scalar factors which are used to form P. + Output of ToBidiagonal subroutine. + PTRows - required number of rows of matrix P^T. N >= PTRows >= 0. + +Output parameters: + PT - first PTRows columns of matrix P^T + Array[0..PTRows-1, 0..N-1] + If PTRows=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackpt(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, const ae_int_t ptrows, real_2d_array &pt); + + +/************************************************************************* +Multiplication by matrix P which reduces matrix A to bidiagonal form. + +The algorithm allows pre- or post-multiply by P or P'. + +Input parameters: + QP - matrices Q and P in compact form. + Output of RMatrixBD subroutine. + M - number of rows in matrix A. + N - number of columns in matrix A. + TAUP - scalar factors which are used to form P. + Output of RMatrixBD subroutine. + Z - multiplied matrix. + Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. + ZRows - number of rows in matrix Z. If FromTheRight=False, + ZRows=N, otherwise ZRows can be arbitrary. + ZColumns - number of columns in matrix Z. If FromTheRight=True, + ZColumns=N, otherwise ZColumns can be arbitrary. + FromTheRight - pre- or post-multiply. + DoTranspose - multiply by P or P'. + +Output parameters: + Z - product of Z and P. + Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. + If ZRows=0 or ZColumns=0, the array is not modified. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdmultiplybyp(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose); + + +/************************************************************************* +Unpacking of the main and secondary diagonals of bidiagonal decomposition +of matrix A. + +Input parameters: + B - output of RMatrixBD subroutine. + M - number of rows in matrix B. + N - number of columns in matrix B. + +Output parameters: + IsUpper - True, if the matrix is upper bidiagonal. + otherwise IsUpper is False. + D - the main diagonal. + Array whose index ranges within [0..Min(M,N)-1]. + E - the secondary diagonal (upper or lower, depending on + the value of IsUpper). + Array index ranges within [0..Min(M,N)-1], the last + element is not used. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixbdunpackdiagonals(const real_2d_array &b, const ae_int_t m, const ae_int_t n, bool &isupper, real_1d_array &d, real_1d_array &e); + + +/************************************************************************* +Reduction of a square matrix to upper Hessenberg form: Q'*A*Q = H, +where Q is an orthogonal matrix, H - Hessenberg matrix. + +Input parameters: + A - matrix A with elements [0..N-1, 0..N-1] + N - size of matrix A. + +Output parameters: + A - matrices Q and P in compact form (see below). + Tau - array of scalar factors which are used to form matrix Q. + Array whose index ranges within [0..N-2] + +Matrix H is located on the main diagonal, on the lower secondary diagonal +and above the main diagonal of matrix A. The elements which are used to +form matrix Q are situated in array Tau and below the lower secondary +diagonal of matrix A as follows: + +Matrix Q is represented as a product of elementary reflections + +Q = H(0)*H(2)*...*H(n-2), + +where each H(i) is given by + +H(i) = 1 - tau * v * (v^T) + +where tau is a scalar stored in Tau[I]; v - is a real vector, +so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void rmatrixhessenberg(real_2d_array &a, const ae_int_t n, real_1d_array &tau); + + +/************************************************************************* +Unpacking matrix Q which reduces matrix A to upper Hessenberg form + +Input parameters: + A - output of RMatrixHessenberg subroutine. + N - size of matrix A. + Tau - scalar factors which are used to form Q. + Output of RMatrixHessenberg subroutine. + +Output parameters: + Q - matrix Q. + Array whose indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixhessenbergunpackq(const real_2d_array &a, const ae_int_t n, const real_1d_array &tau, real_2d_array &q); + + +/************************************************************************* +Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form) + +Input parameters: + A - output of RMatrixHessenberg subroutine. + N - size of matrix A. + +Output parameters: + H - matrix H. Array whose indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + 2005-2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixhessenbergunpackh(const real_2d_array &a, const ae_int_t n, real_2d_array &h); + + +/************************************************************************* +Reduction of a symmetric matrix which is given by its higher or lower +triangular part to a tridiagonal matrix using orthogonal similarity +transformation: Q'*A*Q=T. + +Input parameters: + A - matrix to be transformed + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. If IsUpper = True, then matrix A is given + by its upper triangle, and the lower triangle is not used + and not modified by the algorithm, and vice versa + if IsUpper = False. + +Output parameters: + A - matrices T and Q in compact form (see lower) + Tau - array of factors which are forming matrices H(i) + array with elements [0..N-2]. + D - main diagonal of symmetric matrix T. + array with elements [0..N-1]. + E - secondary diagonal of symmetric matrix T. + array with elements [0..N-2]. + + + If IsUpper=True, the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-2) . . . H(2) H(0). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in + A(0:i-1,i+1), and tau in TAU(i). + + If IsUpper=False, the matrix Q is represented as a product of elementary + reflectors + + Q = H(0) H(2) . . . H(n-2). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v1 v2 v3 ) ( d ) + ( d e v2 v3 ) ( e d ) + ( d e v3 ) ( v0 e d ) + ( d e ) ( v0 v1 e d ) + ( d ) ( v0 v1 v2 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void smatrixtd(real_2d_array &a, const ae_int_t n, const bool isupper, real_1d_array &tau, real_1d_array &d, real_1d_array &e); + + +/************************************************************************* +Unpacking matrix Q which reduces symmetric matrix to a tridiagonal +form. + +Input parameters: + A - the result of a SMatrixTD subroutine + N - size of matrix A. + IsUpper - storage format (a parameter of SMatrixTD subroutine) + Tau - the result of a SMatrixTD subroutine + +Output parameters: + Q - transformation matrix. + array with elements [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void smatrixtdunpackq(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &tau, real_2d_array &q); + + +/************************************************************************* +Reduction of a Hermitian matrix which is given by its higher or lower +triangular part to a real tridiagonal matrix using unitary similarity +transformation: Q'*A*Q = T. + +Input parameters: + A - matrix to be transformed + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. If IsUpper = True, then matrix A is given + by its upper triangle, and the lower triangle is not used + and not modified by the algorithm, and vice versa + if IsUpper = False. + +Output parameters: + A - matrices T and Q in compact form (see lower) + Tau - array of factors which are forming matrices H(i) + array with elements [0..N-2]. + D - main diagonal of real symmetric matrix T. + array with elements [0..N-1]. + E - secondary diagonal of real symmetric matrix T. + array with elements [0..N-2]. + + + If IsUpper=True, the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-2) . . . H(2) H(0). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in + A(0:i-1,i+1), and tau in TAU(i). + + If IsUpper=False, the matrix Q is represented as a product of elementary + reflectors + + Q = H(0) H(2) . . . H(n-2). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v1 v2 v3 ) ( d ) + ( d e v2 v3 ) ( e d ) + ( d e v3 ) ( v0 e d ) + ( d e ) ( v0 v1 e d ) + ( d ) ( v0 v1 v2 e d ) + +where d and e denote diagonal and off-diagonal elements of T, and vi +denotes an element of the vector defining H(i). + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 +*************************************************************************/ +void hmatrixtd(complex_2d_array &a, const ae_int_t n, const bool isupper, complex_1d_array &tau, real_1d_array &d, real_1d_array &e); + + +/************************************************************************* +Unpacking matrix Q which reduces a Hermitian matrix to a real tridiagonal +form. + +Input parameters: + A - the result of a HMatrixTD subroutine + N - size of matrix A. + IsUpper - storage format (a parameter of HMatrixTD subroutine) + Tau - the result of a HMatrixTD subroutine + +Output parameters: + Q - transformation matrix. + array with elements [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void hmatrixtdunpackq(const complex_2d_array &a, const ae_int_t n, const bool isupper, const complex_1d_array &tau, complex_2d_array &q); + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a symmetric matrix + +The algorithm finds eigen pairs of a symmetric matrix by reducing it to +tridiagonal form and using the QL/QR algorithm. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpper - storage format. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +bool smatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, real_2d_array &z); + + +/************************************************************************* +Subroutine for finding the eigenvalues (and eigenvectors) of a symmetric +matrix in a given half open interval (A, B] by using a bisection and +inverse iteration + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. Array [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + B1, B2 - half open interval (B1, B2] to search eigenvalues in. + +Output parameters: + M - number of eigenvalues found in a given half-interval (M>=0). + W - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..M-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if successful. M contains the number of eigenvalues in the given + half-interval (could be equal to 0), W contains the eigenvalues, + Z contains the eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned, + M is equal to 0. + + -- ALGLIB -- + Copyright 07.01.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixevdr(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, real_2d_array &z); + + +/************************************************************************* +Subroutine for finding the eigenvalues and eigenvectors of a symmetric +matrix with given indexes by using bisection and inverse iteration methods. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + +Output parameters: + W - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..I2-I1]. + In that case, the eigenvectors are stored in the matrix columns. + +Result: + True, if successful. W contains the eigenvalues, Z contains the + eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned. + + -- ALGLIB -- + Copyright 07.01.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixevdi(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, real_2d_array &z); + + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a Hermitian matrix + +The algorithm finds eigen pairs of a Hermitian matrix by reducing it to +real tridiagonal form and using the QL/QR algorithm. + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + +Note: + eigenvectors of Hermitian matrix are defined up to multiplication by + a complex number L, such that |L|=1. + + -- ALGLIB -- + Copyright 2005, 23 March 2007 by Bochkanov Sergey +*************************************************************************/ +bool hmatrixevd(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, complex_2d_array &z); + + +/************************************************************************* +Subroutine for finding the eigenvalues (and eigenvectors) of a Hermitian +matrix in a given half-interval (A, B] by using a bisection and inverse +iteration + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. Array whose indexes range within + [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + B1, B2 - half-interval (B1, B2] to search eigenvalues in. + +Output parameters: + M - number of eigenvalues found in a given half-interval, M>=0 + W - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..M-1]. + The eigenvectors are stored in the matrix columns. + +Result: + True, if successful. M contains the number of eigenvalues in the given + half-interval (could be equal to 0), W contains the eigenvalues, + Z contains the eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration + subroutine wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned, M is + equal to 0. + +Note: + eigen vectors of Hermitian matrix are defined up to multiplication by + a complex number L, such as |L|=1. + + -- ALGLIB -- + Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. +*************************************************************************/ +bool hmatrixevdr(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, complex_2d_array &z); + + +/************************************************************************* +Subroutine for finding the eigenvalues and eigenvectors of a Hermitian +matrix with given indexes by using bisection and inverse iteration methods + +Input parameters: + A - Hermitian matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or + not. If ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + IsUpperA - storage format of matrix A. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + +Output parameters: + W - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..I2-I1]. + In that case, the eigenvectors are stored in the matrix + columns. + +Result: + True, if successful. W contains the eigenvalues, Z contains the + eigenvectors (if needed). + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration + subroutine wasn't able to find all the corresponding eigenvectors. + In that case, the eigenvalues and eigenvectors are not returned. + +Note: + eigen vectors of Hermitian matrix are defined up to multiplication by + a complex number L, such as |L|=1. + + -- ALGLIB -- + Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. +*************************************************************************/ +bool hmatrixevdi(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, complex_2d_array &z); + + +/************************************************************************* +Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix + +The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by +using an QL/QR algorithm with implicit shifts. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix A. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix + are multiplied by the square matrix Z. It is used if the + tridiagonal matrix is obtained by the similarity + transformation of a symmetric matrix; + * 2, the eigenvectors of a tridiagonal matrix replace the + square matrix Z; + * 3, matrix Z contains the first row of the eigenvectors + matrix. + Z - if ZNeeded=1, Z contains the square matrix by which the + eigenvectors are multiplied. + Array whose indexes range within [0..N-1, 0..N-1]. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains the product of a given matrix (from the left) + and the eigenvectors matrix (from the right); + * 2, Z contains the eigenvectors. + * 3, Z contains the first row of the eigenvectors matrix. + If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1]. + In that case, the eigenvectors are stored in the matrix columns. + If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1]. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 +*************************************************************************/ +bool smatrixtdevd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, real_2d_array &z); + + +/************************************************************************* +Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a +given half-interval (A, B] by using bisection and inverse iteration. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix, N>=0. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix are multiplied + by the square matrix Z. It is used if the tridiagonal + matrix is obtained by the similarity transformation + of a symmetric matrix. + * 2, the eigenvectors of a tridiagonal matrix replace matrix Z. + A, B - half-interval (A, B] to search eigenvalues in. + Z - if ZNeeded is equal to: + * 0, Z isn't used and remains unchanged; + * 1, Z contains the square matrix (array whose indexes range + within [0..N-1, 0..N-1]) which reduces the given symmetric + matrix to tridiagonal form; + * 2, Z isn't used (but changed on the exit). + +Output parameters: + D - array of the eigenvalues found. + Array whose index ranges within [0..M-1]. + M - number of eigenvalues found in the given half-interval (M>=0). + Z - if ZNeeded is equal to: + * 0, doesn't contain any information; + * 1, contains the product of a given NxN matrix Z (from the + left) and NxM matrix of the eigenvectors found (from the + right). Array whose indexes range within [0..N-1, 0..M-1]. + * 2, contains the matrix of the eigenvectors found. + Array whose indexes range within [0..N-1, 0..M-1]. + +Result: + + True, if successful. In that case, M contains the number of eigenvalues + in the given half-interval (could be equal to 0), D contains the eigenvalues, + Z contains the eigenvectors (if needed). + It should be noted that the subroutine changes the size of arrays D and Z. + + False, if the bisection method subroutine wasn't able to find the + eigenvalues in the given interval or if the inverse iteration subroutine + wasn't able to find all the corresponding eigenvectors. In that case, + the eigenvalues and eigenvectors are not returned, M is equal to 0. + + -- ALGLIB -- + Copyright 31.03.2008 by Bochkanov Sergey +*************************************************************************/ +bool smatrixtdevdr(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const double a, const double b, ae_int_t &m, real_2d_array &z); + + +/************************************************************************* +Subroutine for finding tridiagonal matrix eigenvalues/vectors with given +indexes (in ascending order) by using the bisection and inverse iteraion. + +Input parameters: + D - the main diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-1]. + E - the secondary diagonal of a tridiagonal matrix. + Array whose index ranges within [0..N-2]. + N - size of matrix. N>=0. + ZNeeded - flag controlling whether the eigenvectors are needed or not. + If ZNeeded is equal to: + * 0, the eigenvectors are not needed; + * 1, the eigenvectors of a tridiagonal matrix are multiplied + by the square matrix Z. It is used if the + tridiagonal matrix is obtained by the similarity transformation + of a symmetric matrix. + * 2, the eigenvectors of a tridiagonal matrix replace + matrix Z. + I1, I2 - index interval for searching (from I1 to I2). + 0 <= I1 <= I2 <= N-1. + Z - if ZNeeded is equal to: + * 0, Z isn't used and remains unchanged; + * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1]) + which reduces the given symmetric matrix to tridiagonal form; + * 2, Z isn't used (but changed on the exit). + +Output parameters: + D - array of the eigenvalues found. + Array whose index ranges within [0..I2-I1]. + Z - if ZNeeded is equal to: + * 0, doesn't contain any information; + * 1, contains the product of a given NxN matrix Z (from the left) and + Nx(I2-I1) matrix of the eigenvectors found (from the right). + Array whose indexes range within [0..N-1, 0..I2-I1]. + * 2, contains the matrix of the eigenvalues found. + Array whose indexes range within [0..N-1, 0..I2-I1]. + + +Result: + + True, if successful. In that case, D contains the eigenvalues, + Z contains the eigenvectors (if needed). + It should be noted that the subroutine changes the size of arrays D and Z. + + False, if the bisection method subroutine wasn't able to find the eigenvalues + in the given interval or if the inverse iteration subroutine wasn't able + to find all the corresponding eigenvectors. In that case, the eigenvalues + and eigenvectors are not returned. + + -- ALGLIB -- + Copyright 25.12.2005 by Bochkanov Sergey +*************************************************************************/ +bool smatrixtdevdi(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const ae_int_t i1, const ae_int_t i2, real_2d_array &z); + + +/************************************************************************* +Finding eigenvalues and eigenvectors of a general matrix + +The algorithm finds eigenvalues and eigenvectors of a general matrix by +using the QR algorithm with multiple shifts. The algorithm can find +eigenvalues and both left and right eigenvectors. + +The right eigenvector is a vector x such that A*x = w*x, and the left +eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex +conjugate transposition of vector y). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + VNeeded - flag controlling whether eigenvectors are needed or not. + If VNeeded is equal to: + * 0, eigenvectors are not returned; + * 1, right eigenvectors are returned; + * 2, left eigenvectors are returned; + * 3, both left and right eigenvectors are returned. + +Output parameters: + WR - real parts of eigenvalues. + Array whose index ranges within [0..N-1]. + WR - imaginary parts of eigenvalues. + Array whose index ranges within [0..N-1]. + VL, VR - arrays of left and right eigenvectors (if they are needed). + If WI[i]=0, the respective eigenvalue is a real number, + and it corresponds to the column number I of matrices VL/VR. + If WI[i]>0, we have a pair of complex conjugate numbers with + positive and negative imaginary parts: + the first eigenvalue WR[i] + sqrt(-1)*WI[i]; + the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1]; + WI[i]>0 + WI[i+1] = -WI[i] < 0 + In that case, the eigenvector corresponding to the first + eigenvalue is located in i and i+1 columns of matrices + VL/VR (the column number i contains the real part, and the + column number i+1 contains the imaginary part), and the vector + corresponding to the second eigenvalue is a complex conjugate to + the first vector. + Arrays whose indexes range within [0..N-1, 0..N-1]. + +Result: + True, if the algorithm has converged. + False, if the algorithm has not converged. + +Note 1: + Some users may ask the following question: what if WI[N-1]>0? + WI[N] must contain an eigenvalue which is complex conjugate to the + N-th eigenvalue, but the array has only size N? + The answer is as follows: such a situation cannot occur because the + algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is + strictly less than N-1. + +Note 2: + The algorithm performance depends on the value of the internal parameter + NS of the InternalSchurDecomposition subroutine which defines the number + of shifts in the QR algorithm (similarly to the block width in block-matrix + algorithms of linear algebra). If you require maximum performance + on your machine, it is recommended to adjust this parameter manually. + + +See also the InternalTREVC subroutine. + +The algorithm is based on the LAPACK 3.0 library. +*************************************************************************/ +bool rmatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t vneeded, real_1d_array &wr, real_1d_array &wi, real_2d_array &vl, real_2d_array &vr); + +/************************************************************************* +Generation of a random uniformly distributed (Haar) orthogonal matrix + +INPUT PARAMETERS: + N - matrix size, N>=1 + +OUTPUT PARAMETERS: + A - orthogonal NxN matrix, array[0..N-1,0..N-1] + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonal(const ae_int_t n, real_2d_array &a); + + +/************************************************************************* +Generation of random NxN matrix with given condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a); + + +/************************************************************************* +Generation of a random Haar distributed orthogonal complex matrix + +INPUT PARAMETERS: + N - matrix size, N>=1 + +OUTPUT PARAMETERS: + A - orthogonal NxN matrix, array[0..N-1,0..N-1] + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonal(const ae_int_t n, complex_2d_array &a); + + +/************************************************************************* +Generation of random NxN complex matrix with given condition number C and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a); + + +/************************************************************************* +Generation of random NxN symmetric matrix with given condition number and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void smatrixrndcond(const ae_int_t n, const double c, real_2d_array &a); + + +/************************************************************************* +Generation of random NxN symmetric positive definite matrix with given +condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random SPD matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a); + + +/************************************************************************* +Generation of random NxN Hermitian matrix with given condition number and +norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a); + + +/************************************************************************* +Generation of random NxN Hermitian positive definite matrix with given +condition number and norm2(A)=1 + +INPUT PARAMETERS: + N - matrix size + C - condition number (in 2-norm) + +OUTPUT PARAMETERS: + A - random HPD matrix with norm2(A)=1 and cond(A)=C + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a); + + +/************************************************************************* +Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonalfromtheright(real_2d_array &a, const ae_int_t m, const ae_int_t n); + + +/************************************************************************* +Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - Q*A, where Q is random MxM orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void rmatrixrndorthogonalfromtheleft(real_2d_array &a, const ae_int_t m, const ae_int_t n); + + +/************************************************************************* +Multiplication of MxN complex matrix by NxN random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonalfromtheright(complex_2d_array &a, const ae_int_t m, const ae_int_t n); + + +/************************************************************************* +Multiplication of MxN complex matrix by MxM random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..M-1, 0..N-1] + M, N- matrix size + +OUTPUT PARAMETERS: + A - Q*A, where Q is random MxM orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void cmatrixrndorthogonalfromtheleft(complex_2d_array &a, const ae_int_t m, const ae_int_t n); + + +/************************************************************************* +Symmetric multiplication of NxN matrix by random Haar distributed +orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..N-1, 0..N-1] + N - matrix size + +OUTPUT PARAMETERS: + A - Q'*A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void smatrixrndmultiply(real_2d_array &a, const ae_int_t n); + + +/************************************************************************* +Hermitian multiplication of NxN matrix by random Haar distributed +complex orthogonal matrix + +INPUT PARAMETERS: + A - matrix, array[0..N-1, 0..N-1] + N - matrix size + +OUTPUT PARAMETERS: + A - Q^H*A*Q, where Q is random NxN orthogonal matrix + + -- ALGLIB routine -- + 04.12.2009 + Bochkanov Sergey +*************************************************************************/ +void hmatrixrndmultiply(complex_2d_array &a, const ae_int_t n); + +/************************************************************************* +LU decomposition of a general real matrix with row pivoting + +A is represented as A = P*L*U, where: +* L is lower unitriangular matrix +* U is upper triangular matrix +* P = P0*P1*...*PK, K=min(M,N)-1, + Pi - permutation matrix for I and Pivots[I] + +This is cache-oblivous implementation of LU decomposition. +It is optimized for square matrices. As for rectangular matrices: +* best case - M>>N +* worst case - N>>M, small M, large N, matrix does not fit in CPU cache + +INPUT PARAMETERS: + A - array[0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + + +OUTPUT PARAMETERS: + A - matrices L and U in compact form: + * L is stored under main diagonal + * U is stored on and above main diagonal + Pivots - permutation matrix in compact form. + array[0..Min(M-1,N-1)]. + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixlu(real_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots); + + +/************************************************************************* +LU decomposition of a general complex matrix with row pivoting + +A is represented as A = P*L*U, where: +* L is lower unitriangular matrix +* U is upper triangular matrix +* P = P0*P1*...*PK, K=min(M,N)-1, + Pi - permutation matrix for I and Pivots[I] + +This is cache-oblivous implementation of LU decomposition. It is optimized +for square matrices. As for rectangular matrices: +* best case - M>>N +* worst case - N>>M, small M, large N, matrix does not fit in CPU cache + +INPUT PARAMETERS: + A - array[0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + + +OUTPUT PARAMETERS: + A - matrices L and U in compact form: + * L is stored under main diagonal + * U is stored on and above main diagonal + Pivots - permutation matrix in compact form. + array[0..Min(M-1,N-1)]. + + -- ALGLIB routine -- + 10.01.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixlu(complex_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots); + + +/************************************************************************* +Cache-oblivious Cholesky decomposition + +The algorithm computes Cholesky decomposition of a Hermitian positive- +definite matrix. The result of an algorithm is a representation of A as +A=U'*U or A=L*L' (here X' detones conj(X^T)). + +INPUT PARAMETERS: + A - upper or lower triangle of a factorized matrix. + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - if IsUpper=True, then A contains an upper triangle of + a symmetric matrix, otherwise A contains a lower one. + +OUTPUT PARAMETERS: + A - the result of factorization. If IsUpper=True, then + the upper triangle contains matrix U, so that A = U'*U, + and the elements below the main diagonal are not modified. + Similarly, if IsUpper = False. + +RESULT: + If the matrix is positive-definite, the function returns True. + Otherwise, the function returns False. Contents of A is not determined + in such case. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +bool hpdmatrixcholesky(complex_2d_array &a, const ae_int_t n, const bool isupper); + + +/************************************************************************* +Cache-oblivious Cholesky decomposition + +The algorithm computes Cholesky decomposition of a symmetric positive- +definite matrix. The result of an algorithm is a representation of A as +A=U^T*U or A=L*L^T + +INPUT PARAMETERS: + A - upper or lower triangle of a factorized matrix. + array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - if IsUpper=True, then A contains an upper triangle of + a symmetric matrix, otherwise A contains a lower one. + +OUTPUT PARAMETERS: + A - the result of factorization. If IsUpper=True, then + the upper triangle contains matrix U, so that A = U^T*U, + and the elements below the main diagonal are not modified. + Similarly, if IsUpper = False. + +RESULT: + If the matrix is positive-definite, the function returns True. + Otherwise, the function returns False. Contents of A is not determined + in such case. + + -- ALGLIB routine -- + 15.12.2009 + Bochkanov Sergey +*************************************************************************/ +bool spdmatrixcholesky(real_2d_array &a, const ae_int_t n, const bool isupper); + +/************************************************************************* +Estimate of a matrix condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixrcond1(const real_2d_array &a, const ae_int_t n); + + +/************************************************************************* +Estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixrcondinf(const real_2d_array &a, const ae_int_t n); + + +/************************************************************************* +Condition number estimate of a symmetric positive definite matrix. + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm of condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + A - symmetric positive definite matrix which is given by its + upper or lower triangle depending on the value of + IsUpper. Array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + +Result: + 1/LowerBound(cond(A)), if matrix A is positive definite, + -1, if matrix A is not positive definite, and its condition number + could not be found by this algorithm. + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double spdmatrixrcond(const real_2d_array &a, const ae_int_t n, const bool isupper); + + +/************************************************************************* +Triangular matrix: estimate of a condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array[0..N-1, 0..N-1]. + N - size of A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixtrrcond1(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit); + + +/************************************************************************* +Triangular matrix: estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixtrrcondinf(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit); + + +/************************************************************************* +Condition number estimate of a Hermitian positive definite matrix. + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm of condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + A - Hermitian positive definite matrix which is given by its + upper or lower triangle depending on the value of + IsUpper. Array with elements [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - storage format. + +Result: + 1/LowerBound(cond(A)), if matrix A is positive definite, + -1, if matrix A is not positive definite, and its condition number + could not be found by this algorithm. + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double hpdmatrixrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper); + + +/************************************************************************* +Estimate of a matrix condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixrcond1(const complex_2d_array &a, const ae_int_t n); + + +/************************************************************************* +Estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixrcondinf(const complex_2d_array &a, const ae_int_t n); + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the RMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixlurcond1(const real_2d_array &lua, const ae_int_t n); + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition +(infinity norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the RMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double rmatrixlurcondinf(const real_2d_array &lua, const ae_int_t n); + + +/************************************************************************* +Condition number estimate of a symmetric positive definite matrix given by +Cholesky decomposition. + +The algorithm calculates a lower bound of the condition number. In this +case, the algorithm does not return a lower bound of the condition number, +but an inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + CD - Cholesky decomposition of matrix A, + output of SMatrixCholesky subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double spdmatrixcholeskyrcond(const real_2d_array &a, const ae_int_t n, const bool isupper); + + +/************************************************************************* +Condition number estimate of a Hermitian positive definite matrix given by +Cholesky decomposition. + +The algorithm calculates a lower bound of the condition number. In this +case, the algorithm does not return a lower bound of the condition number, +but an inverse number (to avoid an overflow in case of a singular matrix). + +It should be noted that 1-norm and inf-norm condition numbers of symmetric +matrices are equal, so the algorithm doesn't take into account the +differences between these types of norms. + +Input parameters: + CD - Cholesky decomposition of matrix A, + output of SMatrixCholesky subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double hpdmatrixcholeskyrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper); + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the CMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixlurcond1(const complex_2d_array &lua, const ae_int_t n); + + +/************************************************************************* +Estimate of the condition number of a matrix given by its LU decomposition +(infinity norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + LUA - LU decomposition of a matrix in compact form. Output of + the CMatrixLU subroutine. + N - size of matrix A. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixlurcondinf(const complex_2d_array &lua, const ae_int_t n); + + +/************************************************************************* +Triangular matrix: estimate of a condition number (1-norm) + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array[0..N-1, 0..N-1]. + N - size of A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixtrrcond1(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit); + + +/************************************************************************* +Triangular matrix: estimate of a matrix condition number (infinity-norm). + +The algorithm calculates a lower bound of the condition number. In this case, +the algorithm does not return a lower bound of the condition number, but an +inverse number (to avoid an overflow in case of a singular matrix). + +Input parameters: + A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + IsUpper - True, if the matrix is upper triangular. + IsUnit - True, if the matrix has a unit diagonal. + +Result: 1/LowerBound(cond(A)) + +NOTE: + if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, + 0.0 is returned in such cases. +*************************************************************************/ +double cmatrixtrrcondinf(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit); + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of RMatrixLU subroutine). + Pivots - table of permutations + (the output of RMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code: + * -3 A is singular, or VERY close to singular. + it is filled by zeros in such cases. + * 1 task is solved (but matrix A may be ill-conditioned, + check R1/RInf parameters for condition numbers). + Rep - solver report, see below for more info + A - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + +SOLVER REPORT + +Subroutine sets following fields of the Rep structure: +* R1 reciprocal of condition number: 1/cond(A), 1-norm. +* RInf reciprocal of condition number: 1/cond(A), inf-norm. + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep); +void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + +Result: + True, if the matrix is not singular. + False, if the matrix is singular. + + -- ALGLIB -- + Copyright 2005-2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinverse(real_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep); +void rmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a matrix given by its LU decomposition. + +INPUT PARAMETERS: + A - LU decomposition of the matrix + (output of CMatrixLU subroutine). + Pivots - table of permutations + (the output of CMatrixLU subroutine). + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +OUTPUT PARAMETERS: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 05.02.2010 + Bochkanov Sergey +*************************************************************************/ +void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep); +void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a general matrix. + +Input parameters: + A - matrix + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void cmatrixinverse(complex_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep); +void cmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a symmetric positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of SPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixcholeskyinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep); +void spdmatrixcholeskyinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a symmetric positive definite matrix. + +Given an upper or lower triangle of a symmetric positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void spdmatrixinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep); +void spdmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a Hermitian positive definite matrix which is given +by Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition of the matrix to be inverted: + A=U’*U or A = L*L'. + Output of HPDMatrixCholesky subroutine. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, lower half is used. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixcholeskyinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep); +void hpdmatrixcholeskyinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Inversion of a Hermitian positive definite matrix. + +Given an upper or lower triangle of a Hermitian positive definite matrix, +the algorithm generates matrix A^-1 and saves the upper or lower triangle +depending on the input. + +Input parameters: + A - matrix to be inverted (upper or lower triangle). + Array with elements [0..N-1,0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - storage type (optional): + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Output parameters: + Info - return code, same as in RMatrixLUInverse + Rep - solver report, same as in RMatrixLUInverse + A - inverse of matrix A, same as in RMatrixLUInverse + + -- ALGLIB routine -- + 10.02.2010 + Bochkanov Sergey +*************************************************************************/ +void hpdmatrixinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep); +void hpdmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Triangular matrix inverse (real) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void rmatrixtrinverse(real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep); +void rmatrixtrinverse(real_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep); + + +/************************************************************************* +Triangular matrix inverse (complex) + +The subroutine inverts the following types of matrices: + * upper triangular + * upper triangular with unit diagonal + * lower triangular + * lower triangular with unit diagonal + +In case of an upper (lower) triangular matrix, the inverse matrix will +also be upper (lower) triangular, and after the end of the algorithm, the +inverse matrix replaces the source matrix. The elements below (above) the +main diagonal are not changed by the algorithm. + +If the matrix has a unit diagonal, the inverse matrix also has a unit +diagonal, and the diagonal elements are not passed to the algorithm. + +Input parameters: + A - matrix, array[0..N-1, 0..N-1]. + N - size of matrix A (optional) : + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, size is automatically determined from + matrix size (A must be square matrix) + IsUpper - True, if the matrix is upper triangular. + IsUnit - diagonal type (optional): + * if True, matrix has unit diagonal (a[i,i] are NOT used) + * if False, matrix diagonal is arbitrary + * if not given, False is assumed + +Output parameters: + Info - same as for RMatrixLUInverse + Rep - same as for RMatrixLUInverse + A - same as for RMatrixLUInverse. + + -- ALGLIB -- + Copyright 05.02.2010 by Bochkanov Sergey +*************************************************************************/ +void cmatrixtrinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep); +void cmatrixtrinverse(complex_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep); + +/************************************************************************* +Singular value decomposition of a bidiagonal matrix (extended algorithm) + +The algorithm performs the singular value decomposition of a bidiagonal +matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and P - +orthogonal matrices, S - diagonal matrix with non-negative elements on the +main diagonal, in descending order. + +The algorithm finds singular values. In addition, the algorithm can +calculate matrices Q and P (more precisely, not the matrices, but their +product with given matrices U and VT - U*Q and (P^T)*VT)). Of course, +matrices U and VT can be of any type, including identity. Furthermore, the +algorithm can calculate Q'*C (this product is calculated more effectively +than U*Q, because this calculation operates with rows instead of matrix +columns). + +The feature of the algorithm is its ability to find all singular values +including those which are arbitrarily close to 0 with relative accuracy +close to machine precision. If the parameter IsFractionalAccuracyRequired +is set to True, all singular values will have high relative accuracy close +to machine precision. If the parameter is set to False, only the biggest +singular value will have relative accuracy close to machine precision. +The absolute error of other singular values is equal to the absolute error +of the biggest singular value. + +Input parameters: + D - main diagonal of matrix B. + Array whose index ranges within [0..N-1]. + E - superdiagonal (or subdiagonal) of matrix B. + Array whose index ranges within [0..N-2]. + N - size of matrix B. + IsUpper - True, if the matrix is upper bidiagonal. + IsFractionalAccuracyRequired - + accuracy to search singular values with. + U - matrix to be multiplied by Q. + Array whose indexes range within [0..NRU-1, 0..N-1]. + The matrix can be bigger, in that case only the submatrix + [0..NRU-1, 0..N-1] will be multiplied by Q. + NRU - number of rows in matrix U. + C - matrix to be multiplied by Q'. + Array whose indexes range within [0..N-1, 0..NCC-1]. + The matrix can be bigger, in that case only the submatrix + [0..N-1, 0..NCC-1] will be multiplied by Q'. + NCC - number of columns in matrix C. + VT - matrix to be multiplied by P^T. + Array whose indexes range within [0..N-1, 0..NCVT-1]. + The matrix can be bigger, in that case only the submatrix + [0..N-1, 0..NCVT-1] will be multiplied by P^T. + NCVT - number of columns in matrix VT. + +Output parameters: + D - singular values of matrix B in descending order. + U - if NRU>0, contains matrix U*Q. + VT - if NCVT>0, contains matrix (P^T)*VT. + C - if NCC>0, contains matrix Q'*C. + +Result: + True, if the algorithm has converged. + False, if the algorithm hasn't converged (rare case). + +Additional information: + The type of convergence is controlled by the internal parameter TOL. + If the parameter is greater than 0, the singular values will have + relative accuracy TOL. If TOL<0, the singular values will have + absolute accuracy ABS(TOL)*norm(B). + By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon, + where Epsilon is the machine precision. It is not recommended to use + TOL less than 10*Epsilon since this will considerably slow down the + algorithm and may not lead to error decreasing. +History: + * 31 March, 2007. + changed MAXITR from 6 to 12. + + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999. +*************************************************************************/ +bool rmatrixbdsvd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const bool isupper, const bool isfractionalaccuracyrequired, real_2d_array &u, const ae_int_t nru, real_2d_array &c, const ae_int_t ncc, real_2d_array &vt, const ae_int_t ncvt); + +/************************************************************************* +Singular value decomposition of a rectangular matrix. + +The algorithm calculates the singular value decomposition of a matrix of +size MxN: A = U * S * V^T + +The algorithm finds the singular values and, optionally, matrices U and V^T. +The algorithm can find both first min(M,N) columns of matrix U and rows of +matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM +and NxN respectively). + +Take into account that the subroutine does not return matrix V but V^T. + +Input parameters: + A - matrix to be decomposed. + Array whose indexes range within [0..M-1, 0..N-1]. + M - number of rows in matrix A. + N - number of columns in matrix A. + UNeeded - 0, 1 or 2. See the description of the parameter U. + VTNeeded - 0, 1 or 2. See the description of the parameter VT. + AdditionalMemory - + If the parameter: + * equals 0, the algorithm doesn’t use additional + memory (lower requirements, lower performance). + * equals 1, the algorithm uses additional + memory of size min(M,N)*min(M,N) of real numbers. + It often speeds up the algorithm. + * equals 2, the algorithm uses additional + memory of size M*min(M,N) of real numbers. + It allows to get a maximum performance. + The recommended value of the parameter is 2. + +Output parameters: + W - contains singular values in descending order. + U - if UNeeded=0, U isn't changed, the left singular vectors + are not calculated. + if Uneeded=1, U contains left singular vectors (first + min(M,N) columns of matrix U). Array whose indexes range + within [0..M-1, 0..Min(M,N)-1]. + if UNeeded=2, U contains matrix U wholly. Array whose + indexes range within [0..M-1, 0..M-1]. + VT - if VTNeeded=0, VT isn’t changed, the right singular vectors + are not calculated. + if VTNeeded=1, VT contains right singular vectors (first + min(M,N) rows of matrix V^T). Array whose indexes range + within [0..min(M,N)-1, 0..N-1]. + if VTNeeded=2, VT contains matrix V^T wholly. Array whose + indexes range within [0..N-1, 0..N-1]. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +bool rmatrixsvd(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const ae_int_t uneeded, const ae_int_t vtneeded, const ae_int_t additionalmemory, real_1d_array &w, real_2d_array &u, real_2d_array &vt); + + + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n); +double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots); + + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +double rmatrixdet(const real_2d_array &a, const ae_int_t n); +double rmatrixdet(const real_2d_array &a); + + +/************************************************************************* +Determinant calculation of the matrix given by its LU decomposition. + +Input parameters: + A - LU decomposition of the matrix (output of + RMatrixLU subroutine). + Pivots - table of permutations which were made during + the LU decomposition. + Output of RMatrixLU subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: matrix determinant. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n); +alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots); + + +/************************************************************************* +Calculation of the determinant of a general matrix + +Input parameters: + A - matrix, array[0..N-1, 0..N-1] + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +Result: determinant of matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +alglib::complex cmatrixdet(const complex_2d_array &a, const ae_int_t n); +alglib::complex cmatrixdet(const complex_2d_array &a); + + +/************************************************************************* +Determinant calculation of the matrix given by the Cholesky decomposition. + +Input parameters: + A - Cholesky decomposition, + output of SMatrixCholesky subroutine. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + +As the determinant is equal to the product of squares of diagonal elements, +it’s not necessary to specify which triangle - lower or upper - the matrix +is stored in. + +Result: + matrix determinant. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixcholeskydet(const real_2d_array &a, const ae_int_t n); +double spdmatrixcholeskydet(const real_2d_array &a); + + +/************************************************************************* +Determinant calculation of the symmetric positive definite matrix. + +Input parameters: + A - matrix. Array with elements [0..N-1, 0..N-1]. + N - (optional) size of matrix A: + * if given, only principal NxN submatrix is processed and + overwritten. other elements are unchanged. + * if not given, automatically determined from matrix size + (A must be square matrix) + IsUpper - (optional) storage type: + * if True, symmetric matrix A is given by its upper + triangle, and the lower triangle isn’t used/changed by + function + * if False, symmetric matrix A is given by its lower + triangle, and the upper triangle isn’t used/changed by + function + * if not given, both lower and upper triangles must be + filled. + +Result: + determinant of matrix A. + If matrix A is not positive definite, exception is thrown. + + -- ALGLIB -- + Copyright 2005-2008 by Bochkanov Sergey +*************************************************************************/ +double spdmatrixdet(const real_2d_array &a, const ae_int_t n, const bool isupper); +double spdmatrixdet(const real_2d_array &a); + +/************************************************************************* +Algorithm for solving the following generalized symmetric positive-definite +eigenproblem: + A*x = lambda*B*x (1) or + A*B*x = lambda*x (2) or + B*A*x = lambda*x (3). +where A is a symmetric matrix, B - symmetric positive-definite matrix. +The problem is solved by reducing it to an ordinary symmetric eigenvalue +problem. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrices A and B. + IsUpperA - storage format of matrix A. + B - symmetric positive-definite matrix which is given by + its upper or lower triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperB - storage format of matrix B. + ZNeeded - if ZNeeded is equal to: + * 0, the eigenvectors are not returned; + * 1, the eigenvectors are returned. + ProblemType - if ProblemType is equal to: + * 1, the following problem is solved: A*x = lambda*B*x; + * 2, the following problem is solved: A*B*x = lambda*x; + * 3, the following problem is solved: B*A*x = lambda*x. + +Output parameters: + D - eigenvalues in ascending order. + Array whose index ranges within [0..N-1]. + Z - if ZNeeded is equal to: + * 0, Z hasn’t changed; + * 1, Z contains eigenvectors. + Array whose indexes range within [0..N-1, 0..N-1]. + The eigenvectors are stored in matrix columns. It should + be noted that the eigenvectors in such problems do not + form an orthogonal system. + +Result: + True, if the problem was solved successfully. + False, if the error occurred during the Cholesky decomposition of matrix + B (the matrix isn’t positive-definite) or during the work of the iterative + algorithm for solving the symmetric eigenproblem. + +See also the GeneralizedSymmetricDefiniteEVDReduce subroutine. + + -- ALGLIB -- + Copyright 1.28.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixgevd(const real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t zneeded, const ae_int_t problemtype, real_1d_array &d, real_2d_array &z); + + +/************************************************************************* +Algorithm for reduction of the following generalized symmetric positive- +definite eigenvalue problem: + A*x = lambda*B*x (1) or + A*B*x = lambda*x (2) or + B*A*x = lambda*x (3) +to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and +the given problems are the same, and the eigenvectors of the given problem +could be obtained by multiplying the obtained eigenvectors by the +transformation matrix x = R*y). + +Here A is a symmetric matrix, B - symmetric positive-definite matrix. + +Input parameters: + A - symmetric matrix which is given by its upper or lower + triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrices A and B. + IsUpperA - storage format of matrix A. + B - symmetric positive-definite matrix which is given by + its upper or lower triangular part. + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperB - storage format of matrix B. + ProblemType - if ProblemType is equal to: + * 1, the following problem is solved: A*x = lambda*B*x; + * 2, the following problem is solved: A*B*x = lambda*x; + * 3, the following problem is solved: B*A*x = lambda*x. + +Output parameters: + A - symmetric matrix which is given by its upper or lower + triangle depending on IsUpperA. Contains matrix C. + Array whose indexes range within [0..N-1, 0..N-1]. + R - upper triangular or low triangular transformation matrix + which is used to obtain the eigenvectors of a given problem + as the product of eigenvectors of C (from the right) and + matrix R (from the left). If the matrix is upper + triangular, the elements below the main diagonal + are equal to 0 (and vice versa). Thus, we can perform + the multiplication without taking into account the + internal structure (which is an easier though less + effective way). + Array whose indexes range within [0..N-1, 0..N-1]. + IsUpperR - type of matrix R (upper or lower triangular). + +Result: + True, if the problem was reduced successfully. + False, if the error occurred during the Cholesky decomposition of + matrix B (the matrix is not positive-definite). + + -- ALGLIB -- + Copyright 1.28.2006 by Bochkanov Sergey +*************************************************************************/ +bool smatrixgevdreduce(real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t problemtype, real_2d_array &r, bool &isupperr); + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a number to an element +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdRow - row where the element to be updated is stored. + UpdColumn - column where the element to be updated is stored. + UpdVal - a number to be added to the element. + + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdatesimple(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const ae_int_t updcolumn, const double updval); + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a vector to a row +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdRow - the row of A whose vector V was added. + 0 <= Row <= N-1 + V - the vector to be added to a row. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdaterow(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const real_1d_array &v); + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm updates matrix A^-1 when adding a vector to a column +of matrix A. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + UpdColumn - the column of A whose vector U was added. + 0 <= UpdColumn <= N-1 + U - the vector to be added to a column. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of modified matrix A. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdatecolumn(real_2d_array &inva, const ae_int_t n, const ae_int_t updcolumn, const real_1d_array &u); + + +/************************************************************************* +Inverse matrix update by the Sherman-Morrison formula + +The algorithm computes the inverse of matrix A+u*v’ by using the given matrix +A^-1 and the vectors u and v. + +Input parameters: + InvA - inverse of matrix A. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of matrix A. + U - the vector modifying the matrix. + Array whose index ranges within [0..N-1]. + V - the vector modifying the matrix. + Array whose index ranges within [0..N-1]. + +Output parameters: + InvA - inverse of matrix A + u*v'. + + -- ALGLIB -- + Copyright 2005 by Bochkanov Sergey +*************************************************************************/ +void rmatrixinvupdateuv(real_2d_array &inva, const ae_int_t n, const real_1d_array &u, const real_1d_array &v); + +/************************************************************************* +Subroutine performing the Schur decomposition of a general matrix by using +the QR algorithm with multiple shifts. + +The source matrix A is represented as S'*A*S = T, where S is an orthogonal +matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of +sizes 1x1 and 2x2 on the main diagonal). + +Input parameters: + A - matrix to be decomposed. + Array whose indexes range within [0..N-1, 0..N-1]. + N - size of A, N>=0. + + +Output parameters: + A - contains matrix T. + Array whose indexes range within [0..N-1, 0..N-1]. + S - contains Schur vectors. + Array whose indexes range within [0..N-1, 0..N-1]. + +Note 1: + The block structure of matrix T can be easily recognized: since all + the elements below the blocks are zeros, the elements a[i+1,i] which + are equal to 0 show the block border. + +Note 2: + The algorithm performance depends on the value of the internal parameter + NS of the InternalSchurDecomposition subroutine which defines the number + of shifts in the QR algorithm (similarly to the block width in block-matrix + algorithms in linear algebra). If you require maximum performance on + your machine, it is recommended to adjust this parameter manually. + +Result: + True, + if the algorithm has converged and parameters A and S contain the result. + False, + if the algorithm has not converged. + +Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library). +*************************************************************************/ +bool rmatrixschur(real_2d_array &a, const ae_int_t n, real_2d_array &s); +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +void ablassplitlength(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state); +void ablascomplexsplitlength(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t* n1, + ae_int_t* n2, + ae_state *_state); +ae_int_t ablasblocksize(/* Real */ ae_matrix* a, ae_state *_state); +ae_int_t ablascomplexblocksize(/* Complex */ ae_matrix* a, + ae_state *_state); +ae_int_t ablasmicroblocksize(ae_state *_state); +void cmatrixtranspose(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state); +void rmatrixtranspose(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state); +void cmatrixcopy(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state); +void rmatrixcopy(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_state *_state); +void cmatrixrank1(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Complex */ ae_vector* u, + ae_int_t iu, + /* Complex */ ae_vector* v, + ae_int_t iv, + ae_state *_state); +void rmatrixrank1(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + /* Real */ ae_vector* u, + ae_int_t iu, + /* Real */ ae_vector* v, + ae_int_t iv, + ae_state *_state); +void cmatrixmv(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Complex */ ae_vector* x, + ae_int_t ix, + /* Complex */ ae_vector* y, + ae_int_t iy, + ae_state *_state); +void rmatrixmv(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t opa, + /* Real */ ae_vector* x, + ae_int_t ix, + /* Real */ ae_vector* y, + ae_int_t iy, + ae_state *_state); +void cmatrixrighttrsm(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +void cmatrixlefttrsm(ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Complex */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +void rmatrixrighttrsm(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +void rmatrixlefttrsm(ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* a, + ae_int_t i1, + ae_int_t j1, + ae_bool isupper, + ae_bool isunit, + ae_int_t optype, + /* Real */ ae_matrix* x, + ae_int_t i2, + ae_int_t j2, + ae_state *_state); +void cmatrixsyrk(ae_int_t n, + ae_int_t k, + double alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state); +void rmatrixsyrk(ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_bool isupper, + ae_state *_state); +void cmatrixgemm(ae_int_t m, + ae_int_t n, + ae_int_t k, + ae_complex alpha, + /* Complex */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Complex */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + ae_complex beta, + /* Complex */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state); +void rmatrixgemm(ae_int_t m, + ae_int_t n, + ae_int_t k, + double alpha, + /* Real */ ae_matrix* a, + ae_int_t ia, + ae_int_t ja, + ae_int_t optypea, + /* Real */ ae_matrix* b, + ae_int_t ib, + ae_int_t jb, + ae_int_t optypeb, + double beta, + /* Real */ ae_matrix* c, + ae_int_t ic, + ae_int_t jc, + ae_state *_state); +void rmatrixqr(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state); +void rmatrixlq(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state); +void cmatrixqr(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_state *_state); +void cmatrixlq(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_state *_state); +void rmatrixqrunpackq(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_int_t qcolumns, + /* Real */ ae_matrix* q, + ae_state *_state); +void rmatrixqrunpackr(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* r, + ae_state *_state); +void rmatrixlqunpackq(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_int_t qrows, + /* Real */ ae_matrix* q, + ae_state *_state); +void rmatrixlqunpackl(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_matrix* l, + ae_state *_state); +void cmatrixqrunpackq(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_int_t qcolumns, + /* Complex */ ae_matrix* q, + ae_state *_state); +void cmatrixqrunpackr(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* r, + ae_state *_state); +void cmatrixlqunpackq(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_vector* tau, + ae_int_t qrows, + /* Complex */ ae_matrix* q, + ae_state *_state); +void cmatrixlqunpackl(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Complex */ ae_matrix* l, + ae_state *_state); +void rmatrixbd(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tauq, + /* Real */ ae_vector* taup, + ae_state *_state); +void rmatrixbdunpackq(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tauq, + ae_int_t qcolumns, + /* Real */ ae_matrix* q, + ae_state *_state); +void rmatrixbdmultiplybyq(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* tauq, + /* Real */ ae_matrix* z, + ae_int_t zrows, + ae_int_t zcolumns, + ae_bool fromtheright, + ae_bool dotranspose, + ae_state *_state); +void rmatrixbdunpackpt(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* taup, + ae_int_t ptrows, + /* Real */ ae_matrix* pt, + ae_state *_state); +void rmatrixbdmultiplybyp(/* Real */ ae_matrix* qp, + ae_int_t m, + ae_int_t n, + /* Real */ ae_vector* taup, + /* Real */ ae_matrix* z, + ae_int_t zrows, + ae_int_t zcolumns, + ae_bool fromtheright, + ae_bool dotranspose, + ae_state *_state); +void rmatrixbdunpackdiagonals(/* Real */ ae_matrix* b, + ae_int_t m, + ae_int_t n, + ae_bool* isupper, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_state *_state); +void rmatrixhessenberg(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + ae_state *_state); +void rmatrixhessenbergunpackq(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_vector* tau, + /* Real */ ae_matrix* q, + ae_state *_state); +void rmatrixhessenbergunpackh(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_matrix* h, + ae_state *_state); +void smatrixtd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tau, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_state *_state); +void smatrixtdunpackq(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tau, + /* Real */ ae_matrix* q, + ae_state *_state); +void hmatrixtd(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tau, + /* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_state *_state); +void hmatrixtdunpackq(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + /* Complex */ ae_vector* tau, + /* Complex */ ae_matrix* q, + ae_state *_state); +ae_bool smatrixevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + /* Real */ ae_vector* d, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool smatrixevdr(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + double b1, + double b2, + ae_int_t* m, + /* Real */ ae_vector* w, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool smatrixevdi(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* w, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool hmatrixevd(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + /* Real */ ae_vector* d, + /* Complex */ ae_matrix* z, + ae_state *_state); +ae_bool hmatrixevdr(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + double b1, + double b2, + ae_int_t* m, + /* Real */ ae_vector* w, + /* Complex */ ae_matrix* z, + ae_state *_state); +ae_bool hmatrixevdi(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t zneeded, + ae_bool isupper, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_vector* w, + /* Complex */ ae_matrix* z, + ae_state *_state); +ae_bool smatrixtdevd(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool smatrixtdevdr(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + double a, + double b, + ae_int_t* m, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool smatrixtdevdi(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_int_t zneeded, + ae_int_t i1, + ae_int_t i2, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool rmatrixevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t vneeded, + /* Real */ ae_vector* wr, + /* Real */ ae_vector* wi, + /* Real */ ae_matrix* vl, + /* Real */ ae_matrix* vr, + ae_state *_state); +void rmatrixrndorthogonal(ae_int_t n, + /* Real */ ae_matrix* a, + ae_state *_state); +void rmatrixrndcond(ae_int_t n, + double c, + /* Real */ ae_matrix* a, + ae_state *_state); +void cmatrixrndorthogonal(ae_int_t n, + /* Complex */ ae_matrix* a, + ae_state *_state); +void cmatrixrndcond(ae_int_t n, + double c, + /* Complex */ ae_matrix* a, + ae_state *_state); +void smatrixrndcond(ae_int_t n, + double c, + /* Real */ ae_matrix* a, + ae_state *_state); +void spdmatrixrndcond(ae_int_t n, + double c, + /* Real */ ae_matrix* a, + ae_state *_state); +void hmatrixrndcond(ae_int_t n, + double c, + /* Complex */ ae_matrix* a, + ae_state *_state); +void hpdmatrixrndcond(ae_int_t n, + double c, + /* Complex */ ae_matrix* a, + ae_state *_state); +void rmatrixrndorthogonalfromtheright(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state); +void rmatrixrndorthogonalfromtheleft(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state); +void cmatrixrndorthogonalfromtheright(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state); +void cmatrixrndorthogonalfromtheleft(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_state *_state); +void smatrixrndmultiply(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +void hmatrixrndmultiply(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +void rmatrixlu(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state); +void cmatrixlu(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state); +ae_bool hpdmatrixcholesky(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +ae_bool spdmatrixcholesky(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +void rmatrixlup(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state); +void cmatrixlup(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state); +void rmatrixplu(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state); +void cmatrixplu(/* Complex */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + /* Integer */ ae_vector* pivots, + ae_state *_state); +ae_bool spdmatrixcholeskyrec(/* Real */ ae_matrix* a, + ae_int_t offs, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* tmp, + ae_state *_state); +double rmatrixrcond1(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +double rmatrixrcondinf(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +double spdmatrixrcond(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +double rmatrixtrrcond1(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state); +double rmatrixtrrcondinf(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state); +double hpdmatrixrcond(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +double cmatrixrcond1(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +double cmatrixrcondinf(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +double rmatrixlurcond1(/* Real */ ae_matrix* lua, + ae_int_t n, + ae_state *_state); +double rmatrixlurcondinf(/* Real */ ae_matrix* lua, + ae_int_t n, + ae_state *_state); +double spdmatrixcholeskyrcond(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +double hpdmatrixcholeskyrcond(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +double cmatrixlurcond1(/* Complex */ ae_matrix* lua, + ae_int_t n, + ae_state *_state); +double cmatrixlurcondinf(/* Complex */ ae_matrix* lua, + ae_int_t n, + ae_state *_state); +double cmatrixtrrcond1(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state); +double cmatrixtrrcondinf(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_state *_state); +double rcondthreshold(ae_state *_state); +void rmatrixluinverse(/* Real */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void rmatrixinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void cmatrixluinverse(/* Complex */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void cmatrixinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void spdmatrixcholeskyinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void spdmatrixinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void hpdmatrixcholeskyinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void hpdmatrixinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void rmatrixtrinverse(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +void cmatrixtrinverse(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_bool isunit, + ae_int_t* info, + matinvreport* rep, + ae_state *_state); +ae_bool _matinvreport_init(matinvreport* p, ae_state *_state, ae_bool make_automatic); +ae_bool _matinvreport_init_copy(matinvreport* dst, matinvreport* src, ae_state *_state, ae_bool make_automatic); +void _matinvreport_clear(matinvreport* p); +ae_bool rmatrixbdsvd(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_bool isupper, + ae_bool isfractionalaccuracyrequired, + /* Real */ ae_matrix* u, + ae_int_t nru, + /* Real */ ae_matrix* c, + ae_int_t ncc, + /* Real */ ae_matrix* vt, + ae_int_t ncvt, + ae_state *_state); +ae_bool bidiagonalsvddecomposition(/* Real */ ae_vector* d, + /* Real */ ae_vector* e, + ae_int_t n, + ae_bool isupper, + ae_bool isfractionalaccuracyrequired, + /* Real */ ae_matrix* u, + ae_int_t nru, + /* Real */ ae_matrix* c, + ae_int_t ncc, + /* Real */ ae_matrix* vt, + ae_int_t ncvt, + ae_state *_state); +ae_bool rmatrixsvd(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + ae_int_t uneeded, + ae_int_t vtneeded, + ae_int_t additionalmemory, + /* Real */ ae_vector* w, + /* Real */ ae_matrix* u, + /* Real */ ae_matrix* vt, + ae_state *_state); +void fblscholeskysolve(/* Real */ ae_matrix* cha, + double sqrtscalea, + ae_int_t n, + ae_bool isupper, + /* Real */ ae_vector* xb, + /* Real */ ae_vector* tmp, + ae_state *_state); +void fblssolvecgx(/* Real */ ae_matrix* a, + ae_int_t m, + ae_int_t n, + double alpha, + /* Real */ ae_vector* b, + /* Real */ ae_vector* x, + /* Real */ ae_vector* buf, + ae_state *_state); +void fblscgcreate(/* Real */ ae_vector* x, + /* Real */ ae_vector* b, + ae_int_t n, + fblslincgstate* state, + ae_state *_state); +ae_bool fblscgiteration(fblslincgstate* state, ae_state *_state); +ae_bool _fblslincgstate_init(fblslincgstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _fblslincgstate_init_copy(fblslincgstate* dst, fblslincgstate* src, ae_state *_state, ae_bool make_automatic); +void _fblslincgstate_clear(fblslincgstate* p); +double rmatrixludet(/* Real */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_state *_state); +double rmatrixdet(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +ae_complex cmatrixludet(/* Complex */ ae_matrix* a, + /* Integer */ ae_vector* pivots, + ae_int_t n, + ae_state *_state); +ae_complex cmatrixdet(/* Complex */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +double spdmatrixcholeskydet(/* Real */ ae_matrix* a, + ae_int_t n, + ae_state *_state); +double spdmatrixdet(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isupper, + ae_state *_state); +ae_bool smatrixgevd(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isuppera, + /* Real */ ae_matrix* b, + ae_bool isupperb, + ae_int_t zneeded, + ae_int_t problemtype, + /* Real */ ae_vector* d, + /* Real */ ae_matrix* z, + ae_state *_state); +ae_bool smatrixgevdreduce(/* Real */ ae_matrix* a, + ae_int_t n, + ae_bool isuppera, + /* Real */ ae_matrix* b, + ae_bool isupperb, + ae_int_t problemtype, + /* Real */ ae_matrix* r, + ae_bool* isupperr, + ae_state *_state); +void rmatrixinvupdatesimple(/* Real */ ae_matrix* inva, + ae_int_t n, + ae_int_t updrow, + ae_int_t updcolumn, + double updval, + ae_state *_state); +void rmatrixinvupdaterow(/* Real */ ae_matrix* inva, + ae_int_t n, + ae_int_t updrow, + /* Real */ ae_vector* v, + ae_state *_state); +void rmatrixinvupdatecolumn(/* Real */ ae_matrix* inva, + ae_int_t n, + ae_int_t updcolumn, + /* Real */ ae_vector* u, + ae_state *_state); +void rmatrixinvupdateuv(/* Real */ ae_matrix* inva, + ae_int_t n, + /* Real */ ae_vector* u, + /* Real */ ae_vector* v, + ae_state *_state); +ae_bool rmatrixschur(/* Real */ ae_matrix* a, + ae_int_t n, + /* Real */ ae_matrix* s, + ae_state *_state); + +} +#endif + diff --git a/contrib/lbfgs/optimization.cpp b/contrib/lbfgs/optimization.cpp new file mode 100755 index 0000000000..2f4c034023 --- /dev/null +++ b/contrib/lbfgs/optimization.cpp @@ -0,0 +1,11827 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#include "stdafx.h" +#include "optimization.h" + +// disable some irrelevant warnings +#if (AE_COMPILER==AE_MSVC) +#pragma warning(disable:4100) +#pragma warning(disable:4127) +#pragma warning(disable:4702) +#pragma warning(disable:4996) +#endif +using namespace std; + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + + +/************************************************************************* + +*************************************************************************/ +_minlbfgsstate_owner::_minlbfgsstate_owner() +{ + p_struct = (alglib_impl::minlbfgsstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlbfgsstate_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlbfgsstate_owner::_minlbfgsstate_owner(const _minlbfgsstate_owner &rhs) +{ + p_struct = (alglib_impl::minlbfgsstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlbfgsstate_init_copy(p_struct, const_cast<alglib_impl::minlbfgsstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlbfgsstate_owner& _minlbfgsstate_owner::operator=(const _minlbfgsstate_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minlbfgsstate_clear(p_struct); + if( !alglib_impl::_minlbfgsstate_init_copy(p_struct, const_cast<alglib_impl::minlbfgsstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minlbfgsstate_owner::~_minlbfgsstate_owner() +{ + alglib_impl::_minlbfgsstate_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minlbfgsstate* _minlbfgsstate_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minlbfgsstate* _minlbfgsstate_owner::c_ptr() const +{ + return const_cast<alglib_impl::minlbfgsstate*>(p_struct); +} +minlbfgsstate::minlbfgsstate() : _minlbfgsstate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +minlbfgsstate::minlbfgsstate(const minlbfgsstate &rhs):_minlbfgsstate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +minlbfgsstate& minlbfgsstate::operator=(const minlbfgsstate &rhs) +{ + if( this==&rhs ) + return *this; + _minlbfgsstate_owner::operator=(rhs); + return *this; +} + +minlbfgsstate::~minlbfgsstate() +{ +} + + +/************************************************************************* + +*************************************************************************/ +_minlbfgsreport_owner::_minlbfgsreport_owner() +{ + p_struct = (alglib_impl::minlbfgsreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlbfgsreport_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlbfgsreport_owner::_minlbfgsreport_owner(const _minlbfgsreport_owner &rhs) +{ + p_struct = (alglib_impl::minlbfgsreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlbfgsreport_init_copy(p_struct, const_cast<alglib_impl::minlbfgsreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlbfgsreport_owner& _minlbfgsreport_owner::operator=(const _minlbfgsreport_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minlbfgsreport_clear(p_struct); + if( !alglib_impl::_minlbfgsreport_init_copy(p_struct, const_cast<alglib_impl::minlbfgsreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minlbfgsreport_owner::~_minlbfgsreport_owner() +{ + alglib_impl::_minlbfgsreport_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minlbfgsreport* _minlbfgsreport_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minlbfgsreport* _minlbfgsreport_owner::c_ptr() const +{ + return const_cast<alglib_impl::minlbfgsreport*>(p_struct); +} +minlbfgsreport::minlbfgsreport() : _minlbfgsreport_owner() ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype) +{ +} + +minlbfgsreport::minlbfgsreport(const minlbfgsreport &rhs):_minlbfgsreport_owner(rhs) ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype) +{ +} + +minlbfgsreport& minlbfgsreport::operator=(const minlbfgsreport &rhs) +{ + if( this==&rhs ) + return *this; + _minlbfgsreport_owner::operator=(rhs); + return *this; +} + +minlbfgsreport::~minlbfgsreport() +{ +} + +/************************************************************************* + LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using a quasi- +Newton method (LBFGS scheme) which is optimized to use a minimum amount +of memory. +The subroutine generates the approximation of an inverse Hessian matrix by +using information about the last M steps of the algorithm (instead of N). +It lessens a required amount of memory from a value of order N^2 to a +value of order 2*N*M. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinLBFGSCreate() call +2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax() + and other functions +3. User calls MinLBFGSOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinLBFGSResults() to get solution +5. Optionally user may call MinLBFGSRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLBFGSRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension. N>0 + M - number of corrections in the BFGS scheme of Hessian + approximation update. Recommended value: 3<=M<=7. The smaller + value causes worse convergence, the bigger will not cause a + considerably better convergence, but will cause a fall in the + performance. M<=N. + X - initial solution approximation, array[0..N-1]. + + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + +NOTES: +1. you may tune stopping conditions with MinLBFGSSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLBFGSSetStpMax() function to bound algorithm's steps. However, + L-BFGS rarely needs such a tuning. + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgscreate(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlbfgsstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgscreate(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using a quasi- +Newton method (LBFGS scheme) which is optimized to use a minimum amount +of memory. +The subroutine generates the approximation of an inverse Hessian matrix by +using information about the last M steps of the algorithm (instead of N). +It lessens a required amount of memory from a value of order N^2 to a +value of order 2*N*M. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinLBFGSCreate() call +2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax() + and other functions +3. User calls MinLBFGSOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinLBFGSResults() to get solution +5. Optionally user may call MinLBFGSRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLBFGSRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension. N>0 + M - number of corrections in the BFGS scheme of Hessian + approximation update. Recommended value: 3<=M<=7. The smaller + value causes worse convergence, the bigger will not cause a + considerably better convergence, but will cause a fall in the + performance. M<=N. + X - initial solution approximation, array[0..N-1]. + + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + +NOTES: +1. you may tune stopping conditions with MinLBFGSSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLBFGSSetStpMax() function to bound algorithm's steps. However, + L-BFGS rarely needs such a tuning. + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgscreate(const ae_int_t m, const real_1d_array &x, minlbfgsstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgscreate(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets stopping conditions for L-BFGS optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetcond(const minlbfgsstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgssetcond(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinLBFGSOptimize(). + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetxrep(const minlbfgsstate &state, const bool needxrep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgssetxrep(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), needxrep, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0 (default), if + you don't want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetstpmax(const minlbfgsstate &state, const double stpmax) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgssetstpmax(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), stpmax, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Modification of the preconditioner: +default preconditioner (simple scaling) is used. + +INPUT PARAMETERS: + State - structure which stores algorithm state + +After call to this function preconditioner is changed to the default one. + +NOTE: you can change preconditioner "on the fly", during algorithm +iterations. + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetdefaultpreconditioner(const minlbfgsstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgssetdefaultpreconditioner(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Modification of the preconditioner: +Cholesky factorization of approximate Hessian is used. + +INPUT PARAMETERS: + State - structure which stores algorithm state + P - triangular preconditioner, Cholesky factorization of + the approximate Hessian. array[0..N-1,0..N-1], + (if larger, only leading N elements are used). + IsUpper - whether upper or lower triangle of P is given + (other triangle is not referenced) + +After call to this function preconditioner is changed to P (P is copied +into the internal buffer). + +NOTE: you can change preconditioner "on the fly", during algorithm +iterations. + +NOTE 2: P should be nonsingular. Exception will be thrown otherwise. It +also should be well conditioned, although only strict non-singularity is +tested. + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetcholeskypreconditioner(const minlbfgsstate &state, const real_2d_array &p, const bool isupper) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgssetcholeskypreconditioner(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(p.c_ptr()), isupper, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minlbfgsiteration(const minlbfgsstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::minlbfgsiteration(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minlbfgsoptimize(minlbfgsstate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( grad==NULL ) + throw ap_error("ALGLIB: error in 'minlbfgsoptimize()' (grad is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minlbfgsiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needfg ) + { + grad(state.x, state.f, state.g, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minlbfgsoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + + +/************************************************************************* +L-BFGS algorithm results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -2 rounding errors prevent further improvement. + X contains best point found. + * -1 incorrect parameters were specified + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsresults(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgsresults(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +L-BFGS algorithm results + +Buffered implementation of MinLBFGSResults which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.08.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsresultsbuf(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgsresultsbuf(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine restarts LBFGS algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used to store algorithm state + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsrestartfrom(const minlbfgsstate &state, const real_1d_array &x) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlbfgsrestartfrom(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Levenberg-Marquardt optimizer. + +This structure should be created using one of the MinLMCreate???() +functions. You should not access its fields directly; use ALGLIB functions +to work with it. +*************************************************************************/ +_minlmstate_owner::_minlmstate_owner() +{ + p_struct = (alglib_impl::minlmstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlmstate_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlmstate_owner::_minlmstate_owner(const _minlmstate_owner &rhs) +{ + p_struct = (alglib_impl::minlmstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlmstate_init_copy(p_struct, const_cast<alglib_impl::minlmstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlmstate_owner& _minlmstate_owner::operator=(const _minlmstate_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minlmstate_clear(p_struct); + if( !alglib_impl::_minlmstate_init_copy(p_struct, const_cast<alglib_impl::minlmstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minlmstate_owner::~_minlmstate_owner() +{ + alglib_impl::_minlmstate_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minlmstate* _minlmstate_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minlmstate* _minlmstate_owner::c_ptr() const +{ + return const_cast<alglib_impl::minlmstate*>(p_struct); +} +minlmstate::minlmstate() : _minlmstate_owner() ,needf(p_struct->needf),needfg(p_struct->needfg),needfgh(p_struct->needfgh),needfi(p_struct->needfi),needfij(p_struct->needfij),xupdated(p_struct->xupdated),f(p_struct->f),fi(&p_struct->fi),g(&p_struct->g),h(&p_struct->h),j(&p_struct->j),x(&p_struct->x) +{ +} + +minlmstate::minlmstate(const minlmstate &rhs):_minlmstate_owner(rhs) ,needf(p_struct->needf),needfg(p_struct->needfg),needfgh(p_struct->needfgh),needfi(p_struct->needfi),needfij(p_struct->needfij),xupdated(p_struct->xupdated),f(p_struct->f),fi(&p_struct->fi),g(&p_struct->g),h(&p_struct->h),j(&p_struct->j),x(&p_struct->x) +{ +} + +minlmstate& minlmstate::operator=(const minlmstate &rhs) +{ + if( this==&rhs ) + return *this; + _minlmstate_owner::operator=(rhs); + return *this; +} + +minlmstate::~minlmstate() +{ +} + + +/************************************************************************* +Optimization report, filled by MinLMResults() function + +FIELDS: +* TerminationType, completetion code: + * -9 derivative correctness check failed; + see Rep.WrongNum, Rep.WrongI, Rep.WrongJ for + more information. + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient is no more than EpsG. + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible +* IterationsCount, contains iterations count +* NFunc, number of function calculations +* NJac, number of Jacobi matrix calculations +* NGrad, number of gradient calculations +* NHess, number of Hessian calculations +* NCholesky, number of Cholesky decomposition calculations +*************************************************************************/ +_minlmreport_owner::_minlmreport_owner() +{ + p_struct = (alglib_impl::minlmreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlmreport_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlmreport_owner::_minlmreport_owner(const _minlmreport_owner &rhs) +{ + p_struct = (alglib_impl::minlmreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minlmreport_init_copy(p_struct, const_cast<alglib_impl::minlmreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minlmreport_owner& _minlmreport_owner::operator=(const _minlmreport_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minlmreport_clear(p_struct); + if( !alglib_impl::_minlmreport_init_copy(p_struct, const_cast<alglib_impl::minlmreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minlmreport_owner::~_minlmreport_owner() +{ + alglib_impl::_minlmreport_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minlmreport* _minlmreport_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minlmreport* _minlmreport_owner::c_ptr() const +{ + return const_cast<alglib_impl::minlmreport*>(p_struct); +} +minlmreport::minlmreport() : _minlmreport_owner() ,iterationscount(p_struct->iterationscount),terminationtype(p_struct->terminationtype),nfunc(p_struct->nfunc),njac(p_struct->njac),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky) +{ +} + +minlmreport::minlmreport(const minlmreport &rhs):_minlmreport_owner(rhs) ,iterationscount(p_struct->iterationscount),terminationtype(p_struct->terminationtype),nfunc(p_struct->nfunc),njac(p_struct->njac),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky) +{ +} + +minlmreport& minlmreport::operator=(const minlmreport &rhs) +{ + if( this==&rhs ) + return *this; + _minlmreport_owner::operator=(rhs); + return *this; +} + +minlmreport::~minlmreport() +{ +} + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] and Jacobian of f[]. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() and jac() callbacks. +First one is used to calculate f[] at given point, second one calculates +f[] and Jacobian df[i]/dx[j]. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatevj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] and Jacobian of f[]. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() and jac() callbacks. +First one is used to calculate f[] at given point, second one calculates +f[] and Jacobian df[i]/dx[j]. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevj(const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatevj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] only. Finite differences are used to +calculate Jacobian. + + +REQUIREMENTS: +This algorithm will request following information during its operation: +* function vector f[] at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() callback. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not accept function vector), but +it will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateV() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + DiffStep- differentiation step, >0 + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +See also MinLMIteration, MinLMResults. + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatev(const ae_int_t n, const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatev(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), diffstep, const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] only. Finite differences are used to +calculate Jacobian. + + +REQUIREMENTS: +This algorithm will request following information during its operation: +* function vector f[] at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() callback. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not accept function vector), but +it will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateV() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + DiffStep- differentiation step, >0 + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +See also MinLMIteration, MinLMResults. + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatev(const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatev(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), diffstep, const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of general form (not "sum-of- +-squares") function + F = F(x[0], ..., x[n-1]) +using its gradient and Hessian. Levenberg-Marquardt modification with +L-BFGS pre-optimization and internal pre-conditioned L-BFGS optimization +after each Levenberg-Marquardt step is used. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function value F at given point X +* F and gradient G (simultaneously) at given point X +* F, G and Hessian H (simultaneously) at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts func(), grad() and hess() +function pointers. First pointer is used to calculate F at given point, +second one calculates F(x) and grad F(x), third one calculates F(x), +grad F(x), hess F(x). + +You can try to initialize MinLMState structure with FGH-function and then +use incorrect version of MinLMOptimize() (for example, version which does +not provide Hessian matrix), but it will lead to exception being thrown +after first attempt to calculate Hessian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateFGH() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + pointers (delegates, etc.) to callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgh(const ae_int_t n, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatefgh(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of general form (not "sum-of- +-squares") function + F = F(x[0], ..., x[n-1]) +using its gradient and Hessian. Levenberg-Marquardt modification with +L-BFGS pre-optimization and internal pre-conditioned L-BFGS optimization +after each Levenberg-Marquardt step is used. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function value F at given point X +* F and gradient G (simultaneously) at given point X +* F, G and Hessian H (simultaneously) at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts func(), grad() and hess() +function pointers. First pointer is used to calculate F at given point, +second one calculates F(x) and grad F(x), third one calculates F(x), +grad F(x), hess F(x). + +You can try to initialize MinLMState structure with FGH-function and then +use incorrect version of MinLMOptimize() (for example, version which does +not provide Hessian matrix), but it will lead to exception being thrown +after first attempt to calculate Hessian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateFGH() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + pointers (delegates, etc.) to callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgh(const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatefgh(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using: +* value of function vector f[] +* value of Jacobian of f[] +* gradient of merit function F(x) + +This function creates optimizer which uses acceleration strategy 2. Cheap +gradient of merit function (which is twice the product of function vector +and Jacobian) is used for accelerated iterations (see User Guide for more +info on this subject). + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point +* gradient of + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec(), jac() and grad() +callbacks. First one is used to calculate f[] at given point, second one +calculates f[] and Jacobian df[i]/dx[j], last one calculates gradient of +merit function F(x). + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVGJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatevgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using: +* value of function vector f[] +* value of Jacobian of f[] +* gradient of merit function F(x) + +This function creates optimizer which uses acceleration strategy 2. Cheap +gradient of merit function (which is twice the product of function vector +and Jacobian) is used for accelerated iterations (see User Guide for more +info on this subject). + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point +* gradient of + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec(), jac() and grad() +callbacks. First one is used to calculate f[] at given point, second one +calculates f[] and Jacobian df[i]/dx[j], last one calculates gradient of +merit function F(x). + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVGJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevgj(const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatevgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: + +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), gradient of F(), function vector f[] and Jacobian of +f[]. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVGJ, which +provides similar, but more consistent interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatefgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: + +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), gradient of F(), function vector f[] and Jacobian of +f[]. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVGJ, which +provides similar, but more consistent interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgj(const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatefgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), function vector f[] and Jacobian of f[]. Classic +Levenberg-Marquardt method is used. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVJ, which +provides similar, but more consistent and feature-rich interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatefj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), function vector f[] and Jacobian of f[]. Classic +Levenberg-Marquardt method is used. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVJ, which +provides similar, but more consistent and feature-rich interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefj(const ae_int_t m, const real_1d_array &x, minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmcreatefj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets stopping conditions for Levenberg-Marquardt optimization +algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. Only Levenberg-Marquardt + iterations are counted (L-BFGS/CG iterations are NOT + counted because their cost is very low compared to that of + LM). + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetcond(const minlmstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmsetcond(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinLMOptimize(). Both Levenberg-Marquardt and internal L-BFGS +iterations are reported. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetxrep(const minlmstate &state, const bool needxrep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmsetxrep(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), needxrep, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + +NOTE: non-zero StpMax leads to moderate performance degradation because +intermediate step of preconditioned L-BFGS optimization is incompatible +with limits on step size. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetstpmax(const minlmstate &state, const double stpmax) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmsetstpmax(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), stpmax, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function is used to change acceleration settings + +You can choose between three acceleration strategies: +* AccType=0, no acceleration. +* AccType=1, secant updates are used to update quadratic model after each + iteration. After fixed number of iterations (or after model breakdown) + we recalculate quadratic model using analytic Jacobian or finite + differences. Number of secant-based iterations depends on optimization + settings: about 3 iterations - when we have analytic Jacobian, up to 2*N + iterations - when we use finite differences to calculate Jacobian. +* AccType=2, after quadratic model is built and LM step is made, we use it + as preconditioner for several (5-10) iterations of L-BFGS algorithm. + +AccType=1 is recommended when Jacobian calculation cost is prohibitive +high (several Mx1 function vector calculations followed by several NxN +Cholesky factorizations are faster than calculation of one M*N Jacobian). +It should also be used when we have no Jacobian, because finite difference +approximation takes too much time to compute. + +AccType=2 is recommended when Jacobian is cheap - much more cheaper than +one Cholesky factorization. We can reduce number of Cholesky +factorizations at the cost of increased number of Jacobian calculations. +Sometimes it helps. + +Table below list optimization protocols (XYZ protocol corresponds to +MinLMCreateXYZ) and acceleration types they support (and use by default). + +ACCELERATION TYPES SUPPORTED BY OPTIMIZATION PROTOCOLS: + +protocol 0 1 2 comment +V + + +VJ + + + +FGH + + +VGJ + + + special protocol, not for widespread use +FJ + + obsolete protocol, not recommended +FGJ + + obsolete protocol, not recommended + +DAFAULT VALUES: + +protocol 0 1 2 comment +V x without acceleration it is so slooooooooow +VJ x +FGH x +VGJ x we've implicitly turned (2) by passing gradient +FJ x obsolete protocol, not recommended +FGJ x obsolete protocol, not recommended + +NOTE: this function should be called before optimization. Attempt to call +it during algorithm iterations may result in unexpected behavior. + +NOTE: attempt to call this function with unsupported protocol/acceleration +combination will result in exception being thrown. + + -- ALGLIB -- + Copyright 14.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetacctype(const minlmstate &state, const ae_int_t acctype) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmsetacctype(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), acctype, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minlmiteration(const minlmstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::minlmiteration(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minlmoptimize(minlmstate &state, + void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( fvec==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (fvec is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needfi ) + { + fvec(state.x, state.fi, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minlmoptimize(minlmstate &state, + void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr), + void (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( fvec==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (fvec is NULL)"); + if( jac==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (jac is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needfi ) + { + fvec(state.x, state.fi, ptr); + continue; + } + if( state.needfij ) + { + jac(state.x, state.fi, state.j, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minlmoptimize(minlmstate &state, + void (*func)(const real_1d_array &x, double &func, void *ptr), + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*hess)(const real_1d_array &x, double &func, real_1d_array &grad, real_2d_array &hess, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( func==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (func is NULL)"); + if( grad==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (grad is NULL)"); + if( hess==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (hess is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needf ) + { + func(state.x, state.f, ptr); + continue; + } + if( state.needfg ) + { + grad(state.x, state.f, state.g, ptr); + continue; + } + if( state.needfgh ) + { + hess(state.x, state.f, state.g, state.h, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minlmoptimize(minlmstate &state, + void (*func)(const real_1d_array &x, double &func, void *ptr), + void (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( func==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (func is NULL)"); + if( jac==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (jac is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needf ) + { + func(state.x, state.f, ptr); + continue; + } + if( state.needfij ) + { + jac(state.x, state.fi, state.j, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minlmoptimize(minlmstate &state, + void (*func)(const real_1d_array &x, double &func, void *ptr), + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( func==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (func is NULL)"); + if( grad==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (grad is NULL)"); + if( jac==NULL ) + throw ap_error("ALGLIB: error in 'minlmoptimize()' (jac is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needf ) + { + func(state.x, state.f, ptr); + continue; + } + if( state.needfg ) + { + grad(state.x, state.f, state.g, ptr); + continue; + } + if( state.needfij ) + { + jac(state.x, state.fi, state.j, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + + +/************************************************************************* +Levenberg-Marquardt algorithm results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report; + see comments for this structure for more info. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmresults(const minlmstate &state, real_1d_array &x, minlmreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmresults(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Levenberg-Marquardt algorithm results + +Buffered implementation of MinLMResults(), which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmresultsbuf(const minlmstate &state, real_1d_array &x, minlmreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmresultsbuf(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine restarts LM algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used for reverse communication previously + allocated with MinLMCreateXXX call. + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmrestartfrom(const minlmstate &state, const real_1d_array &x) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minlmrestartfrom(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + +*************************************************************************/ +_minasastate_owner::_minasastate_owner() +{ + p_struct = (alglib_impl::minasastate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasastate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minasastate_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minasastate_owner::_minasastate_owner(const _minasastate_owner &rhs) +{ + p_struct = (alglib_impl::minasastate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasastate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minasastate_init_copy(p_struct, const_cast<alglib_impl::minasastate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minasastate_owner& _minasastate_owner::operator=(const _minasastate_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minasastate_clear(p_struct); + if( !alglib_impl::_minasastate_init_copy(p_struct, const_cast<alglib_impl::minasastate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minasastate_owner::~_minasastate_owner() +{ + alglib_impl::_minasastate_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minasastate* _minasastate_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minasastate* _minasastate_owner::c_ptr() const +{ + return const_cast<alglib_impl::minasastate*>(p_struct); +} +minasastate::minasastate() : _minasastate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +minasastate::minasastate(const minasastate &rhs):_minasastate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +minasastate& minasastate::operator=(const minasastate &rhs) +{ + if( this==&rhs ) + return *this; + _minasastate_owner::operator=(rhs); + return *this; +} + +minasastate::~minasastate() +{ +} + + +/************************************************************************* + +*************************************************************************/ +_minasareport_owner::_minasareport_owner() +{ + p_struct = (alglib_impl::minasareport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasareport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minasareport_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minasareport_owner::_minasareport_owner(const _minasareport_owner &rhs) +{ + p_struct = (alglib_impl::minasareport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasareport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minasareport_init_copy(p_struct, const_cast<alglib_impl::minasareport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minasareport_owner& _minasareport_owner::operator=(const _minasareport_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minasareport_clear(p_struct); + if( !alglib_impl::_minasareport_init_copy(p_struct, const_cast<alglib_impl::minasareport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minasareport_owner::~_minasareport_owner() +{ + alglib_impl::_minasareport_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minasareport* _minasareport_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minasareport* _minasareport_owner::c_ptr() const +{ + return const_cast<alglib_impl::minasareport*>(p_struct); +} +minasareport::minasareport() : _minasareport_owner() ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),activeconstraints(p_struct->activeconstraints) +{ +} + +minasareport::minasareport(const minasareport &rhs):_minasareport_owner(rhs) ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),activeconstraints(p_struct->activeconstraints) +{ +} + +minasareport& minasareport::operator=(const minasareport &rhs) +{ + if( this==&rhs ) + return *this; + _minasareport_owner::operator=(rhs); + return *this; +} + +minasareport::~minasareport() +{ +} + +/************************************************************************* + NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING + MODIFIED ACTIVE SET ALGORITHM + WILLIAM W. HAGER AND HONGCHAO ZHANG + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments with bound +constraints: BndL[i] <= x[i] <= BndU[i] + +This method is globally convergent as long as grad(f) is Lipschitz +continuous on a level set: L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinASACreate() call +2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and + other functions +3. User calls MinASAOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinASAResults() to get solution +5. Optionally, user may call MinASARestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinASARestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from sizes of + X/BndL/BndU. + X - starting point, array[0..N-1]. + BndL - lower bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very small number as bound: -1000, -1.0E6 + or -1.0E300, or something like that. + BndU - upper bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very large number as bound: +1000, +1.0E6 + or +1.0E300, or something like that. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + +NOTES: + +1. you may tune stopping conditions with MinASASetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinASASetStpMax() function to bound algorithm's steps. +3. this function does NOT support infinite/NaN values in X, BndL, BndU. + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void minasacreate(const ae_int_t n, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasacreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), const_cast<alglib_impl::minasastate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING + MODIFIED ACTIVE SET ALGORITHM + WILLIAM W. HAGER AND HONGCHAO ZHANG + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments with bound +constraints: BndL[i] <= x[i] <= BndU[i] + +This method is globally convergent as long as grad(f) is Lipschitz +continuous on a level set: L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinASACreate() call +2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and + other functions +3. User calls MinASAOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinASAResults() to get solution +5. Optionally, user may call MinASARestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinASARestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from sizes of + X/BndL/BndU. + X - starting point, array[0..N-1]. + BndL - lower bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very small number as bound: -1000, -1.0E6 + or -1.0E300, or something like that. + BndU - upper bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very large number as bound: +1000, +1.0E6 + or +1.0E300, or something like that. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + +NOTES: + +1. you may tune stopping conditions with MinASASetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinASASetStpMax() function to bound algorithm's steps. +3. this function does NOT support infinite/NaN values in X, BndL, BndU. + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void minasacreate(const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + if( (x.length()!=bndl.length()) || (x.length()!=bndu.length())) + throw ap_error("Error while calling 'minasacreate': looks like one of arguments has wrong size"); + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasacreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), const_cast<alglib_impl::minasastate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets stopping conditions for the ASA optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetcond(const minasastate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasasetcond(const_cast<alglib_impl::minasastate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinASAOptimize(). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetxrep(const minasastate &state, const bool needxrep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasasetxrep(const_cast<alglib_impl::minasastate*>(state.c_ptr()), needxrep, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm stat + UAType - algorithm type: + * -1 automatic selection of the best algorithm + * 0 DY (Dai and Yuan) algorithm + * 1 Hybrid DY-HS algorithm + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetalgorithm(const minasastate &state, const ae_int_t algotype) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasasetalgorithm(const_cast<alglib_impl::minasastate*>(state.c_ptr()), algotype, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length (zero by default). + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetstpmax(const minasastate &state, const double stpmax) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasasetstpmax(const_cast<alglib_impl::minasastate*>(state.c_ptr()), stpmax, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minasaiteration(const minasastate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::minasaiteration(const_cast<alglib_impl::minasastate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minasaoptimize(minasastate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( grad==NULL ) + throw ap_error("ALGLIB: error in 'minasaoptimize()' (grad is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minasaiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needfg ) + { + grad(state.x, state.f, state.g, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minasaoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + + +/************************************************************************* +ASA results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -2 rounding errors prevent further improvement. + X contains best point found. + * -1 incorrect parameters were specified + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + * ActiveConstraints contains number of active constraints + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minasaresults(const minasastate &state, real_1d_array &x, minasareport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasaresults(const_cast<alglib_impl::minasastate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minasareport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +ASA results + +Buffered implementation of MinASAResults() which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minasaresultsbuf(const minasastate &state, real_1d_array &x, minasareport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasaresultsbuf(const_cast<alglib_impl::minasastate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minasareport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine restarts CG algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure previously allocated with MinCGCreate call. + X - new starting point. + BndL - new lower bounds + BndU - new upper bounds + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minasarestartfrom(const minasastate &state, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minasarestartfrom(const_cast<alglib_impl::minasastate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This object stores state of the nonlinear CG optimizer. + +You should use ALGLIB functions to work with this object. +*************************************************************************/ +_mincgstate_owner::_mincgstate_owner() +{ + p_struct = (alglib_impl::mincgstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_mincgstate_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_mincgstate_owner::_mincgstate_owner(const _mincgstate_owner &rhs) +{ + p_struct = (alglib_impl::mincgstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_mincgstate_init_copy(p_struct, const_cast<alglib_impl::mincgstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_mincgstate_owner& _mincgstate_owner::operator=(const _mincgstate_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_mincgstate_clear(p_struct); + if( !alglib_impl::_mincgstate_init_copy(p_struct, const_cast<alglib_impl::mincgstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_mincgstate_owner::~_mincgstate_owner() +{ + alglib_impl::_mincgstate_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::mincgstate* _mincgstate_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::mincgstate* _mincgstate_owner::c_ptr() const +{ + return const_cast<alglib_impl::mincgstate*>(p_struct); +} +mincgstate::mincgstate() : _mincgstate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +mincgstate::mincgstate(const mincgstate &rhs):_mincgstate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +mincgstate& mincgstate::operator=(const mincgstate &rhs) +{ + if( this==&rhs ) + return *this; + _mincgstate_owner::operator=(rhs); + return *this; +} + +mincgstate::~mincgstate() +{ +} + + +/************************************************************************* + +*************************************************************************/ +_mincgreport_owner::_mincgreport_owner() +{ + p_struct = (alglib_impl::mincgreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_mincgreport_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_mincgreport_owner::_mincgreport_owner(const _mincgreport_owner &rhs) +{ + p_struct = (alglib_impl::mincgreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_mincgreport_init_copy(p_struct, const_cast<alglib_impl::mincgreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_mincgreport_owner& _mincgreport_owner::operator=(const _mincgreport_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_mincgreport_clear(p_struct); + if( !alglib_impl::_mincgreport_init_copy(p_struct, const_cast<alglib_impl::mincgreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_mincgreport_owner::~_mincgreport_owner() +{ + alglib_impl::_mincgreport_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::mincgreport* _mincgreport_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::mincgreport* _mincgreport_owner::c_ptr() const +{ + return const_cast<alglib_impl::mincgreport*>(p_struct); +} +mincgreport::mincgreport() : _mincgreport_owner() ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype) +{ +} + +mincgreport::mincgreport(const mincgreport &rhs):_mincgreport_owner(rhs) ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype) +{ +} + +mincgreport& mincgreport::operator=(const mincgreport &rhs) +{ + if( this==&rhs ) + return *this; + _mincgreport_owner::operator=(rhs); + return *this; +} + +mincgreport::~mincgreport() +{ +} + +/************************************************************************* + NONLINEAR CONJUGATE GRADIENT METHOD + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using one of the +nonlinear conjugate gradient methods. + +These CG methods are globally convergent (even on non-convex functions) as +long as grad(f) is Lipschitz continuous in a some neighborhood of the +L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinCGCreate() call +2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and + other functions +3. User calls MinCGOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinCGResults() to get solution +5. Optionally, user may call MinCGRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinCGRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - starting point, array[0..N-1]. + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgcreate(const ae_int_t n, const real_1d_array &x, mincgstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgcreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + NONLINEAR CONJUGATE GRADIENT METHOD + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using one of the +nonlinear conjugate gradient methods. + +These CG methods are globally convergent (even on non-convex functions) as +long as grad(f) is Lipschitz continuous in a some neighborhood of the +L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinCGCreate() call +2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and + other functions +3. User calls MinCGOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinCGResults() to get solution +5. Optionally, user may call MinCGRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinCGRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - starting point, array[0..N-1]. + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgcreate(const real_1d_array &x, mincgstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgcreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets stopping conditions for CG optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetcond(const mincgstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgsetcond(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinCGOptimize(). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetxrep(const mincgstate &state, const bool needxrep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgsetxrep(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), needxrep, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets CG algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + CGType - algorithm type: + * -1 automatic selection of the best algorithm + * 0 DY (Dai and Yuan) algorithm + * 1 Hybrid DY-HS algorithm + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetcgtype(const mincgstate &state, const ae_int_t cgtype) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgsetcgtype(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), cgtype, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetstpmax(const mincgstate &state, const double stpmax) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgsetstpmax(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), stpmax, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function allows to suggest initial step length to the CG algorithm. + +Suggested step length is used as starting point for the line search. It +can be useful when you have badly scaled problem, i.e. when ||grad|| +(which is used as initial estimate for the first step) is many orders of +magnitude different from the desired step. + +Line search may fail on such problems without good estimate of initial +step length. Imagine, for example, problem with ||grad||=10^50 and desired +step equal to 0.1 Line search function will use 10^50 as initial step, +then it will decrease step length by 2 (up to 20 attempts) and will get +10^44, which is still too large. + +This function allows us to tell than line search should be started from +some moderate step length, like 1.0, so algorithm will be able to detect +desired step length in a several searches. + +This function influences only first iteration of algorithm. It should be +called between MinCGCreate/MinCGRestartFrom() call and MinCGOptimize call. + +INPUT PARAMETERS: + State - structure used to store algorithm state. + Stp - initial estimate of the step length. + Can be zero (no estimate). + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsuggeststep(const mincgstate &state, const double stp) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgsuggeststep(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), stp, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool mincgiteration(const mincgstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::mincgiteration(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void mincgoptimize(mincgstate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( grad==NULL ) + throw ap_error("ALGLIB: error in 'mincgoptimize()' (grad is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::mincgiteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needfg ) + { + grad(state.x, state.f, state.g, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'mincgoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + + +/************************************************************************* +Conjugate gradient results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible, + we return best X found so far + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +void mincgresults(const mincgstate &state, real_1d_array &x, mincgreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgresults(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +Conjugate gradient results + +Buffered implementation of MinCGResults(), which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +void mincgresultsbuf(const mincgstate &state, real_1d_array &x, mincgreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgresultsbuf(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine restarts CG algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used to store algorithm state. + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgrestartfrom(const mincgstate &state, const real_1d_array &x) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::mincgrestartfrom(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This object stores nonlinear optimizer state. +You should use functions provided by MinBLEIC subpackage to work with this +object +*************************************************************************/ +_minbleicstate_owner::_minbleicstate_owner() +{ + p_struct = (alglib_impl::minbleicstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minbleicstate_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minbleicstate_owner::_minbleicstate_owner(const _minbleicstate_owner &rhs) +{ + p_struct = (alglib_impl::minbleicstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicstate), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minbleicstate_init_copy(p_struct, const_cast<alglib_impl::minbleicstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minbleicstate_owner& _minbleicstate_owner::operator=(const _minbleicstate_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minbleicstate_clear(p_struct); + if( !alglib_impl::_minbleicstate_init_copy(p_struct, const_cast<alglib_impl::minbleicstate*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minbleicstate_owner::~_minbleicstate_owner() +{ + alglib_impl::_minbleicstate_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minbleicstate* _minbleicstate_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minbleicstate* _minbleicstate_owner::c_ptr() const +{ + return const_cast<alglib_impl::minbleicstate*>(p_struct); +} +minbleicstate::minbleicstate() : _minbleicstate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +minbleicstate::minbleicstate(const minbleicstate &rhs):_minbleicstate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x) +{ +} + +minbleicstate& minbleicstate::operator=(const minbleicstate &rhs) +{ + if( this==&rhs ) + return *this; + _minbleicstate_owner::operator=(rhs); + return *this; +} + +minbleicstate::~minbleicstate() +{ +} + + +/************************************************************************* +This structure stores optimization report: +* InnerIterationsCount number of inner iterations +* OuterIterationsCount number of outer iterations +* NFEV number of gradient evaluations + +There are additional fields which can be used for debugging: +* DebugEqErr error in the equality constraints (2-norm) +* DebugFS f, calculated at projection of initial point + to the feasible set +* DebugFF f, calculated at the final point +* DebugDX |X_start-X_final| +*************************************************************************/ +_minbleicreport_owner::_minbleicreport_owner() +{ + p_struct = (alglib_impl::minbleicreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minbleicreport_init(p_struct, NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minbleicreport_owner::_minbleicreport_owner(const _minbleicreport_owner &rhs) +{ + p_struct = (alglib_impl::minbleicreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicreport), NULL); + if( p_struct==NULL ) + throw ap_error("ALGLIB: malloc error"); + if( !alglib_impl::_minbleicreport_init_copy(p_struct, const_cast<alglib_impl::minbleicreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); +} + +_minbleicreport_owner& _minbleicreport_owner::operator=(const _minbleicreport_owner &rhs) +{ + if( this==&rhs ) + return *this; + alglib_impl::_minbleicreport_clear(p_struct); + if( !alglib_impl::_minbleicreport_init_copy(p_struct, const_cast<alglib_impl::minbleicreport*>(rhs.p_struct), NULL, ae_false) ) + throw ap_error("ALGLIB: malloc error"); + return *this; +} + +_minbleicreport_owner::~_minbleicreport_owner() +{ + alglib_impl::_minbleicreport_clear(p_struct); + ae_free(p_struct); +} + +alglib_impl::minbleicreport* _minbleicreport_owner::c_ptr() +{ + return p_struct; +} + +alglib_impl::minbleicreport* _minbleicreport_owner::c_ptr() const +{ + return const_cast<alglib_impl::minbleicreport*>(p_struct); +} +minbleicreport::minbleicreport() : _minbleicreport_owner() ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),debugeqerr(p_struct->debugeqerr),debugfs(p_struct->debugfs),debugff(p_struct->debugff),debugdx(p_struct->debugdx) +{ +} + +minbleicreport::minbleicreport(const minbleicreport &rhs):_minbleicreport_owner(rhs) ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),debugeqerr(p_struct->debugeqerr),debugfs(p_struct->debugfs),debugff(p_struct->debugff),debugdx(p_struct->debugdx) +{ +} + +minbleicreport& minbleicreport::operator=(const minbleicreport &rhs) +{ + if( this==&rhs ) + return *this; + _minbleicreport_owner::operator=(rhs); + return *this; +} + +minbleicreport::~minbleicreport() +{ +} + +/************************************************************************* + BOUND CONSTRAINED OPTIMIZATION + WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments subject to any +combination of: +* bound constraints +* linear inequality constraints +* linear equality constraints + +REQUIREMENTS: +* function value and gradient +* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) } +* function must be defined even in the infeasible points (algorithm make take + steps in the infeasible area before converging to the feasible point) +* starting point X0 must be feasible or not too far away from the feasible set +* problem must satisfy strict complementary conditions + +USAGE: + +Constrained optimization if far more complex than the unconstrained one. +Here we give very brief outline of the BLEIC optimizer. We strongly recommend +you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide +on optimization, which is available at http://www.alglib.net/optimization/ + +1. User initializes algorithm state with MinBLEICCreate() call + +2. USer adds boundary and/or linear constraints by calling + MinBLEICSetBC() and MinBLEICSetLC() functions. + +3. User sets stopping conditions for underlying unconstrained solver + with MinBLEICSetInnerCond() call. + This function controls accuracy of underlying optimization algorithm. + +4. User sets stopping conditions for outer iteration by calling + MinBLEICSetOuterCond() function. + This function controls handling of boundary and inequality constraints. + +5. User tunes barrier parameters: + * barrier width with MinBLEICSetBarrierWidth() call + * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call + These functions control handling of boundary and inequality constraints. + +6. Additionally, user may set limit on number of internal iterations + by MinBLEICSetMaxIts() call. + This function allows to prevent algorithm from looping forever. + +7. User calls MinBLEICOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. + +8. User calls MinBLEICResults() to get solution + +9. Optionally user may call MinBLEICRestartFrom() to solve another problem + with same N but another starting point. + MinBLEICRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size ofX + X - starting point, array[N]: + * it is better to set X to a feasible point + * but X can be infeasible, in which case algorithm will try + to find feasible point first, using X as initial + approximation. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleiccreate(const ae_int_t n, const real_1d_array &x, minbleicstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleiccreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* + BOUND CONSTRAINED OPTIMIZATION + WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments subject to any +combination of: +* bound constraints +* linear inequality constraints +* linear equality constraints + +REQUIREMENTS: +* function value and gradient +* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) } +* function must be defined even in the infeasible points (algorithm make take + steps in the infeasible area before converging to the feasible point) +* starting point X0 must be feasible or not too far away from the feasible set +* problem must satisfy strict complementary conditions + +USAGE: + +Constrained optimization if far more complex than the unconstrained one. +Here we give very brief outline of the BLEIC optimizer. We strongly recommend +you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide +on optimization, which is available at http://www.alglib.net/optimization/ + +1. User initializes algorithm state with MinBLEICCreate() call + +2. USer adds boundary and/or linear constraints by calling + MinBLEICSetBC() and MinBLEICSetLC() functions. + +3. User sets stopping conditions for underlying unconstrained solver + with MinBLEICSetInnerCond() call. + This function controls accuracy of underlying optimization algorithm. + +4. User sets stopping conditions for outer iteration by calling + MinBLEICSetOuterCond() function. + This function controls handling of boundary and inequality constraints. + +5. User tunes barrier parameters: + * barrier width with MinBLEICSetBarrierWidth() call + * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call + These functions control handling of boundary and inequality constraints. + +6. Additionally, user may set limit on number of internal iterations + by MinBLEICSetMaxIts() call. + This function allows to prevent algorithm from looping forever. + +7. User calls MinBLEICOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. + +8. User calls MinBLEICResults() to get solution + +9. Optionally user may call MinBLEICRestartFrom() to solve another problem + with same N but another starting point. + MinBLEICRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size ofX + X - starting point, array[N]: + * it is better to set X to a feasible point + * but X can be infeasible, in which case algorithm will try + to find feasible point first, using X as initial + approximation. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleiccreate(const real_1d_array &x, minbleicstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t n; + + n = x.length(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleiccreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets boundary constraints for BLEIC optimizer. + +Boundary constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure stores algorithm state + BndL - lower bounds, array[N]. + If some (all) variables are unbounded, you may specify + very small number or -INF. + BndU - upper bounds, array[N]. + If some (all) variables are unbounded, you may specify + very large number or +INF. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbc(const minbleicstate &state, const real_1d_array &bndl, const real_1d_array &bndu) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetbc(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets linear constraints for BLEIC optimizer. + +Linear constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + C - linear constraints, array[K,N+1]. + Each row of C represents one constraint, either equality + or inequality (see below): + * first N elements correspond to coefficients, + * last element corresponds to the right part. + All elements of C (including right part) must be finite. + CT - type of constraints, array[K]: + * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1] + * if CT[i]=0, then I-th constraint is C[i,*]*x = C[i,n+1] + * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1] + K - number of equality/inequality constraints, K>=0: + * if given, only leading K elements of C/CT are used + * if not given, automatically determined from sizes of C/CT + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetlc(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets linear constraints for BLEIC optimizer. + +Linear constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + C - linear constraints, array[K,N+1]. + Each row of C represents one constraint, either equality + or inequality (see below): + * first N elements correspond to coefficients, + * last element corresponds to the right part. + All elements of C (including right part) must be finite. + CT - type of constraints, array[K]: + * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1] + * if CT[i]=0, then I-th constraint is C[i,*]*x = C[i,n+1] + * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1] + K - number of equality/inequality constraints, K>=0: + * if given, only leading K elements of C/CT are used + * if not given, automatically determined from sizes of C/CT + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct) +{ + alglib_impl::ae_state _alglib_env_state; + ae_int_t k; + if( (c.rows()!=ct.length())) + throw ap_error("Error while calling 'minbleicsetlc': looks like one of arguments has wrong size"); + k = c.rows(); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetlc(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state); + + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets stopping conditions for the underlying nonlinear CG +optimizer. It controls overall accuracy of solution. These conditions +should be strict enough in order for algorithm to converge. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + Algorithm finishes its work if 2-norm of the Lagrangian + gradient is less than or equal to EpsG. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + +Passing EpsG=0, EpsF=0 and EpsX=0 (simultaneously) will lead to +automatic stopping criterion selection. + +These conditions are used to terminate inner iterations. However, you +need to tune termination conditions for outer iterations too. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetinnercond(const minbleicstate &state, const double epsg, const double epsf, const double epsx) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetinnercond(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), epsg, epsf, epsx, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets stopping conditions for outer iteration of BLEIC algo. + +These conditions control accuracy of constraint handling and amount of +infeasibility allowed in the solution. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsX - >0, stopping condition on outer iteration step length + EpsI - >0, stopping condition on infeasibility + +Both EpsX and EpsI must be non-zero. + +MEANING OF EpsX + +EpsX is a stopping condition for outer iterations. Algorithm will stop +when solution of the current modified subproblem will be within EpsX +(using 2-norm) of the previous solution. + +MEANING OF EpsI + +EpsI controls feasibility properties - algorithm won't stop until all +inequality constraints will be satisfied with error (distance from current +point to the feasible area) at most EpsI. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetoutercond(const minbleicstate &state, const double epsx, const double epsi) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetoutercond(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), epsx, epsi, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets initial barrier width. + +BLEIC optimizer uses modified barrier functions to handle inequality +constraints. These functions are almost constant in the inner parts of the +feasible area, but grow rapidly to the infinity OUTSIDE of the feasible +area. Barrier width is a distance from feasible area to the point where +modified barrier function becomes infinite. + +Barrier width must be: +* small enough (below some problem-dependent value) in order for algorithm + to converge. Necessary condition is that the target function must be + well described by linear model in the areas as small as barrier width. +* not VERY small (in order to avoid difficulties associated with rapid + changes in the modified function, ill-conditioning, round-off issues). + +Choosing appropriate barrier width is very important for efficient +optimization, and it often requires error and trial. You can use two +strategies when choosing barrier width: +* set barrier width with MinBLEICSetBarrierWidth() call. In this case you + should try different barrier widths and examine results. +* set decreasing barrier width by combining MinBLEICSetBarrierWidth() and + MinBLEICSetBarrierDecay() calls. In this case algorithm will decrease + barrier width after each outer iteration until it encounters optimal + barrier width. + +INPUT PARAMETERS: + State - structure which stores algorithm state + Mu - >0, initial barrier width + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbarrierwidth(const minbleicstate &state, const double mu) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetbarrierwidth(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), mu, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets decay coefficient for barrier width. + +By default, no barrier decay is used (Decay=1.0). + +BLEIC optimizer uses modified barrier functions to handle inequality +constraints. These functions are almost constant in the inner parts of the +feasible area, but grow rapidly to the infinity OUTSIDE of the feasible +area. Barrier width is a distance from feasible area to the point where +modified barrier function becomes infinite. Decay coefficient allows us to +decrease barrier width from the initial (suboptimial) value until +optimal value will be met. + +We recommend you either to set MuDecay=1.0 (no decay) or use some moderate +value like 0.5-0.7 + +INPUT PARAMETERS: + State - structure which stores algorithm state + MuDecay - 0<MuDecay<=1, decay coefficient + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbarrierdecay(const minbleicstate &state, const double mudecay) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetbarrierdecay(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), mudecay, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function allows to stop algorithm after specified number of inner +iterations. + +INPUT PARAMETERS: + State - structure which stores algorithm state + MaxIts - maximum number of inner iterations. + If MaxIts=0, the number of iterations is unlimited. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetmaxits(const minbleicstate &state, const ae_int_t maxits) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetmaxits(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), maxits, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinBLEICOptimize(). + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetxrep(const minbleicstate &state, const bool needxrep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetxrep(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), needxrep, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which lead to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetstpmax(const minbleicstate &state, const double stpmax) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicsetstpmax(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), stpmax, &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minbleiciteration(const minbleicstate &state) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + ae_bool result = alglib_impl::minbleiciteration(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return *(reinterpret_cast<bool*>(&result)); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + +void minbleicoptimize(minbleicstate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr), + void *ptr) +{ + alglib_impl::ae_state _alglib_env_state; + if( grad==NULL ) + throw ap_error("ALGLIB: error in 'minbleicoptimize()' (grad is NULL)"); + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + while( alglib_impl::minbleiciteration(state.c_ptr(), &_alglib_env_state) ) + { + if( state.needfg ) + { + grad(state.x, state.f, state.g, ptr); + continue; + } + if( state.xupdated ) + { + if( rep!=NULL ) + rep(state.x, state.f, ptr); + continue; + } + throw ap_error("ALGLIB: error in 'minbleicoptimize' (some derivatives were not provided?)"); + } + alglib_impl::ae_state_clear(&_alglib_env_state); + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + + + +/************************************************************************* +BLEIC results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -3 inconsistent constraints. Feasible point is + either nonexistent or too hard to find. Try to + restart optimizer with better initial + approximation + * -2 rounding errors prevent further improvement. + X contains best point found. + * 4 conditions on constraints are fulfilled + with error less than or equal to EpsC + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible, + X contains best point found so far. + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicresults(const minbleicstate &state, real_1d_array &x, minbleicreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicresults(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +BLEIC results + +Buffered implementation of MinBLEICResults() which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicresultsbuf(const minbleicstate &state, real_1d_array &x, minbleicreport &rep) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicresultsbuf(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicreport*>(rep.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} + +/************************************************************************* +This subroutine restarts algorithm from new point. +All optimization parameters (including constraints) are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + X - new starting point. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicrestartfrom(const minbleicstate &state, const real_1d_array &x) +{ + alglib_impl::ae_state _alglib_env_state; + alglib_impl::ae_state_init(&_alglib_env_state); + try + { + alglib_impl::minbleicrestartfrom(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state); + alglib_impl::ae_state_clear(&_alglib_env_state); + return; + } + catch(alglib_impl::ae_error_type) + { + throw ap_error(_alglib_env_state.error_msg); + } + catch(...) + { + throw; + } +} +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +static void minlbfgs_clearrequestfields(minlbfgsstate* state, + ae_state *_state); + + +static ae_int_t minlm_lmmodefj = 0; +static ae_int_t minlm_lmmodefgj = 1; +static ae_int_t minlm_lmmodefgh = 2; +static ae_int_t minlm_lmflagnoprelbfgs = 1; +static ae_int_t minlm_lmflagnointlbfgs = 2; +static ae_int_t minlm_lmprelbfgsm = 5; +static ae_int_t minlm_lmintlbfgsits = 5; +static ae_int_t minlm_lbfgsnorealloc = 1; +static double minlm_lambdaup = 2.0; +static double minlm_lambdadown = 0.33; +static double minlm_suspiciousnu = 16; +static ae_int_t minlm_smallmodelage = 3; +static ae_int_t minlm_additers = 5; +static void minlm_lmprepare(ae_int_t n, + ae_int_t m, + ae_bool havegrad, + minlmstate* state, + ae_state *_state); +static void minlm_clearrequestfields(minlmstate* state, ae_state *_state); +static ae_bool minlm_increaselambda(double* lambdav, + double* nu, + ae_state *_state); +static void minlm_decreaselambda(double* lambdav, + double* nu, + ae_state *_state); + + +static ae_int_t minasa_n1 = 2; +static ae_int_t minasa_n2 = 2; +static double minasa_stpmin = 1.0E-300; +static double minasa_gpaftol = 0.0001; +static double minasa_gpadecay = 0.5; +static double minasa_asarho = 0.5; +static double minasa_asaboundedantigradnorm(minasastate* state, + ae_state *_state); +static double minasa_asaginorm(minasastate* state, ae_state *_state); +static double minasa_asad1norm(minasastate* state, ae_state *_state); +static ae_bool minasa_asauisempty(minasastate* state, ae_state *_state); +static void minasa_clearrequestfields(minasastate* state, + ae_state *_state); + + +static ae_int_t mincg_rscountdownlen = 10; +static void mincg_clearrequestfields(mincgstate* state, ae_state *_state); + + +static double minbleic_svdtol = 100; +static double minbleic_lmtol = 100; +static double minbleic_maxlmgrowth = 1000; +static double minbleic_minlagrangemul = 1.0E-50; +static double minbleic_maxlagrangemul = 1.0E+50; +static double minbleic_maxouterits = 20; +static ae_int_t minbleic_mucountdownlen = 15; +static ae_int_t minbleic_cscountdownlen = 5; +static void minbleic_clearrequestfields(minbleicstate* state, + ae_state *_state); +static void minbleic_makeprojection(minbleicstate* state, + /* Real */ ae_vector* x, + /* Real */ ae_vector* r, + double* rnorm2, + ae_state *_state); +static void minbleic_modifytargetfunction(minbleicstate* state, + /* Real */ ae_vector* x, + /* Real */ ae_vector* r, + double rnorm2, + double* f, + /* Real */ ae_vector* g, + double* gnorm, + double* mpgnorm, + double* mba, + double* fierr, + double* cserr, + ae_state *_state); +static void minbleic_penaltyfunction(minbleicstate* state, + /* Real */ ae_vector* x, + double* f, + /* Real */ ae_vector* g, + /* Real */ ae_vector* r, + double* mba, + double* fierr, + ae_state *_state); + + + + + +/************************************************************************* + LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using a quasi- +Newton method (LBFGS scheme) which is optimized to use a minimum amount +of memory. +The subroutine generates the approximation of an inverse Hessian matrix by +using information about the last M steps of the algorithm (instead of N). +It lessens a required amount of memory from a value of order N^2 to a +value of order 2*N*M. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinLBFGSCreate() call +2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax() + and other functions +3. User calls MinLBFGSOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinLBFGSResults() to get solution +5. Optionally user may call MinLBFGSRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLBFGSRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension. N>0 + M - number of corrections in the BFGS scheme of Hessian + approximation update. Recommended value: 3<=M<=7. The smaller + value causes worse convergence, the bigger will not cause a + considerably better convergence, but will cause a fall in the + performance. M<=N. + X - initial solution approximation, array[0..N-1]. + + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + +NOTES: +1. you may tune stopping conditions with MinLBFGSSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLBFGSSetStpMax() function to bound algorithm's steps. However, + L-BFGS rarely needs such a tuning. + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgscreate(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlbfgsstate* state, + ae_state *_state) +{ + + _minlbfgsstate_clear(state); + + ae_assert(n>=1, "MinLBFGSCreate: N<1!", _state); + ae_assert(m>=1, "MinLBFGSCreate: M<1", _state); + ae_assert(m<=n, "MinLBFGSCreate: M>N", _state); + ae_assert(x->cnt>=n, "MinLBFGSCreate: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLBFGSCreate: X contains infinite or NaN values!", _state); + minlbfgscreatex(n, m, x, 0, state, _state); +} + + +/************************************************************************* +This function sets stopping conditions for L-BFGS optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetcond(minlbfgsstate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(epsg, _state), "MinLBFGSSetCond: EpsG is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsg,0), "MinLBFGSSetCond: negative EpsG!", _state); + ae_assert(ae_isfinite(epsf, _state), "MinLBFGSSetCond: EpsF is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsf,0), "MinLBFGSSetCond: negative EpsF!", _state); + ae_assert(ae_isfinite(epsx, _state), "MinLBFGSSetCond: EpsX is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsx,0), "MinLBFGSSetCond: negative EpsX!", _state); + ae_assert(maxits>=0, "MinLBFGSSetCond: negative MaxIts!", _state); + if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 ) + { + epsx = 1.0E-6; + } + state->epsg = epsg; + state->epsf = epsf; + state->epsx = epsx; + state->maxits = maxits; +} + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinLBFGSOptimize(). + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetxrep(minlbfgsstate* state, + ae_bool needxrep, + ae_state *_state) +{ + + + state->xrep = needxrep; +} + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0 (default), if + you don't want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetstpmax(minlbfgsstate* state, + double stpmax, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(stpmax, _state), "MinLBFGSSetStpMax: StpMax is not finite!", _state); + ae_assert(ae_fp_greater_eq(stpmax,0), "MinLBFGSSetStpMax: StpMax<0!", _state); + state->stpmax = stpmax; +} + + +/************************************************************************* +Extended subroutine for internal use only. + +Accepts additional parameters: + + Flags - additional settings: + * Flags = 0 means no additional settings + * Flags = 1 "do not allocate memory". used when solving + a many subsequent tasks with same N/M values. + First call MUST be without this flag bit set, + subsequent calls of MinLBFGS with same + MinLBFGSState structure can set Flags to 1. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgscreatex(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + ae_int_t flags, + minlbfgsstate* state, + ae_state *_state) +{ + ae_bool allocatemem; + + + ae_assert(n>=1, "MinLBFGS: N too small!", _state); + ae_assert(m>=1, "MinLBFGS: M too small!", _state); + ae_assert(m<=n, "MinLBFGS: M too large!", _state); + + /* + * Initialize + */ + state->n = n; + state->m = m; + state->flags = flags; + allocatemem = flags%2==0; + flags = flags/2; + if( allocatemem ) + { + ae_vector_set_length(&state->rho, m-1+1, _state); + ae_vector_set_length(&state->theta, m-1+1, _state); + ae_matrix_set_length(&state->y, m-1+1, n-1+1, _state); + ae_matrix_set_length(&state->s, m-1+1, n-1+1, _state); + ae_vector_set_length(&state->d, n-1+1, _state); + ae_vector_set_length(&state->x, n-1+1, _state); + ae_vector_set_length(&state->g, n-1+1, _state); + ae_vector_set_length(&state->work, n-1+1, _state); + } + minlbfgssetcond(state, 0, 0, 0, 0, _state); + minlbfgssetxrep(state, ae_false, _state); + minlbfgssetstpmax(state, 0, _state); + minlbfgsrestartfrom(state, x, _state); + state->prectype = 0; +} + + +/************************************************************************* +Modification of the preconditioner: +default preconditioner (simple scaling) is used. + +INPUT PARAMETERS: + State - structure which stores algorithm state + +After call to this function preconditioner is changed to the default one. + +NOTE: you can change preconditioner "on the fly", during algorithm +iterations. + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetdefaultpreconditioner(minlbfgsstate* state, + ae_state *_state) +{ + + + state->prectype = 0; +} + + +/************************************************************************* +Modification of the preconditioner: +Cholesky factorization of approximate Hessian is used. + +INPUT PARAMETERS: + State - structure which stores algorithm state + P - triangular preconditioner, Cholesky factorization of + the approximate Hessian. array[0..N-1,0..N-1], + (if larger, only leading N elements are used). + IsUpper - whether upper or lower triangle of P is given + (other triangle is not referenced) + +After call to this function preconditioner is changed to P (P is copied +into the internal buffer). + +NOTE: you can change preconditioner "on the fly", during algorithm +iterations. + +NOTE 2: P should be nonsingular. Exception will be thrown otherwise. It +also should be well conditioned, although only strict non-singularity is +tested. + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetcholeskypreconditioner(minlbfgsstate* state, + /* Real */ ae_matrix* p, + ae_bool isupper, + ae_state *_state) +{ + ae_int_t i; + double mx; + + + ae_assert(isfinitertrmatrix(p, state->n, isupper, _state), "MinLBFGSSetCholeskyPreconditioner: P contains infinite or NAN values!", _state); + mx = 0; + for(i=0; i<=state->n-1; i++) + { + mx = ae_maxreal(mx, ae_fabs(p->ptr.pp_double[i][i], _state), _state); + } + ae_assert(ae_fp_greater(mx,0), "MinLBFGSSetCholeskyPreconditioner: P is strictly singular!", _state); + if( state->denseh.rows<state->n||state->denseh.cols<state->n ) + { + ae_matrix_set_length(&state->denseh, state->n, state->n, _state); + } + state->prectype = 1; + if( isupper ) + { + rmatrixcopy(state->n, state->n, p, 0, 0, &state->denseh, 0, 0, _state); + } + else + { + rmatrixtranspose(state->n, state->n, p, 0, 0, &state->denseh, 0, 0, _state); + } +} + + +/************************************************************************* + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool minlbfgsiteration(minlbfgsstate* state, ae_state *_state) +{ + ae_int_t n; + ae_int_t m; + ae_int_t maxits; + double epsf; + double epsg; + double epsx; + ae_int_t i; + ae_int_t j; + ae_int_t ic; + ae_int_t mcinfo; + double v; + double vv; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + m = state->rstate.ia.ptr.p_int[1]; + maxits = state->rstate.ia.ptr.p_int[2]; + i = state->rstate.ia.ptr.p_int[3]; + j = state->rstate.ia.ptr.p_int[4]; + ic = state->rstate.ia.ptr.p_int[5]; + mcinfo = state->rstate.ia.ptr.p_int[6]; + epsf = state->rstate.ra.ptr.p_double[0]; + epsg = state->rstate.ra.ptr.p_double[1]; + epsx = state->rstate.ra.ptr.p_double[2]; + v = state->rstate.ra.ptr.p_double[3]; + vv = state->rstate.ra.ptr.p_double[4]; + } + else + { + n = -983; + m = -989; + maxits = -834; + i = 900; + j = -287; + ic = 364; + mcinfo = 214; + epsf = -338; + epsg = -686; + epsx = 912; + v = 585; + vv = 497; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + if( state->rstate.stage==3 ) + { + goto lbl_3; + } + + /* + * Routine body + */ + + /* + * Unload frequently used variables from State structure + * (just for typing convinience) + */ + n = state->n; + m = state->m; + epsg = state->epsg; + epsf = state->epsf; + epsx = state->epsx; + maxits = state->maxits; + state->repterminationtype = 0; + state->repiterationscount = 0; + state->repnfev = 0; + + /* + * Calculate F/G at the initial point + */ + minlbfgs_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + state->needfg = ae_false; + if( !state->xrep ) + { + goto lbl_4; + } + minlbfgs_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + state->xupdated = ae_false; +lbl_4: + state->repnfev = 1; + state->fold = state->f; + v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = ae_sqrt(v, _state); + if( ae_fp_less_eq(v,epsg) ) + { + state->repterminationtype = 4; + result = ae_false; + return result; + } + + /* + * Choose initial step and direction. + * Apply preconditioner, if we have something other than default. + */ + if( ae_fp_eq(state->stpmax,0) ) + { + state->stp = ae_minreal(1.0/v, 1, _state); + } + else + { + state->stp = ae_minreal(1.0/v, state->stpmax, _state); + } + ae_v_moveneg(&state->d.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( state->prectype==1 ) + { + + /* + * Cholesky preconditioner is used + */ + fblscholeskysolve(&state->denseh, 1.0, n, ae_true, &state->d, &state->autobuf, _state); + } + + /* + * Main cycle + */ + state->k = 0; +lbl_6: + if( ae_false ) + { + goto lbl_7; + } + + /* + * Main cycle: prepare to 1-D line search + */ + state->p = state->k%m; + state->q = ae_minint(state->k, m-1, _state); + + /* + * Store X[k], G[k] + */ + ae_v_moveneg(&state->s.ptr.pp_double[state->p][0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_moveneg(&state->y.ptr.pp_double[state->p][0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * Minimize F(x+alpha*d) + * Calculate S[k], Y[k] + */ + state->mcstage = 0; + if( state->k!=0 ) + { + state->stp = 1.0; + } + linminnormalized(&state->d, &state->stp, n, _state); + mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state); +lbl_8: + if( state->mcstage==0 ) + { + goto lbl_9; + } + minlbfgs_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + state->needfg = ae_false; + mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state); + goto lbl_8; +lbl_9: + if( !state->xrep ) + { + goto lbl_10; + } + + /* + * report + */ + minlbfgs_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 3; + goto lbl_rcomm; +lbl_3: + state->xupdated = ae_false; +lbl_10: + state->repnfev = state->repnfev+state->nfev; + state->repiterationscount = state->repiterationscount+1; + ae_v_add(&state->s.ptr.pp_double[state->p][0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_add(&state->y.ptr.pp_double[state->p][0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * Stopping conditions + */ + if( state->repiterationscount>=maxits&&maxits>0 ) + { + + /* + * Too many iterations + */ + state->repterminationtype = 5; + result = ae_false; + return result; + } + v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_fp_less_eq(ae_sqrt(v, _state),epsg) ) + { + + /* + * Gradient is small enough + */ + state->repterminationtype = 4; + result = ae_false; + return result; + } + if( ae_fp_less_eq(state->fold-state->f,epsf*ae_maxreal(ae_fabs(state->fold, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) ) + { + + /* + * F(k+1)-F(k) is small enough + */ + state->repterminationtype = 1; + result = ae_false; + return result; + } + v = ae_v_dotproduct(&state->s.ptr.pp_double[state->p][0], 1, &state->s.ptr.pp_double[state->p][0], 1, ae_v_len(0,n-1)); + if( ae_fp_less_eq(ae_sqrt(v, _state),epsx) ) + { + + /* + * X(k+1)-X(k) is small enough + */ + state->repterminationtype = 2; + result = ae_false; + return result; + } + + /* + * If Wolfe conditions are satisfied, we can update + * limited memory model. + * + * However, if conditions are not satisfied (NFEV limit is met, + * function is too wild, ...), we'll skip L-BFGS update + */ + if( mcinfo!=1 ) + { + + /* + * Skip update. + * + * In such cases we'll initialize search direction by + * antigradient vector, because it leads to more + * transparent code with less number of special cases + */ + state->fold = state->f; + ae_v_moveneg(&state->d.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + } + else + { + + /* + * Calculate Rho[k], GammaK + */ + v = ae_v_dotproduct(&state->y.ptr.pp_double[state->p][0], 1, &state->s.ptr.pp_double[state->p][0], 1, ae_v_len(0,n-1)); + vv = ae_v_dotproduct(&state->y.ptr.pp_double[state->p][0], 1, &state->y.ptr.pp_double[state->p][0], 1, ae_v_len(0,n-1)); + if( ae_fp_eq(v,0)||ae_fp_eq(vv,0) ) + { + + /* + * Rounding errors make further iterations impossible. + */ + state->repterminationtype = -2; + result = ae_false; + return result; + } + state->rho.ptr.p_double[state->p] = 1/v; + state->gammak = v/vv; + + /* + * Calculate d(k+1) = -H(k+1)*g(k+1) + * + * for I:=K downto K-Q do + * V = s(i)^T * work(iteration:I) + * theta(i) = V + * work(iteration:I+1) = work(iteration:I) - V*Rho(i)*y(i) + * work(last iteration) = H0*work(last iteration) - preconditioner + * for I:=K-Q to K do + * V = y(i)^T*work(iteration:I) + * work(iteration:I+1) = work(iteration:I) +(-V+theta(i))*Rho(i)*s(i) + * + * NOW WORK CONTAINS d(k+1) + */ + ae_v_move(&state->work.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + for(i=state->k; i>=state->k-state->q; i--) + { + ic = i%m; + v = ae_v_dotproduct(&state->s.ptr.pp_double[ic][0], 1, &state->work.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->theta.ptr.p_double[ic] = v; + vv = v*state->rho.ptr.p_double[ic]; + ae_v_subd(&state->work.ptr.p_double[0], 1, &state->y.ptr.pp_double[ic][0], 1, ae_v_len(0,n-1), vv); + } + if( state->prectype==0 ) + { + + /* + * Simple preconditioner is used + */ + v = state->gammak; + ae_v_muld(&state->work.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + } + if( state->prectype==1 ) + { + + /* + * Cholesky preconditioner is used + */ + fblscholeskysolve(&state->denseh, 1, n, ae_true, &state->work, &state->autobuf, _state); + } + for(i=state->k-state->q; i<=state->k; i++) + { + ic = i%m; + v = ae_v_dotproduct(&state->y.ptr.pp_double[ic][0], 1, &state->work.ptr.p_double[0], 1, ae_v_len(0,n-1)); + vv = state->rho.ptr.p_double[ic]*(-v+state->theta.ptr.p_double[ic]); + ae_v_addd(&state->work.ptr.p_double[0], 1, &state->s.ptr.pp_double[ic][0], 1, ae_v_len(0,n-1), vv); + } + ae_v_moveneg(&state->d.ptr.p_double[0], 1, &state->work.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * Next step + */ + state->fold = state->f; + state->k = state->k+1; + } + goto lbl_6; +lbl_7: + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ia.ptr.p_int[1] = m; + state->rstate.ia.ptr.p_int[2] = maxits; + state->rstate.ia.ptr.p_int[3] = i; + state->rstate.ia.ptr.p_int[4] = j; + state->rstate.ia.ptr.p_int[5] = ic; + state->rstate.ia.ptr.p_int[6] = mcinfo; + state->rstate.ra.ptr.p_double[0] = epsf; + state->rstate.ra.ptr.p_double[1] = epsg; + state->rstate.ra.ptr.p_double[2] = epsx; + state->rstate.ra.ptr.p_double[3] = v; + state->rstate.ra.ptr.p_double[4] = vv; + return result; +} + + +/************************************************************************* +L-BFGS algorithm results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -2 rounding errors prevent further improvement. + X contains best point found. + * -1 incorrect parameters were specified + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsresults(minlbfgsstate* state, + /* Real */ ae_vector* x, + minlbfgsreport* rep, + ae_state *_state) +{ + + ae_vector_clear(x); + _minlbfgsreport_clear(rep); + + minlbfgsresultsbuf(state, x, rep, _state); +} + + +/************************************************************************* +L-BFGS algorithm results + +Buffered implementation of MinLBFGSResults which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.08.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsresultsbuf(minlbfgsstate* state, + /* Real */ ae_vector* x, + minlbfgsreport* rep, + ae_state *_state) +{ + + + if( x->cnt<state->n ) + { + ae_vector_set_length(x, state->n, _state); + } + ae_v_move(&x->ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + rep->iterationscount = state->repiterationscount; + rep->nfev = state->repnfev; + rep->terminationtype = state->repterminationtype; +} + + +/************************************************************************* +This subroutine restarts LBFGS algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used to store algorithm state + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsrestartfrom(minlbfgsstate* state, + /* Real */ ae_vector* x, + ae_state *_state) +{ + + + ae_assert(x->cnt>=state->n, "MinLBFGSRestartFrom: Length(X)<N!", _state); + ae_assert(isfinitevector(x, state->n, _state), "MinLBFGSRestartFrom: X contains infinite or NaN values!", _state); + ae_v_move(&state->x.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + ae_vector_set_length(&state->rstate.ia, 6+1, _state); + ae_vector_set_length(&state->rstate.ra, 4+1, _state); + state->rstate.stage = -1; + minlbfgs_clearrequestfields(state, _state); +} + + +/************************************************************************* +Clears request fileds (to be sure that we don't forgot to clear something) +*************************************************************************/ +static void minlbfgs_clearrequestfields(minlbfgsstate* state, + ae_state *_state) +{ + + + state->needfg = ae_false; + state->xupdated = ae_false; +} + + +ae_bool _minlbfgsstate_init(minlbfgsstate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->rho, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->y, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->s, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->theta, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->d, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->work, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->denseh, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->autobuf, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + if( !_linminstate_init(&p->lstate, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _minlbfgsstate_init_copy(minlbfgsstate* dst, minlbfgsstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->n = src->n; + dst->m = src->m; + dst->epsg = src->epsg; + dst->epsf = src->epsf; + dst->epsx = src->epsx; + dst->maxits = src->maxits; + dst->flags = src->flags; + dst->xrep = src->xrep; + dst->stpmax = src->stpmax; + dst->nfev = src->nfev; + dst->mcstage = src->mcstage; + dst->k = src->k; + dst->q = src->q; + dst->p = src->p; + if( !ae_vector_init_copy(&dst->rho, &src->rho, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->y, &src->y, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->s, &src->s, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->theta, &src->theta, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic) ) + return ae_false; + dst->stp = src->stp; + if( !ae_vector_init_copy(&dst->work, &src->work, _state, make_automatic) ) + return ae_false; + dst->fold = src->fold; + dst->prectype = src->prectype; + dst->gammak = src->gammak; + if( !ae_matrix_init_copy(&dst->denseh, &src->denseh, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->autobuf, &src->autobuf, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->f = src->f; + if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) ) + return ae_false; + dst->needfg = src->needfg; + dst->xupdated = src->xupdated; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + dst->repiterationscount = src->repiterationscount; + dst->repnfev = src->repnfev; + dst->repterminationtype = src->repterminationtype; + if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _minlbfgsstate_clear(minlbfgsstate* p) +{ + ae_vector_clear(&p->rho); + ae_matrix_clear(&p->y); + ae_matrix_clear(&p->s); + ae_vector_clear(&p->theta); + ae_vector_clear(&p->d); + ae_vector_clear(&p->work); + ae_matrix_clear(&p->denseh); + ae_vector_clear(&p->autobuf); + ae_vector_clear(&p->x); + ae_vector_clear(&p->g); + _rcommstate_clear(&p->rstate); + _linminstate_clear(&p->lstate); +} + + +ae_bool _minlbfgsreport_init(minlbfgsreport* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _minlbfgsreport_init_copy(minlbfgsreport* dst, minlbfgsreport* src, ae_state *_state, ae_bool make_automatic) +{ + dst->iterationscount = src->iterationscount; + dst->nfev = src->nfev; + dst->terminationtype = src->terminationtype; + return ae_true; +} + + +void _minlbfgsreport_clear(minlbfgsreport* p) +{ +} + + + + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] and Jacobian of f[]. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() and jac() callbacks. +First one is used to calculate f[] at given point, second one calculates +f[] and Jacobian df[i]/dx[j]. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state) +{ + + _minlmstate_clear(state); + + ae_assert(n>=1, "MinLMCreateVJ: N<1!", _state); + ae_assert(m>=1, "MinLMCreateVJ: M<1!", _state); + ae_assert(x->cnt>=n, "MinLMCreateVJ: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLMCreateVJ: X contains infinite or NaN values!", _state); + + /* + * initialize, check parameters + */ + state->n = n; + state->m = m; + state->algomode = 1; + state->hasf = ae_false; + state->hasfi = ae_true; + state->hasg = ae_false; + + /* + * second stage of initialization + */ + minlm_lmprepare(n, m, ae_false, state, _state); + minlmsetacctype(state, 0, _state); + minlmsetcond(state, 0, 0, 0, 0, _state); + minlmsetxrep(state, ae_false, _state); + minlmsetstpmax(state, 0, _state); + minlmrestartfrom(state, x, _state); +} + + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] only. Finite differences are used to +calculate Jacobian. + + +REQUIREMENTS: +This algorithm will request following information during its operation: +* function vector f[] at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() callback. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not accept function vector), but +it will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateV() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + DiffStep- differentiation step, >0 + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +See also MinLMIteration, MinLMResults. + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatev(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + double diffstep, + minlmstate* state, + ae_state *_state) +{ + + _minlmstate_clear(state); + + ae_assert(ae_isfinite(diffstep, _state), "MinLMCreateV: DiffStep is not finite!", _state); + ae_assert(ae_fp_greater(diffstep,0), "MinLMCreateV: DiffStep<=0!", _state); + ae_assert(n>=1, "MinLMCreateV: N<1!", _state); + ae_assert(m>=1, "MinLMCreateV: M<1!", _state); + ae_assert(x->cnt>=n, "MinLMCreateV: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLMCreateV: X contains infinite or NaN values!", _state); + + /* + * initialize + */ + state->n = n; + state->m = m; + state->algomode = 0; + state->hasf = ae_false; + state->hasfi = ae_true; + state->hasg = ae_false; + state->diffstep = diffstep; + + /* + * second stage of initialization + */ + minlm_lmprepare(n, m, ae_false, state, _state); + minlmsetacctype(state, 1, _state); + minlmsetcond(state, 0, 0, 0, 0, _state); + minlmsetxrep(state, ae_false, _state); + minlmsetstpmax(state, 0, _state); + minlmrestartfrom(state, x, _state); +} + + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of general form (not "sum-of- +-squares") function + F = F(x[0], ..., x[n-1]) +using its gradient and Hessian. Levenberg-Marquardt modification with +L-BFGS pre-optimization and internal pre-conditioned L-BFGS optimization +after each Levenberg-Marquardt step is used. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function value F at given point X +* F and gradient G (simultaneously) at given point X +* F, G and Hessian H (simultaneously) at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts func(), grad() and hess() +function pointers. First pointer is used to calculate F at given point, +second one calculates F(x) and grad F(x), third one calculates F(x), +grad F(x), hess F(x). + +You can try to initialize MinLMState structure with FGH-function and then +use incorrect version of MinLMOptimize() (for example, version which does +not provide Hessian matrix), but it will lead to exception being thrown +after first attempt to calculate Hessian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateFGH() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + pointers (delegates, etc.) to callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgh(ae_int_t n, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state) +{ + + _minlmstate_clear(state); + + ae_assert(n>=1, "MinLMCreateFGH: N<1!", _state); + ae_assert(x->cnt>=n, "MinLMCreateFGH: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLMCreateFGH: X contains infinite or NaN values!", _state); + + /* + * initialize + */ + state->n = n; + state->m = 0; + state->algomode = 2; + state->hasf = ae_true; + state->hasfi = ae_false; + state->hasg = ae_true; + + /* + * init2 + */ + minlm_lmprepare(n, 0, ae_true, state, _state); + minlmsetacctype(state, 2, _state); + minlmsetcond(state, 0, 0, 0, 0, _state); + minlmsetxrep(state, ae_false, _state); + minlmsetstpmax(state, 0, _state); + minlmrestartfrom(state, x, _state); +} + + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using: +* value of function vector f[] +* value of Jacobian of f[] +* gradient of merit function F(x) + +This function creates optimizer which uses acceleration strategy 2. Cheap +gradient of merit function (which is twice the product of function vector +and Jacobian) is used for accelerated iterations (see User Guide for more +info on this subject). + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point +* gradient of + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec(), jac() and grad() +callbacks. First one is used to calculate f[] at given point, second one +calculates f[] and Jacobian df[i]/dx[j], last one calculates gradient of +merit function F(x). + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVGJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevgj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state) +{ + + _minlmstate_clear(state); + + ae_assert(n>=1, "MinLMCreateVGJ: N<1!", _state); + ae_assert(m>=1, "MinLMCreateVGJ: M<1!", _state); + ae_assert(x->cnt>=n, "MinLMCreateVGJ: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLMCreateVGJ: X contains infinite or NaN values!", _state); + + /* + * initialize, check parameters + */ + state->n = n; + state->m = m; + state->algomode = 1; + state->hasf = ae_false; + state->hasfi = ae_true; + state->hasg = ae_false; + + /* + * second stage of initialization + */ + minlm_lmprepare(n, m, ae_false, state, _state); + minlmsetacctype(state, 2, _state); + minlmsetcond(state, 0, 0, 0, 0, _state); + minlmsetxrep(state, ae_false, _state); + minlmsetstpmax(state, 0, _state); + minlmrestartfrom(state, x, _state); +} + + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: + +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), gradient of F(), function vector f[] and Jacobian of +f[]. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVGJ, which +provides similar, but more consistent interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state) +{ + + _minlmstate_clear(state); + + ae_assert(n>=1, "MinLMCreateFGJ: N<1!", _state); + ae_assert(m>=1, "MinLMCreateFGJ: M<1!", _state); + ae_assert(x->cnt>=n, "MinLMCreateFGJ: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLMCreateFGJ: X contains infinite or NaN values!", _state); + + /* + * initialize + */ + state->n = n; + state->m = m; + state->algomode = 1; + state->hasf = ae_true; + state->hasfi = ae_false; + state->hasg = ae_true; + + /* + * init2 + */ + minlm_lmprepare(n, m, ae_true, state, _state); + minlmsetacctype(state, 2, _state); + minlmsetcond(state, 0, 0, 0, 0, _state); + minlmsetxrep(state, ae_false, _state); + minlmsetstpmax(state, 0, _state); + minlmrestartfrom(state, x, _state); +} + + +/************************************************************************* + CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), function vector f[] and Jacobian of f[]. Classic +Levenberg-Marquardt method is used. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVJ, which +provides similar, but more consistent and feature-rich interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state) +{ + + _minlmstate_clear(state); + + ae_assert(n>=1, "MinLMCreateFJ: N<1!", _state); + ae_assert(m>=1, "MinLMCreateFJ: M<1!", _state); + ae_assert(x->cnt>=n, "MinLMCreateFJ: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinLMCreateFJ: X contains infinite or NaN values!", _state); + + /* + * initialize + */ + state->n = n; + state->m = m; + state->algomode = 1; + state->hasf = ae_true; + state->hasfi = ae_false; + state->hasg = ae_false; + + /* + * init 2 + */ + minlm_lmprepare(n, m, ae_true, state, _state); + minlmsetacctype(state, 0, _state); + minlmsetcond(state, 0, 0, 0, 0, _state); + minlmsetxrep(state, ae_false, _state); + minlmsetstpmax(state, 0, _state); + minlmrestartfrom(state, x, _state); +} + + +/************************************************************************* +This function sets stopping conditions for Levenberg-Marquardt optimization +algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. Only Levenberg-Marquardt + iterations are counted (L-BFGS/CG iterations are NOT + counted because their cost is very low compared to that of + LM). + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetcond(minlmstate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(epsg, _state), "MinLMSetCond: EpsG is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsg,0), "MinLMSetCond: negative EpsG!", _state); + ae_assert(ae_isfinite(epsf, _state), "MinLMSetCond: EpsF is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsf,0), "MinLMSetCond: negative EpsF!", _state); + ae_assert(ae_isfinite(epsx, _state), "MinLMSetCond: EpsX is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsx,0), "MinLMSetCond: negative EpsX!", _state); + ae_assert(maxits>=0, "MinLMSetCond: negative MaxIts!", _state); + if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 ) + { + epsx = 1.0E-6; + } + state->epsg = epsg; + state->epsf = epsf; + state->epsx = epsx; + state->maxits = maxits; +} + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinLMOptimize(). Both Levenberg-Marquardt and internal L-BFGS +iterations are reported. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetxrep(minlmstate* state, ae_bool needxrep, ae_state *_state) +{ + + + state->xrep = needxrep; +} + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + +NOTE: non-zero StpMax leads to moderate performance degradation because +intermediate step of preconditioned L-BFGS optimization is incompatible +with limits on step size. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetstpmax(minlmstate* state, double stpmax, ae_state *_state) +{ + + + ae_assert(ae_isfinite(stpmax, _state), "MinLMSetStpMax: StpMax is not finite!", _state); + ae_assert(ae_fp_greater_eq(stpmax,0), "MinLMSetStpMax: StpMax<0!", _state); + state->stpmax = stpmax; +} + + +/************************************************************************* +This function is used to change acceleration settings + +You can choose between three acceleration strategies: +* AccType=0, no acceleration. +* AccType=1, secant updates are used to update quadratic model after each + iteration. After fixed number of iterations (or after model breakdown) + we recalculate quadratic model using analytic Jacobian or finite + differences. Number of secant-based iterations depends on optimization + settings: about 3 iterations - when we have analytic Jacobian, up to 2*N + iterations - when we use finite differences to calculate Jacobian. +* AccType=2, after quadratic model is built and LM step is made, we use it + as preconditioner for several (5-10) iterations of L-BFGS algorithm. + +AccType=1 is recommended when Jacobian calculation cost is prohibitive +high (several Mx1 function vector calculations followed by several NxN +Cholesky factorizations are faster than calculation of one M*N Jacobian). +It should also be used when we have no Jacobian, because finite difference +approximation takes too much time to compute. + +AccType=2 is recommended when Jacobian is cheap - much more cheaper than +one Cholesky factorization. We can reduce number of Cholesky +factorizations at the cost of increased number of Jacobian calculations. +Sometimes it helps. + +Table below list optimization protocols (XYZ protocol corresponds to +MinLMCreateXYZ) and acceleration types they support (and use by default). + +ACCELERATION TYPES SUPPORTED BY OPTIMIZATION PROTOCOLS: + +protocol 0 1 2 comment +V + + +VJ + + + +FGH + + +VGJ + + + special protocol, not for widespread use +FJ + + obsolete protocol, not recommended +FGJ + + obsolete protocol, not recommended + +DAFAULT VALUES: + +protocol 0 1 2 comment +V x without acceleration it is so slooooooooow +VJ x +FGH x +VGJ x we've implicitly turned (2) by passing gradient +FJ x obsolete protocol, not recommended +FGJ x obsolete protocol, not recommended + +NOTE: this function should be called before optimization. Attempt to call +it during algorithm iterations may result in unexpected behavior. + +NOTE: attempt to call this function with unsupported protocol/acceleration +combination will result in exception being thrown. + + -- ALGLIB -- + Copyright 14.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetacctype(minlmstate* state, + ae_int_t acctype, + ae_state *_state) +{ + + + ae_assert((acctype==0||acctype==1)||acctype==2, "MinLMSetAccType: incorrect AccType!", _state); + if( acctype==0 ) + { + state->maxmodelage = 0; + state->makeadditers = ae_false; + return; + } + if( acctype==1 ) + { + ae_assert(state->hasfi, "MinLMSetAccType: AccType=1 is incompatible with current protocol!", _state); + if( state->algomode==0 ) + { + state->maxmodelage = 2*state->n; + } + else + { + state->maxmodelage = minlm_smallmodelage; + } + state->makeadditers = ae_false; + return; + } + if( acctype==2 ) + { + ae_assert(state->algomode==1||state->algomode==2, "MinLMSetAccType: AccType=2 is incompatible with current protocol!", _state); + state->maxmodelage = 0; + state->makeadditers = ae_true; + return; + } +} + + +/************************************************************************* +NOTES: + +1. Depending on function used to create state structure, this algorithm + may accept Jacobian and/or Hessian and/or gradient. According to the + said above, there ase several versions of this function, which accept + different sets of callbacks. + + This flexibility opens way to subtle errors - you may create state with + MinLMCreateFGH() (optimization using Hessian), but call function which + does not accept Hessian. So when algorithm will request Hessian, there + will be no callback to call. In this case exception will be thrown. + + Be careful to avoid such errors because there is no way to find them at + compile time - you can see them at runtime only. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool minlmiteration(minlmstate* state, ae_state *_state) +{ + ae_int_t n; + ae_int_t m; + ae_bool bflag; + ae_int_t iflag; + double v; + double s; + double t; + ae_int_t i; + ae_int_t k; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + m = state->rstate.ia.ptr.p_int[1]; + iflag = state->rstate.ia.ptr.p_int[2]; + i = state->rstate.ia.ptr.p_int[3]; + k = state->rstate.ia.ptr.p_int[4]; + bflag = state->rstate.ba.ptr.p_bool[0]; + v = state->rstate.ra.ptr.p_double[0]; + s = state->rstate.ra.ptr.p_double[1]; + t = state->rstate.ra.ptr.p_double[2]; + } + else + { + n = -983; + m = -989; + iflag = -834; + i = 900; + k = -287; + bflag = ae_false; + v = 214; + s = -338; + t = -686; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + if( state->rstate.stage==3 ) + { + goto lbl_3; + } + if( state->rstate.stage==4 ) + { + goto lbl_4; + } + if( state->rstate.stage==5 ) + { + goto lbl_5; + } + if( state->rstate.stage==6 ) + { + goto lbl_6; + } + if( state->rstate.stage==7 ) + { + goto lbl_7; + } + if( state->rstate.stage==8 ) + { + goto lbl_8; + } + if( state->rstate.stage==9 ) + { + goto lbl_9; + } + if( state->rstate.stage==10 ) + { + goto lbl_10; + } + if( state->rstate.stage==11 ) + { + goto lbl_11; + } + if( state->rstate.stage==12 ) + { + goto lbl_12; + } + if( state->rstate.stage==13 ) + { + goto lbl_13; + } + if( state->rstate.stage==14 ) + { + goto lbl_14; + } + if( state->rstate.stage==15 ) + { + goto lbl_15; + } + if( state->rstate.stage==16 ) + { + goto lbl_16; + } + if( state->rstate.stage==17 ) + { + goto lbl_17; + } + + /* + * Routine body + */ + + /* + * prepare + */ + n = state->n; + m = state->m; + state->repiterationscount = 0; + state->repterminationtype = 0; + state->repnfunc = 0; + state->repnjac = 0; + state->repngrad = 0; + state->repnhess = 0; + state->repncholesky = 0; + + /* + * Initial report of current point + * + * Note 1: we rewrite State.X twice because + * user may accidentally change it after first call. + * + * Note 2: we set NeedF or NeedFI depending on what + * information about function we have. + */ + if( !state->xrep ) + { + goto lbl_18; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + if( !state->hasf ) + { + goto lbl_20; + } + state->needf = ae_true; + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + state->needf = ae_false; + goto lbl_21; +lbl_20: + ae_assert(state->hasfi, "MinLM: internal error 2!", _state); + state->needfi = ae_true; + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + state->needfi = ae_false; + v = ae_v_dotproduct(&state->fi.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + state->f = v; +lbl_21: + state->repnfunc = state->repnfunc+1; + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + state->xupdated = ae_false; +lbl_18: + + /* + * Prepare control variables + */ + state->nu = 1; + state->lambdav = -ae_maxrealnumber; + state->modelage = state->maxmodelage+1; + state->deltaxready = ae_false; + state->deltafready = ae_false; + + /* + * Main cycle. + * + * We move through it until either: + * * one of the stopping conditions is met + * * we decide that stopping conditions are too stringent + * and break from cycle + * + */ +lbl_22: + if( ae_false ) + { + goto lbl_23; + } + + /* + * First, we have to prepare quadratic model for our function. + * We use BFlag to ensure that model is prepared; + * if it is false at the end of this block, something went wrong. + * + * We may either calculate brand new model or update old one. + * + * Before this block we have: + * * State.XBase - current position. + * * State.DeltaX - if DeltaXReady is True + * * State.DeltaF - if DeltaFReady is True + * + * After this block is over, we will have: + * * State.XBase - base point (unchanged) + * * State.FBase - F(XBase) + * * State.GBase - linear term + * * State.QuadraticModel - quadratic term + * * State.LambdaV - current estimate for lambda + * + * We also clear DeltaXReady/DeltaFReady flags + * after initialization is done. + */ + bflag = ae_false; + if( !(state->algomode==0||state->algomode==1) ) + { + goto lbl_24; + } + + /* + * Calculate f[] and Jacobian + */ + if( !(state->modelage>state->maxmodelage||!(state->deltaxready&&state->deltafready)) ) + { + goto lbl_26; + } + + /* + * Refresh model (using either finite differences or analytic Jacobian) + */ + if( state->algomode!=0 ) + { + goto lbl_28; + } + + /* + * Optimization using F values only. + * Use finite differences to estimate Jacobian. + */ + ae_assert(state->hasfi, "MinLMIteration: internal error when estimating Jacobian (no f[])", _state); + k = 0; +lbl_30: + if( k>n-1 ) + { + goto lbl_32; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->x.ptr.p_double[k] = state->x.ptr.p_double[k]-state->diffstep; + minlm_clearrequestfields(state, _state); + state->needfi = ae_true; + state->rstate.stage = 3; + goto lbl_rcomm; +lbl_3: + state->repnfunc = state->repnfunc+1; + ae_v_move(&state->fm1.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->x.ptr.p_double[k] = state->x.ptr.p_double[k]+state->diffstep; + minlm_clearrequestfields(state, _state); + state->needfi = ae_true; + state->rstate.stage = 4; + goto lbl_rcomm; +lbl_4: + state->repnfunc = state->repnfunc+1; + ae_v_move(&state->fp1.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + v = 1/(2*state->diffstep); + ae_v_moved(&state->j.ptr.pp_double[0][k], state->j.stride, &state->fp1.ptr.p_double[0], 1, ae_v_len(0,m-1), v); + ae_v_subd(&state->j.ptr.pp_double[0][k], state->j.stride, &state->fm1.ptr.p_double[0], 1, ae_v_len(0,m-1), v); + k = k+1; + goto lbl_30; +lbl_32: + + /* + * Calculate F(XBase) + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + state->needfi = ae_true; + state->rstate.stage = 5; + goto lbl_rcomm; +lbl_5: + state->needfi = ae_false; + state->repnfunc = state->repnfunc+1; + state->repnjac = state->repnjac+1; + + /* + * New model + */ + state->modelage = 0; + goto lbl_29; +lbl_28: + + /* + * Obtain f[] and Jacobian + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + state->needfij = ae_true; + state->rstate.stage = 6; + goto lbl_rcomm; +lbl_6: + state->needfij = ae_false; + state->repnfunc = state->repnfunc+1; + state->repnjac = state->repnjac+1; + + /* + * New model + */ + state->modelage = 0; +lbl_29: + goto lbl_27; +lbl_26: + + /* + * State.J contains Jacobian or its current approximation; + * refresh it using secant updates: + * + * f(x0+dx) = f(x0) + J*dx, + * J_new = J_old + u*h' + * h = x_new-x_old + * u = (f_new - f_old - J_old*h)/(h'h) + * + * We can explicitly generate h and u, but it is + * preferential to do in-place calculations. Only + * I-th row of J_old is needed to calculate u[I], + * so we can update J row by row in one pass. + * + * NOTE: we expect that State.XBase contains new point, + * State.FBase contains old point, State.DeltaX and + * State.DeltaY contain updates from last step. + */ + ae_assert(state->deltaxready&&state->deltafready, "MinLMIteration: uninitialized DeltaX/DeltaF", _state); + t = ae_v_dotproduct(&state->deltax.ptr.p_double[0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_assert(ae_fp_neq(t,0), "MinLM: internal error (T=0)", _state); + for(i=0; i<=m-1; i++) + { + v = ae_v_dotproduct(&state->j.ptr.pp_double[i][0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = (state->deltaf.ptr.p_double[i]-v)/t; + ae_v_addd(&state->j.ptr.pp_double[i][0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + } + ae_v_move(&state->fi.ptr.p_double[0], 1, &state->fibase.ptr.p_double[0], 1, ae_v_len(0,m-1)); + ae_v_add(&state->fi.ptr.p_double[0], 1, &state->deltaf.ptr.p_double[0], 1, ae_v_len(0,m-1)); + + /* + * Increase model age + */ + state->modelage = state->modelage+1; +lbl_27: + + /* + * Generate quadratic model: + * f(xbase+dx) = + * = (f0 + J*dx)'(f0 + J*dx) + * = f0^2 + dx'J'f0 + f0*J*dx + dx'J'J*dx + * = f0^2 + 2*f0*J*dx + dx'J'J*dx + * + * Note that we calculate 2*(J'J) instead of J'J because + * our quadratic model is based on Tailor decomposition, + * i.e. it has 0.5 before quadratic term. + */ + rmatrixgemm(n, n, m, 2.0, &state->j, 0, 0, 1, &state->j, 0, 0, 0, 0.0, &state->quadraticmodel, 0, 0, _state); + rmatrixmv(n, m, &state->j, 0, 0, 1, &state->fi, 0, &state->gbase, 0, _state); + ae_v_muld(&state->gbase.ptr.p_double[0], 1, ae_v_len(0,n-1), 2); + v = ae_v_dotproduct(&state->fi.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + state->fbase = v; + ae_v_move(&state->fibase.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + + /* + * set control variables + */ + bflag = ae_true; +lbl_24: + if( state->algomode!=2 ) + { + goto lbl_33; + } + ae_assert(!state->hasfi, "MinLMIteration: internal error (HasFI is True in Hessian-based mode)", _state); + + /* + * Obtain F, G, H + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + state->needfgh = ae_true; + state->rstate.stage = 7; + goto lbl_rcomm; +lbl_7: + state->needfgh = ae_false; + state->repnfunc = state->repnfunc+1; + state->repngrad = state->repngrad+1; + state->repnhess = state->repnhess+1; + rmatrixcopy(n, n, &state->h, 0, 0, &state->quadraticmodel, 0, 0, _state); + ae_v_move(&state->gbase.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->fbase = state->f; + + /* + * set control variables + */ + bflag = ae_true; + state->modelage = 0; +lbl_33: + ae_assert(bflag, "MinLM: internal integrity check failed!", _state); + state->deltaxready = ae_false; + state->deltafready = ae_false; + + /* + * If Lambda is not initialized, initialize it using quadratic model + */ + if( ae_fp_less(state->lambdav,0) ) + { + state->lambdav = 0; + for(i=0; i<=n-1; i++) + { + state->lambdav = ae_maxreal(state->lambdav, ae_fabs(state->quadraticmodel.ptr.pp_double[i][i], _state), _state); + } + state->lambdav = 0.001*state->lambdav; + if( ae_fp_eq(state->lambdav,0) ) + { + state->lambdav = 1; + } + } + + /* + * Test stopping conditions for function gradient + */ + v = ae_v_dotproduct(&state->gbase.ptr.p_double[0], 1, &state->gbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = ae_sqrt(v, _state); + if( ae_fp_greater(v,state->epsg) ) + { + goto lbl_35; + } + if( state->modelage!=0 ) + { + goto lbl_37; + } + + /* + * Model is fresh, we can rely on it and terminate algorithm + */ + state->repterminationtype = 4; + if( !state->xrep ) + { + goto lbl_39; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->f = state->fbase; + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 8; + goto lbl_rcomm; +lbl_8: + state->xupdated = ae_false; +lbl_39: + result = ae_false; + return result; + goto lbl_38; +lbl_37: + + /* + * Model is not fresh, we should refresh it and test + * conditions once more + */ + state->modelage = state->maxmodelage+1; + goto lbl_22; +lbl_38: +lbl_35: + + /* + * Find value of Levenberg-Marquardt damping parameter which: + * * leads to positive definite damped model + * * within bounds specified by StpMax + * * generates step which decreases function value + * + * After this block IFlag is set to: + * * -2, if model update is needed (either Lambda growth is too large + * or step is too short, but we can't rely on model and stop iterations) + * * -1, if model is fresh, Lambda have grown too large, termination is needed + * * 0, if everything is OK, continue iterations + * + * State.Nu can have any value on enter, but after exit it is set to 1.0 + */ + iflag = -99; +lbl_41: + if( ae_false ) + { + goto lbl_42; + } + + /* + * Do we need model update? + */ + if( state->modelage>0&&ae_fp_greater_eq(state->nu,minlm_suspiciousnu) ) + { + iflag = -2; + goto lbl_42; + } + + /* + * DampedModel = QuadraticModel+lambda*I + */ + rmatrixcopy(n, n, &state->quadraticmodel, 0, 0, &state->dampedmodel, 0, 0, _state); + for(i=0; i<=n-1; i++) + { + state->dampedmodel.ptr.pp_double[i][i] = state->dampedmodel.ptr.pp_double[i][i]+state->lambdav; + } + + /* + * 1. try to solve (RawModel+Lambda*I)*dx = -g. + * increase lambda if left part is not positive definite. + * 2. bound step by StpMax + * increase lambda if step is larger than StpMax + * + * We use BFlag variable to indicate that we have to increase Lambda. + * If it is False, we will try to increase Lambda and move to new iteration. + */ + bflag = ae_true; + state->repncholesky = state->repncholesky+1; + if( spdmatrixcholeskyrec(&state->dampedmodel, 0, n, ae_true, &state->choleskybuf, _state) ) + { + ae_v_moveneg(&state->xdir.ptr.p_double[0], 1, &state->gbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + fblscholeskysolve(&state->dampedmodel, 1.0, n, ae_true, &state->xdir, &state->choleskybuf, _state); + v = ae_v_dotproduct(&state->xdir.ptr.p_double[0], 1, &state->xdir.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_isfinite(v, _state) ) + { + v = ae_sqrt(v, _state); + if( ae_fp_greater(state->stpmax,0)&&ae_fp_greater(v,state->stpmax) ) + { + bflag = ae_false; + } + } + else + { + bflag = ae_false; + } + } + else + { + bflag = ae_false; + } + if( !bflag ) + { + + /* + * Solution failed: + * try to increase lambda to make matrix positive definite and continue. + */ + if( !minlm_increaselambda(&state->lambdav, &state->nu, _state) ) + { + iflag = -1; + goto lbl_42; + } + goto lbl_41; + } + + /* + * Step in State.XDir and it is bounded by StpMax. + * + * We should check stopping conditions on step size here. + * DeltaX, which is used for secant updates, is initialized here. + * + * This code is a bit tricky because sometimes XDir<>0, but + * it is so small that XDir+XBase==XBase (in finite precision + * arithmetics). So we set DeltaX to XBase, then + * add XDir, and then subtract XBase to get exact value of + * DeltaX. + * + * Step length is estimated using DeltaX. + * + * NOTE: stopping conditions are tested + * for fresh models only (ModelAge=0) + */ + ae_v_move(&state->deltax.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_add(&state->deltax.ptr.p_double[0], 1, &state->xdir.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_sub(&state->deltax.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->deltaxready = ae_true; + v = ae_v_dotproduct(&state->deltax.ptr.p_double[0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = ae_sqrt(v, _state); + if( ae_fp_greater(v,state->epsx) ) + { + goto lbl_43; + } + if( state->modelage!=0 ) + { + goto lbl_45; + } + + /* + * Step is too short, model is fresh and we can rely on it. + * Terminating. + */ + state->repterminationtype = 2; + if( !state->xrep ) + { + goto lbl_47; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->f = state->fbase; + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 9; + goto lbl_rcomm; +lbl_9: + state->xupdated = ae_false; +lbl_47: + result = ae_false; + return result; + goto lbl_46; +lbl_45: + + /* + * Step is suspiciously short, but model is not fresh + * and we can't rely on it. + */ + iflag = -2; + goto lbl_42; +lbl_46: +lbl_43: + + /* + * Let's evaluate new step: + * a) if we have Fi vector, we evaluate it using rcomm, and + * then we manually calculate State.F as sum of squares of Fi[] + * b) if we have F value, we just evaluate it through rcomm interface + * + * We prefer (a) because we may need Fi vector for additional + * iterations + */ + ae_assert(state->hasfi||state->hasf, "MinLM: internal error 2!", _state); + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_add(&state->x.ptr.p_double[0], 1, &state->xdir.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + if( !state->hasfi ) + { + goto lbl_49; + } + state->needfi = ae_true; + state->rstate.stage = 10; + goto lbl_rcomm; +lbl_10: + state->needfi = ae_false; + v = ae_v_dotproduct(&state->fi.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + state->f = v; + ae_v_move(&state->deltaf.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1)); + ae_v_sub(&state->deltaf.ptr.p_double[0], 1, &state->fibase.ptr.p_double[0], 1, ae_v_len(0,m-1)); + state->deltafready = ae_true; + goto lbl_50; +lbl_49: + state->needf = ae_true; + state->rstate.stage = 11; + goto lbl_rcomm; +lbl_11: + state->needf = ae_false; +lbl_50: + state->repnfunc = state->repnfunc+1; + if( ae_fp_greater_eq(state->f,state->fbase) ) + { + + /* + * Increase lambda and continue + */ + if( !minlm_increaselambda(&state->lambdav, &state->nu, _state) ) + { + iflag = -1; + goto lbl_42; + } + goto lbl_41; + } + + /* + * We've found our step! + */ + iflag = 0; + goto lbl_42; + goto lbl_41; +lbl_42: + state->nu = 1; + ae_assert(iflag>=-2&&iflag<=0, "MinLM: internal integrity check failed!", _state); + if( iflag==-2 ) + { + state->modelage = state->maxmodelage+1; + goto lbl_22; + } + if( iflag==-1 ) + { + goto lbl_23; + } + + /* + * Levenberg-Marquardt step is ready. + * Compare predicted vs. actual decrease and decide what to do with lambda. + * + * NOTE: we expect that State.DeltaX contains direction of step, + * State.F contains function value at new point. + */ + ae_assert(state->deltaxready, "MinLM: deltaX is not ready", _state); + t = 0; + for(i=0; i<=n-1; i++) + { + v = ae_v_dotproduct(&state->quadraticmodel.ptr.pp_double[i][0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + t = t+state->deltax.ptr.p_double[i]*state->gbase.ptr.p_double[i]+0.5*state->deltax.ptr.p_double[i]*v; + } + state->predicteddecrease = -t; + state->actualdecrease = -(state->f-state->fbase); + if( ae_fp_less_eq(state->predicteddecrease,0) ) + { + goto lbl_23; + } + v = state->actualdecrease/state->predicteddecrease; + if( ae_fp_greater_eq(v,0.1) ) + { + goto lbl_51; + } + if( minlm_increaselambda(&state->lambdav, &state->nu, _state) ) + { + goto lbl_53; + } + + /* + * Lambda is too large, we have to break iterations. + */ + state->repterminationtype = 7; + if( !state->xrep ) + { + goto lbl_55; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->f = state->fbase; + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 12; + goto lbl_rcomm; +lbl_12: + state->xupdated = ae_false; +lbl_55: + result = ae_false; + return result; +lbl_53: +lbl_51: + if( ae_fp_greater(v,0.5) ) + { + minlm_decreaselambda(&state->lambdav, &state->nu, _state); + } + + /* + * Accept step, report it and + * test stopping conditions on iterations count and function decrease. + * + * NOTE: we expect that State.DeltaX contains direction of step, + * State.F contains function value at new point. + * + * NOTE2: we should update XBase ONLY. In the beginning of the next + * iteration we expect that State.FIBase is NOT updated and + * contains old value of a function vector. + */ + ae_v_add(&state->xbase.ptr.p_double[0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( !state->xrep ) + { + goto lbl_57; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 13; + goto lbl_rcomm; +lbl_13: + state->xupdated = ae_false; +lbl_57: + state->repiterationscount = state->repiterationscount+1; + if( state->repiterationscount>=state->maxits&&state->maxits>0 ) + { + state->repterminationtype = 5; + } + if( state->modelage==0 ) + { + if( ae_fp_less_eq(ae_fabs(state->f-state->fbase, _state),state->epsf*ae_maxreal(1, ae_maxreal(ae_fabs(state->f, _state), ae_fabs(state->fbase, _state), _state), _state)) ) + { + state->repterminationtype = 1; + } + } + if( state->repterminationtype<=0 ) + { + goto lbl_59; + } + if( !state->xrep ) + { + goto lbl_61; + } + + /* + * Report: XBase contains new point, F contains function value at new point + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 14; + goto lbl_rcomm; +lbl_14: + state->xupdated = ae_false; +lbl_61: + result = ae_false; + return result; +lbl_59: + state->modelage = state->modelage+1; + + /* + * Additional iterations for unconstrained problems: + * preconditioned L-BFGS is used. + * + * NOTE: additional iterations are incompatible with secant updates + * because they invalidate + */ + if( !(ae_fp_eq(state->stpmax,0)&&state->makeadditers) ) + { + goto lbl_63; + } + ae_assert(state->hasg||state->m!=0, "MinLM: no grad or Jacobian for additional iterations", _state); + + /* + * Make preconditioned iterations + */ + minlbfgssetcholeskypreconditioner(&state->internalstate, &state->dampedmodel, ae_true, _state); + minlbfgsrestartfrom(&state->internalstate, &state->xbase, _state); +lbl_65: + if( !minlbfgsiteration(&state->internalstate, _state) ) + { + goto lbl_66; + } + if( !state->internalstate.needfg ) + { + goto lbl_67; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->internalstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minlm_clearrequestfields(state, _state); + if( !state->hasg ) + { + goto lbl_69; + } + state->needfg = ae_true; + state->rstate.stage = 15; + goto lbl_rcomm; +lbl_15: + state->needfg = ae_false; + state->repngrad = state->repngrad+1; + ae_v_move(&state->internalstate.g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->internalstate.f = state->f; + goto lbl_70; +lbl_69: + state->needfij = ae_true; + state->rstate.stage = 16; + goto lbl_rcomm; +lbl_16: + state->needfij = ae_false; + state->repnfunc = state->repnfunc+1; + state->repnjac = state->repnjac+1; + for(i=0; i<=n-1; i++) + { + state->internalstate.g.ptr.p_double[i] = 0; + } + for(i=0; i<=m-1; i++) + { + v = 2*state->fi.ptr.p_double[i]; + ae_v_addd(&state->internalstate.g.ptr.p_double[0], 1, &state->j.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + state->internalstate.f = state->internalstate.f+ae_sqr(state->fi.ptr.p_double[i], _state); + } +lbl_70: +lbl_67: + goto lbl_65; +lbl_66: + minlbfgsresultsbuf(&state->internalstate, &state->xbase, &state->internalrep, _state); + + /* + * Invalidate DeltaX/DeltaF (control variables used for integrity checks) + */ + state->deltaxready = ae_false; + state->deltafready = ae_false; +lbl_63: + goto lbl_22; +lbl_23: + + /* + * Lambda is too large, we have to break iterations. + */ + state->repterminationtype = 7; + if( !state->xrep ) + { + goto lbl_71; + } + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->f = state->fbase; + minlm_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 17; + goto lbl_rcomm; +lbl_17: + state->xupdated = ae_false; +lbl_71: + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ia.ptr.p_int[1] = m; + state->rstate.ia.ptr.p_int[2] = iflag; + state->rstate.ia.ptr.p_int[3] = i; + state->rstate.ia.ptr.p_int[4] = k; + state->rstate.ba.ptr.p_bool[0] = bflag; + state->rstate.ra.ptr.p_double[0] = v; + state->rstate.ra.ptr.p_double[1] = s; + state->rstate.ra.ptr.p_double[2] = t; + return result; +} + + +/************************************************************************* +Levenberg-Marquardt algorithm results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report; + see comments for this structure for more info. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmresults(minlmstate* state, + /* Real */ ae_vector* x, + minlmreport* rep, + ae_state *_state) +{ + + ae_vector_clear(x); + _minlmreport_clear(rep); + + minlmresultsbuf(state, x, rep, _state); +} + + +/************************************************************************* +Levenberg-Marquardt algorithm results + +Buffered implementation of MinLMResults(), which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmresultsbuf(minlmstate* state, + /* Real */ ae_vector* x, + minlmreport* rep, + ae_state *_state) +{ + + + if( x->cnt<state->n ) + { + ae_vector_set_length(x, state->n, _state); + } + ae_v_move(&x->ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + rep->iterationscount = state->repiterationscount; + rep->terminationtype = state->repterminationtype; + rep->nfunc = state->repnfunc; + rep->njac = state->repnjac; + rep->ngrad = state->repngrad; + rep->nhess = state->repnhess; + rep->ncholesky = state->repncholesky; +} + + +/************************************************************************* +This subroutine restarts LM algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used for reverse communication previously + allocated with MinLMCreateXXX call. + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmrestartfrom(minlmstate* state, + /* Real */ ae_vector* x, + ae_state *_state) +{ + + + ae_assert(x->cnt>=state->n, "MinLMRestartFrom: Length(X)<N!", _state); + ae_assert(isfinitevector(x, state->n, _state), "MinLMRestartFrom: X contains infinite or NaN values!", _state); + ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + ae_vector_set_length(&state->rstate.ia, 4+1, _state); + ae_vector_set_length(&state->rstate.ba, 0+1, _state); + ae_vector_set_length(&state->rstate.ra, 2+1, _state); + state->rstate.stage = -1; + minlm_clearrequestfields(state, _state); +} + + +/************************************************************************* +Prepare internal structures (except for RComm). + +Note: M must be zero for FGH mode, non-zero for V/VJ/FJ/FGJ mode. +*************************************************************************/ +static void minlm_lmprepare(ae_int_t n, + ae_int_t m, + ae_bool havegrad, + minlmstate* state, + ae_state *_state) +{ + ae_int_t i; + + + if( n<=0||m<0 ) + { + return; + } + if( havegrad ) + { + ae_vector_set_length(&state->g, n, _state); + } + if( m!=0 ) + { + ae_matrix_set_length(&state->j, m, n, _state); + ae_vector_set_length(&state->fi, m, _state); + ae_vector_set_length(&state->fibase, m, _state); + ae_vector_set_length(&state->deltaf, m, _state); + ae_vector_set_length(&state->fm2, m, _state); + ae_vector_set_length(&state->fm1, m, _state); + ae_vector_set_length(&state->fp2, m, _state); + ae_vector_set_length(&state->fp1, m, _state); + } + else + { + ae_matrix_set_length(&state->h, n, n, _state); + } + ae_vector_set_length(&state->x, n, _state); + ae_vector_set_length(&state->deltax, n, _state); + ae_matrix_set_length(&state->quadraticmodel, n, n, _state); + ae_matrix_set_length(&state->dampedmodel, n, n, _state); + ae_vector_set_length(&state->xbase, n, _state); + ae_vector_set_length(&state->gbase, n, _state); + ae_vector_set_length(&state->xdir, n, _state); + + /* + * prepare internal L-BFGS + */ + for(i=0; i<=n-1; i++) + { + state->x.ptr.p_double[i] = 0; + } + minlbfgscreate(n, ae_minint(minlm_additers, n, _state), &state->x, &state->internalstate, _state); + minlbfgssetcond(&state->internalstate, 0.0, 0.0, 0.0, ae_minint(minlm_additers, n, _state), _state); +} + + +/************************************************************************* +Clears request fileds (to be sure that we don't forgot to clear something) +*************************************************************************/ +static void minlm_clearrequestfields(minlmstate* state, ae_state *_state) +{ + + + state->needf = ae_false; + state->needfg = ae_false; + state->needfgh = ae_false; + state->needfij = ae_false; + state->needfi = ae_false; + state->xupdated = ae_false; +} + + +/************************************************************************* +Increases lambda, returns False when there is a danger of overflow +*************************************************************************/ +static ae_bool minlm_increaselambda(double* lambdav, + double* nu, + ae_state *_state) +{ + double lnlambda; + double lnnu; + double lnlambdaup; + double lnmax; + ae_bool result; + + + result = ae_false; + lnlambda = ae_log(*lambdav, _state); + lnlambdaup = ae_log(minlm_lambdaup, _state); + lnnu = ae_log(*nu, _state); + lnmax = ae_log(ae_maxrealnumber, _state); + if( ae_fp_greater(lnlambda+lnlambdaup+lnnu,0.25*lnmax) ) + { + return result; + } + if( ae_fp_greater(lnnu+ae_log(2, _state),lnmax) ) + { + return result; + } + *lambdav = *lambdav*minlm_lambdaup*(*nu); + *nu = *nu*2; + result = ae_true; + return result; +} + + +/************************************************************************* +Decreases lambda, but leaves it unchanged when there is danger of underflow. +*************************************************************************/ +static void minlm_decreaselambda(double* lambdav, + double* nu, + ae_state *_state) +{ + + + *nu = 1; + if( ae_fp_less(ae_log(*lambdav, _state)+ae_log(minlm_lambdadown, _state),ae_log(ae_minrealnumber, _state)) ) + { + *lambdav = ae_minrealnumber; + } + else + { + *lambdav = *lambdav*minlm_lambdadown; + } +} + + +ae_bool _minlmstate_init(minlmstate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->fi, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->j, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->h, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->fibase, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->gbase, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->quadraticmodel, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->dampedmodel, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xdir, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->deltax, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->deltaf, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->choleskybuf, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->fm2, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->fm1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->fp2, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->fp1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_minlbfgsstate_init(&p->internalstate, _state, make_automatic) ) + return ae_false; + if( !_minlbfgsreport_init(&p->internalrep, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _minlmstate_init_copy(minlmstate* dst, minlmstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->n = src->n; + dst->m = src->m; + dst->diffstep = src->diffstep; + dst->epsg = src->epsg; + dst->epsf = src->epsf; + dst->epsx = src->epsx; + dst->maxits = src->maxits; + dst->xrep = src->xrep; + dst->stpmax = src->stpmax; + dst->maxmodelage = src->maxmodelage; + dst->makeadditers = src->makeadditers; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->f = src->f; + if( !ae_vector_init_copy(&dst->fi, &src->fi, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->j, &src->j, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->h, &src->h, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) ) + return ae_false; + dst->needf = src->needf; + dst->needfg = src->needfg; + dst->needfgh = src->needfgh; + dst->needfij = src->needfij; + dst->needfi = src->needfi; + dst->xupdated = src->xupdated; + dst->algomode = src->algomode; + dst->hasf = src->hasf; + dst->hasfi = src->hasfi; + dst->hasg = src->hasg; + if( !ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic) ) + return ae_false; + dst->fbase = src->fbase; + if( !ae_vector_init_copy(&dst->fibase, &src->fibase, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->gbase, &src->gbase, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->quadraticmodel, &src->quadraticmodel, _state, make_automatic) ) + return ae_false; + dst->lambdav = src->lambdav; + dst->nu = src->nu; + if( !ae_matrix_init_copy(&dst->dampedmodel, &src->dampedmodel, _state, make_automatic) ) + return ae_false; + dst->modelage = src->modelage; + if( !ae_vector_init_copy(&dst->xdir, &src->xdir, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->deltax, &src->deltax, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->deltaf, &src->deltaf, _state, make_automatic) ) + return ae_false; + dst->deltaxready = src->deltaxready; + dst->deltafready = src->deltafready; + dst->repiterationscount = src->repiterationscount; + dst->repterminationtype = src->repterminationtype; + dst->repnfunc = src->repnfunc; + dst->repnjac = src->repnjac; + dst->repngrad = src->repngrad; + dst->repnhess = src->repnhess; + dst->repncholesky = src->repncholesky; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->choleskybuf, &src->choleskybuf, _state, make_automatic) ) + return ae_false; + dst->actualdecrease = src->actualdecrease; + dst->predicteddecrease = src->predicteddecrease; + if( !ae_vector_init_copy(&dst->fm2, &src->fm2, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->fm1, &src->fm1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->fp2, &src->fp2, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->fp1, &src->fp1, _state, make_automatic) ) + return ae_false; + if( !_minlbfgsstate_init_copy(&dst->internalstate, &src->internalstate, _state, make_automatic) ) + return ae_false; + if( !_minlbfgsreport_init_copy(&dst->internalrep, &src->internalrep, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _minlmstate_clear(minlmstate* p) +{ + ae_vector_clear(&p->x); + ae_vector_clear(&p->fi); + ae_matrix_clear(&p->j); + ae_matrix_clear(&p->h); + ae_vector_clear(&p->g); + ae_vector_clear(&p->xbase); + ae_vector_clear(&p->fibase); + ae_vector_clear(&p->gbase); + ae_matrix_clear(&p->quadraticmodel); + ae_matrix_clear(&p->dampedmodel); + ae_vector_clear(&p->xdir); + ae_vector_clear(&p->deltax); + ae_vector_clear(&p->deltaf); + _rcommstate_clear(&p->rstate); + ae_vector_clear(&p->choleskybuf); + ae_vector_clear(&p->fm2); + ae_vector_clear(&p->fm1); + ae_vector_clear(&p->fp2); + ae_vector_clear(&p->fp1); + _minlbfgsstate_clear(&p->internalstate); + _minlbfgsreport_clear(&p->internalrep); +} + + +ae_bool _minlmreport_init(minlmreport* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _minlmreport_init_copy(minlmreport* dst, minlmreport* src, ae_state *_state, ae_bool make_automatic) +{ + dst->iterationscount = src->iterationscount; + dst->terminationtype = src->terminationtype; + dst->nfunc = src->nfunc; + dst->njac = src->njac; + dst->ngrad = src->ngrad; + dst->nhess = src->nhess; + dst->ncholesky = src->ncholesky; + return ae_true; +} + + +void _minlmreport_clear(minlmreport* p) +{ +} + + + + +/************************************************************************* + NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING + MODIFIED ACTIVE SET ALGORITHM + WILLIAM W. HAGER AND HONGCHAO ZHANG + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments with bound +constraints: BndL[i] <= x[i] <= BndU[i] + +This method is globally convergent as long as grad(f) is Lipschitz +continuous on a level set: L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinASACreate() call +2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and + other functions +3. User calls MinASAOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinASAResults() to get solution +5. Optionally, user may call MinASARestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinASARestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from sizes of + X/BndL/BndU. + X - starting point, array[0..N-1]. + BndL - lower bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very small number as bound: -1000, -1.0E6 + or -1.0E300, or something like that. + BndU - upper bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very large number as bound: +1000, +1.0E6 + or +1.0E300, or something like that. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + +NOTES: + +1. you may tune stopping conditions with MinASASetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinASASetStpMax() function to bound algorithm's steps. +3. this function does NOT support infinite/NaN values in X, BndL, BndU. + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void minasacreate(ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* bndl, + /* Real */ ae_vector* bndu, + minasastate* state, + ae_state *_state) +{ + ae_int_t i; + + _minasastate_clear(state); + + ae_assert(n>=1, "MinASA: N too small!", _state); + ae_assert(x->cnt>=n, "MinCGCreate: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinCGCreate: X contains infinite or NaN values!", _state); + ae_assert(bndl->cnt>=n, "MinCGCreate: Length(BndL)<N!", _state); + ae_assert(isfinitevector(bndl, n, _state), "MinCGCreate: BndL contains infinite or NaN values!", _state); + ae_assert(bndu->cnt>=n, "MinCGCreate: Length(BndU)<N!", _state); + ae_assert(isfinitevector(bndu, n, _state), "MinCGCreate: BndU contains infinite or NaN values!", _state); + for(i=0; i<=n-1; i++) + { + ae_assert(ae_fp_less_eq(bndl->ptr.p_double[i],bndu->ptr.p_double[i]), "MinASA: inconsistent bounds!", _state); + ae_assert(ae_fp_less_eq(bndl->ptr.p_double[i],x->ptr.p_double[i]), "MinASA: infeasible X!", _state); + ae_assert(ae_fp_less_eq(x->ptr.p_double[i],bndu->ptr.p_double[i]), "MinASA: infeasible X!", _state); + } + + /* + * Initialize + */ + state->n = n; + minasasetcond(state, 0, 0, 0, 0, _state); + minasasetxrep(state, ae_false, _state); + minasasetstpmax(state, 0, _state); + minasasetalgorithm(state, -1, _state); + ae_vector_set_length(&state->bndl, n, _state); + ae_vector_set_length(&state->bndu, n, _state); + ae_vector_set_length(&state->ak, n, _state); + ae_vector_set_length(&state->xk, n, _state); + ae_vector_set_length(&state->dk, n, _state); + ae_vector_set_length(&state->an, n, _state); + ae_vector_set_length(&state->xn, n, _state); + ae_vector_set_length(&state->dn, n, _state); + ae_vector_set_length(&state->x, n, _state); + ae_vector_set_length(&state->d, n, _state); + ae_vector_set_length(&state->g, n, _state); + ae_vector_set_length(&state->gc, n, _state); + ae_vector_set_length(&state->work, n, _state); + ae_vector_set_length(&state->yk, n, _state); + minasarestartfrom(state, x, bndl, bndu, _state); +} + + +/************************************************************************* +This function sets stopping conditions for the ASA optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetcond(minasastate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(epsg, _state), "MinASASetCond: EpsG is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsg,0), "MinASASetCond: negative EpsG!", _state); + ae_assert(ae_isfinite(epsf, _state), "MinASASetCond: EpsF is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsf,0), "MinASASetCond: negative EpsF!", _state); + ae_assert(ae_isfinite(epsx, _state), "MinASASetCond: EpsX is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsx,0), "MinASASetCond: negative EpsX!", _state); + ae_assert(maxits>=0, "MinASASetCond: negative MaxIts!", _state); + if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 ) + { + epsx = 1.0E-6; + } + state->epsg = epsg; + state->epsf = epsf; + state->epsx = epsx; + state->maxits = maxits; +} + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinASAOptimize(). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetxrep(minasastate* state, ae_bool needxrep, ae_state *_state) +{ + + + state->xrep = needxrep; +} + + +/************************************************************************* +This function sets optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm stat + UAType - algorithm type: + * -1 automatic selection of the best algorithm + * 0 DY (Dai and Yuan) algorithm + * 1 Hybrid DY-HS algorithm + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetalgorithm(minasastate* state, + ae_int_t algotype, + ae_state *_state) +{ + + + ae_assert(algotype>=-1&&algotype<=1, "MinASASetAlgorithm: incorrect AlgoType!", _state); + if( algotype==-1 ) + { + algotype = 1; + } + state->cgtype = algotype; +} + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length (zero by default). + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetstpmax(minasastate* state, double stpmax, ae_state *_state) +{ + + + ae_assert(ae_isfinite(stpmax, _state), "MinASASetStpMax: StpMax is not finite!", _state); + ae_assert(ae_fp_greater_eq(stpmax,0), "MinASASetStpMax: StpMax<0!", _state); + state->stpmax = stpmax; +} + + +/************************************************************************* + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool minasaiteration(minasastate* state, ae_state *_state) +{ + ae_int_t n; + ae_int_t i; + double betak; + double v; + double vv; + ae_int_t mcinfo; + ae_bool b; + ae_bool stepfound; + ae_int_t diffcnt; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + i = state->rstate.ia.ptr.p_int[1]; + mcinfo = state->rstate.ia.ptr.p_int[2]; + diffcnt = state->rstate.ia.ptr.p_int[3]; + b = state->rstate.ba.ptr.p_bool[0]; + stepfound = state->rstate.ba.ptr.p_bool[1]; + betak = state->rstate.ra.ptr.p_double[0]; + v = state->rstate.ra.ptr.p_double[1]; + vv = state->rstate.ra.ptr.p_double[2]; + } + else + { + n = -983; + i = -989; + mcinfo = -834; + diffcnt = 900; + b = ae_true; + stepfound = ae_false; + betak = 214; + v = -338; + vv = -686; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + if( state->rstate.stage==3 ) + { + goto lbl_3; + } + if( state->rstate.stage==4 ) + { + goto lbl_4; + } + if( state->rstate.stage==5 ) + { + goto lbl_5; + } + if( state->rstate.stage==6 ) + { + goto lbl_6; + } + if( state->rstate.stage==7 ) + { + goto lbl_7; + } + if( state->rstate.stage==8 ) + { + goto lbl_8; + } + if( state->rstate.stage==9 ) + { + goto lbl_9; + } + if( state->rstate.stage==10 ) + { + goto lbl_10; + } + if( state->rstate.stage==11 ) + { + goto lbl_11; + } + if( state->rstate.stage==12 ) + { + goto lbl_12; + } + if( state->rstate.stage==13 ) + { + goto lbl_13; + } + if( state->rstate.stage==14 ) + { + goto lbl_14; + } + + /* + * Routine body + */ + + /* + * Prepare + */ + n = state->n; + state->repterminationtype = 0; + state->repiterationscount = 0; + state->repnfev = 0; + state->debugrestartscount = 0; + state->cgtype = 1; + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + for(i=0; i<=n-1; i++) + { + if( ae_fp_eq(state->xk.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->xk.ptr.p_double[i],state->bndu.ptr.p_double[i]) ) + { + state->ak.ptr.p_double[i] = 0; + } + else + { + state->ak.ptr.p_double[i] = 1; + } + } + state->mu = 0.1; + state->curalgo = 0; + + /* + * Calculate F/G, initialize algorithm + */ + minasa_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + state->needfg = ae_false; + if( !state->xrep ) + { + goto lbl_15; + } + + /* + * progress report + */ + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + state->xupdated = ae_false; +lbl_15: + if( ae_fp_less_eq(minasa_asaboundedantigradnorm(state, _state),state->epsg) ) + { + state->repterminationtype = 4; + result = ae_false; + return result; + } + state->repnfev = state->repnfev+1; + + /* + * Main cycle + * + * At the beginning of new iteration: + * * CurAlgo stores current algorithm selector + * * State.XK, State.F and State.G store current X/F/G + * * State.AK stores current set of active constraints + */ +lbl_17: + if( ae_false ) + { + goto lbl_18; + } + + /* + * GPA algorithm + */ + if( state->curalgo!=0 ) + { + goto lbl_19; + } + state->k = 0; + state->acount = 0; +lbl_21: + if( ae_false ) + { + goto lbl_22; + } + + /* + * Determine Dk = proj(xk - gk)-xk + */ + for(i=0; i<=n-1; i++) + { + state->d.ptr.p_double[i] = boundval(state->xk.ptr.p_double[i]-state->g.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state)-state->xk.ptr.p_double[i]; + } + + /* + * Armijo line search. + * * exact search with alpha=1 is tried first, + * 'exact' means that we evaluate f() EXACTLY at + * bound(x-g,bndl,bndu), without intermediate floating + * point operations. + * * alpha<1 are tried if explicit search wasn't successful + * Result is placed into XN. + * + * Two types of search are needed because we can't + * just use second type with alpha=1 because in finite + * precision arithmetics (x1-x0)+x0 may differ from x1. + * So while x1 is correctly bounded (it lie EXACTLY on + * boundary, if it is active), (x1-x0)+x0 may be + * not bounded. + */ + v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->dginit = v; + state->finit = state->f; + if( !(ae_fp_less_eq(minasa_asad1norm(state, _state),state->stpmax)||ae_fp_eq(state->stpmax,0)) ) + { + goto lbl_23; + } + + /* + * Try alpha=1 step first + */ + for(i=0; i<=n-1; i++) + { + state->x.ptr.p_double[i] = boundval(state->xk.ptr.p_double[i]-state->g.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state); + } + minasa_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + state->needfg = ae_false; + state->repnfev = state->repnfev+1; + stepfound = ae_fp_less_eq(state->f,state->finit+minasa_gpaftol*state->dginit); + goto lbl_24; +lbl_23: + stepfound = ae_false; +lbl_24: + if( !stepfound ) + { + goto lbl_25; + } + + /* + * we are at the boundary(ies) + */ + ae_v_move(&state->xn.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->stp = 1; + goto lbl_26; +lbl_25: + + /* + * alpha=1 is too large, try smaller values + */ + state->stp = 1; + linminnormalized(&state->d, &state->stp, n, _state); + state->dginit = state->dginit/state->stp; + state->stp = minasa_gpadecay*state->stp; + if( ae_fp_greater(state->stpmax,0) ) + { + state->stp = ae_minreal(state->stp, state->stpmax, _state); + } +lbl_27: + if( ae_false ) + { + goto lbl_28; + } + v = state->stp; + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->x.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1), v); + minasa_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 3; + goto lbl_rcomm; +lbl_3: + state->needfg = ae_false; + state->repnfev = state->repnfev+1; + if( ae_fp_less_eq(state->stp,minasa_stpmin) ) + { + goto lbl_28; + } + if( ae_fp_less_eq(state->f,state->finit+state->stp*minasa_gpaftol*state->dginit) ) + { + goto lbl_28; + } + state->stp = state->stp*minasa_gpadecay; + goto lbl_27; +lbl_28: + ae_v_move(&state->xn.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); +lbl_26: + state->repiterationscount = state->repiterationscount+1; + if( !state->xrep ) + { + goto lbl_29; + } + + /* + * progress report + */ + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 4; + goto lbl_rcomm; +lbl_4: + state->xupdated = ae_false; +lbl_29: + + /* + * Calculate new set of active constraints. + * Reset counter if active set was changed. + * Prepare for the new iteration + */ + for(i=0; i<=n-1; i++) + { + if( ae_fp_eq(state->xn.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->xn.ptr.p_double[i],state->bndu.ptr.p_double[i]) ) + { + state->an.ptr.p_double[i] = 0; + } + else + { + state->an.ptr.p_double[i] = 1; + } + } + for(i=0; i<=n-1; i++) + { + if( ae_fp_neq(state->ak.ptr.p_double[i],state->an.ptr.p_double[i]) ) + { + state->acount = -1; + break; + } + } + state->acount = state->acount+1; + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->ak.ptr.p_double[0], 1, &state->an.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * Stopping conditions + */ + if( !(state->repiterationscount>=state->maxits&&state->maxits>0) ) + { + goto lbl_31; + } + + /* + * Too many iterations + */ + state->repterminationtype = 5; + if( !state->xrep ) + { + goto lbl_33; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 5; + goto lbl_rcomm; +lbl_5: + state->xupdated = ae_false; +lbl_33: + result = ae_false; + return result; +lbl_31: + if( ae_fp_greater(minasa_asaboundedantigradnorm(state, _state),state->epsg) ) + { + goto lbl_35; + } + + /* + * Gradient is small enough + */ + state->repterminationtype = 4; + if( !state->xrep ) + { + goto lbl_37; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 6; + goto lbl_rcomm; +lbl_6: + state->xupdated = ae_false; +lbl_37: + result = ae_false; + return result; +lbl_35: + v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_fp_greater(ae_sqrt(v, _state)*state->stp,state->epsx) ) + { + goto lbl_39; + } + + /* + * Step size is too small, no further improvement is + * possible + */ + state->repterminationtype = 2; + if( !state->xrep ) + { + goto lbl_41; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 7; + goto lbl_rcomm; +lbl_7: + state->xupdated = ae_false; +lbl_41: + result = ae_false; + return result; +lbl_39: + if( ae_fp_greater(state->finit-state->f,state->epsf*ae_maxreal(ae_fabs(state->finit, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) ) + { + goto lbl_43; + } + + /* + * F(k+1)-F(k) is small enough + */ + state->repterminationtype = 1; + if( !state->xrep ) + { + goto lbl_45; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 8; + goto lbl_rcomm; +lbl_8: + state->xupdated = ae_false; +lbl_45: + result = ae_false; + return result; +lbl_43: + + /* + * Decide - should we switch algorithm or not + */ + if( minasa_asauisempty(state, _state) ) + { + if( ae_fp_greater_eq(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state)) ) + { + state->curalgo = 1; + goto lbl_22; + } + else + { + state->mu = state->mu*minasa_asarho; + } + } + else + { + if( state->acount==minasa_n1 ) + { + if( ae_fp_greater_eq(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state)) ) + { + state->curalgo = 1; + goto lbl_22; + } + } + } + + /* + * Next iteration + */ + state->k = state->k+1; + goto lbl_21; +lbl_22: +lbl_19: + + /* + * CG algorithm + */ + if( state->curalgo!=1 ) + { + goto lbl_47; + } + + /* + * first, check that there are non-active constraints. + * move to GPA algorithm, if all constraints are active + */ + b = ae_true; + for(i=0; i<=n-1; i++) + { + if( ae_fp_neq(state->ak.ptr.p_double[i],0) ) + { + b = ae_false; + break; + } + } + if( b ) + { + state->curalgo = 0; + goto lbl_17; + } + + /* + * CG iterations + */ + state->fold = state->f; + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + for(i=0; i<=n-1; i++) + { + state->dk.ptr.p_double[i] = -state->g.ptr.p_double[i]*state->ak.ptr.p_double[i]; + state->gc.ptr.p_double[i] = state->g.ptr.p_double[i]*state->ak.ptr.p_double[i]; + } +lbl_49: + if( ae_false ) + { + goto lbl_50; + } + + /* + * Store G[k] for later calculation of Y[k] + */ + for(i=0; i<=n-1; i++) + { + state->yk.ptr.p_double[i] = -state->gc.ptr.p_double[i]; + } + + /* + * Make a CG step in direction given by DK[]: + * * calculate step. Step projection into feasible set + * is used. It has several benefits: a) step may be + * found with usual line search, b) multiple constraints + * may be activated with one step, c) activated constraints + * are detected in a natural way - just compare x[i] with + * bounds + * * update active set, set B to True, if there + * were changes in the set. + */ + ae_v_move(&state->d.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->xn.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->mcstage = 0; + state->stp = 1; + linminnormalized(&state->d, &state->stp, n, _state); + if( ae_fp_neq(state->laststep,0) ) + { + state->stp = state->laststep; + } + mcsrch(n, &state->xn, &state->f, &state->gc, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state); +lbl_51: + if( state->mcstage==0 ) + { + goto lbl_52; + } + + /* + * preprocess data: bound State.XN so it belongs to the + * feasible set and store it in the State.X + */ + for(i=0; i<=n-1; i++) + { + state->x.ptr.p_double[i] = boundval(state->xn.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state); + } + + /* + * RComm + */ + minasa_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 9; + goto lbl_rcomm; +lbl_9: + state->needfg = ae_false; + + /* + * postprocess data: zero components of G corresponding to + * the active constraints + */ + for(i=0; i<=n-1; i++) + { + if( ae_fp_eq(state->x.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->x.ptr.p_double[i],state->bndu.ptr.p_double[i]) ) + { + state->gc.ptr.p_double[i] = 0; + } + else + { + state->gc.ptr.p_double[i] = state->g.ptr.p_double[i]; + } + } + mcsrch(n, &state->xn, &state->f, &state->gc, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state); + goto lbl_51; +lbl_52: + diffcnt = 0; + for(i=0; i<=n-1; i++) + { + + /* + * XN contains unprojected result, project it, + * save copy to X (will be used for progress reporting) + */ + state->xn.ptr.p_double[i] = boundval(state->xn.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state); + + /* + * update active set + */ + if( ae_fp_eq(state->xn.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->xn.ptr.p_double[i],state->bndu.ptr.p_double[i]) ) + { + state->an.ptr.p_double[i] = 0; + } + else + { + state->an.ptr.p_double[i] = 1; + } + if( ae_fp_neq(state->an.ptr.p_double[i],state->ak.ptr.p_double[i]) ) + { + diffcnt = diffcnt+1; + } + state->ak.ptr.p_double[i] = state->an.ptr.p_double[i]; + } + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->repnfev = state->repnfev+state->nfev; + state->repiterationscount = state->repiterationscount+1; + if( !state->xrep ) + { + goto lbl_53; + } + + /* + * progress report + */ + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 10; + goto lbl_rcomm; +lbl_10: + state->xupdated = ae_false; +lbl_53: + + /* + * Update info about step length + */ + v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->laststep = ae_sqrt(v, _state)*state->stp; + + /* + * Check stopping conditions. + */ + if( ae_fp_greater(minasa_asaboundedantigradnorm(state, _state),state->epsg) ) + { + goto lbl_55; + } + + /* + * Gradient is small enough + */ + state->repterminationtype = 4; + if( !state->xrep ) + { + goto lbl_57; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 11; + goto lbl_rcomm; +lbl_11: + state->xupdated = ae_false; +lbl_57: + result = ae_false; + return result; +lbl_55: + if( !(state->repiterationscount>=state->maxits&&state->maxits>0) ) + { + goto lbl_59; + } + + /* + * Too many iterations + */ + state->repterminationtype = 5; + if( !state->xrep ) + { + goto lbl_61; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 12; + goto lbl_rcomm; +lbl_12: + state->xupdated = ae_false; +lbl_61: + result = ae_false; + return result; +lbl_59: + if( !(ae_fp_greater_eq(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state))&&diffcnt==0) ) + { + goto lbl_63; + } + + /* + * These conditions (EpsF/EpsX) are explicitly or implicitly + * related to the current step size and influenced + * by changes in the active constraints. + * + * For these reasons they are checked only when we don't + * want to 'unstick' at the end of the iteration and there + * were no changes in the active set. + * + * NOTE: consition |G|>=Mu*|D1| must be exactly opposite + * to the condition used to switch back to GPA. At least + * one inequality must be strict, otherwise infinite cycle + * may occur when |G|=Mu*|D1| (we DON'T test stopping + * conditions and we DON'T switch to GPA, so we cycle + * indefinitely). + */ + if( ae_fp_greater(state->fold-state->f,state->epsf*ae_maxreal(ae_fabs(state->fold, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) ) + { + goto lbl_65; + } + + /* + * F(k+1)-F(k) is small enough + */ + state->repterminationtype = 1; + if( !state->xrep ) + { + goto lbl_67; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 13; + goto lbl_rcomm; +lbl_13: + state->xupdated = ae_false; +lbl_67: + result = ae_false; + return result; +lbl_65: + if( ae_fp_greater(state->laststep,state->epsx) ) + { + goto lbl_69; + } + + /* + * X(k+1)-X(k) is small enough + */ + state->repterminationtype = 2; + if( !state->xrep ) + { + goto lbl_71; + } + minasa_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 14; + goto lbl_rcomm; +lbl_14: + state->xupdated = ae_false; +lbl_71: + result = ae_false; + return result; +lbl_69: +lbl_63: + + /* + * Check conditions for switching + */ + if( ae_fp_less(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state)) ) + { + state->curalgo = 0; + goto lbl_50; + } + if( diffcnt>0 ) + { + if( minasa_asauisempty(state, _state)||diffcnt>=minasa_n2 ) + { + state->curalgo = 1; + } + else + { + state->curalgo = 0; + } + goto lbl_50; + } + + /* + * Calculate D(k+1) + * + * Line search may result in: + * * maximum feasible step being taken (already processed) + * * point satisfying Wolfe conditions + * * some kind of error (CG is restarted by assigning 0.0 to Beta) + */ + if( mcinfo==1 ) + { + + /* + * Standard Wolfe conditions are satisfied: + * * calculate Y[K] and BetaK + */ + ae_v_add(&state->yk.ptr.p_double[0], 1, &state->gc.ptr.p_double[0], 1, ae_v_len(0,n-1)); + vv = ae_v_dotproduct(&state->yk.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = ae_v_dotproduct(&state->gc.ptr.p_double[0], 1, &state->gc.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->betady = v/vv; + v = ae_v_dotproduct(&state->gc.ptr.p_double[0], 1, &state->yk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->betahs = v/vv; + if( state->cgtype==0 ) + { + betak = state->betady; + } + if( state->cgtype==1 ) + { + betak = ae_maxreal(0, ae_minreal(state->betady, state->betahs, _state), _state); + } + } + else + { + + /* + * Something is wrong (may be function is too wild or too flat). + * + * We'll set BetaK=0, which will restart CG algorithm. + * We can stop later (during normal checks) if stopping conditions are met. + */ + betak = 0; + state->debugrestartscount = state->debugrestartscount+1; + } + ae_v_moveneg(&state->dn.ptr.p_double[0], 1, &state->gc.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->dn.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak); + ae_v_move(&state->dk.ptr.p_double[0], 1, &state->dn.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * update other information + */ + state->fold = state->f; + state->k = state->k+1; + goto lbl_49; +lbl_50: +lbl_47: + goto lbl_17; +lbl_18: + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ia.ptr.p_int[1] = i; + state->rstate.ia.ptr.p_int[2] = mcinfo; + state->rstate.ia.ptr.p_int[3] = diffcnt; + state->rstate.ba.ptr.p_bool[0] = b; + state->rstate.ba.ptr.p_bool[1] = stepfound; + state->rstate.ra.ptr.p_double[0] = betak; + state->rstate.ra.ptr.p_double[1] = v; + state->rstate.ra.ptr.p_double[2] = vv; + return result; +} + + +/************************************************************************* +ASA results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -2 rounding errors prevent further improvement. + X contains best point found. + * -1 incorrect parameters were specified + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + * ActiveConstraints contains number of active constraints + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minasaresults(minasastate* state, + /* Real */ ae_vector* x, + minasareport* rep, + ae_state *_state) +{ + + ae_vector_clear(x); + _minasareport_clear(rep); + + minasaresultsbuf(state, x, rep, _state); +} + + +/************************************************************************* +ASA results + +Buffered implementation of MinASAResults() which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minasaresultsbuf(minasastate* state, + /* Real */ ae_vector* x, + minasareport* rep, + ae_state *_state) +{ + ae_int_t i; + + + if( x->cnt<state->n ) + { + ae_vector_set_length(x, state->n, _state); + } + ae_v_move(&x->ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + rep->iterationscount = state->repiterationscount; + rep->nfev = state->repnfev; + rep->terminationtype = state->repterminationtype; + rep->activeconstraints = 0; + for(i=0; i<=state->n-1; i++) + { + if( ae_fp_eq(state->ak.ptr.p_double[i],0) ) + { + rep->activeconstraints = rep->activeconstraints+1; + } + } +} + + +/************************************************************************* +This subroutine restarts CG algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure previously allocated with MinCGCreate call. + X - new starting point. + BndL - new lower bounds + BndU - new upper bounds + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minasarestartfrom(minasastate* state, + /* Real */ ae_vector* x, + /* Real */ ae_vector* bndl, + /* Real */ ae_vector* bndu, + ae_state *_state) +{ + + + ae_assert(x->cnt>=state->n, "MinASARestartFrom: Length(X)<N!", _state); + ae_assert(isfinitevector(x, state->n, _state), "MinASARestartFrom: X contains infinite or NaN values!", _state); + ae_assert(bndl->cnt>=state->n, "MinASARestartFrom: Length(BndL)<N!", _state); + ae_assert(isfinitevector(bndl, state->n, _state), "MinASARestartFrom: BndL contains infinite or NaN values!", _state); + ae_assert(bndu->cnt>=state->n, "MinASARestartFrom: Length(BndU)<N!", _state); + ae_assert(isfinitevector(bndu, state->n, _state), "MinASARestartFrom: BndU contains infinite or NaN values!", _state); + ae_v_move(&state->x.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + ae_v_move(&state->bndl.ptr.p_double[0], 1, &bndl->ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + ae_v_move(&state->bndu.ptr.p_double[0], 1, &bndu->ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + state->laststep = 0; + ae_vector_set_length(&state->rstate.ia, 3+1, _state); + ae_vector_set_length(&state->rstate.ba, 1+1, _state); + ae_vector_set_length(&state->rstate.ra, 2+1, _state); + state->rstate.stage = -1; + minasa_clearrequestfields(state, _state); +} + + +/************************************************************************* +Returns norm of bounded anti-gradient. + +Bounded antigradient is a vector obtained from anti-gradient by zeroing +components which point outwards: + result = norm(v) + v[i]=0 if ((-g[i]<0)and(x[i]=bndl[i])) or + ((-g[i]>0)and(x[i]=bndu[i])) + v[i]=-g[i] otherwise + +This function may be used to check a stopping criterion. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +static double minasa_asaboundedantigradnorm(minasastate* state, + ae_state *_state) +{ + ae_int_t i; + double v; + double result; + + + result = 0; + for(i=0; i<=state->n-1; i++) + { + v = -state->g.ptr.p_double[i]; + if( ae_fp_eq(state->x.ptr.p_double[i],state->bndl.ptr.p_double[i])&&ae_fp_less(-state->g.ptr.p_double[i],0) ) + { + v = 0; + } + if( ae_fp_eq(state->x.ptr.p_double[i],state->bndu.ptr.p_double[i])&&ae_fp_greater(-state->g.ptr.p_double[i],0) ) + { + v = 0; + } + result = result+ae_sqr(v, _state); + } + result = ae_sqrt(result, _state); + return result; +} + + +/************************************************************************* +Returns norm of GI(x). + +GI(x) is a gradient vector whose components associated with active +constraints are zeroed. It differs from bounded anti-gradient because +components of GI(x) are zeroed independently of sign(g[i]), and +anti-gradient's components are zeroed with respect to both constraint and +sign. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +static double minasa_asaginorm(minasastate* state, ae_state *_state) +{ + ae_int_t i; + double result; + + + result = 0; + for(i=0; i<=state->n-1; i++) + { + if( ae_fp_neq(state->x.ptr.p_double[i],state->bndl.ptr.p_double[i])&&ae_fp_neq(state->x.ptr.p_double[i],state->bndu.ptr.p_double[i]) ) + { + result = result+ae_sqr(state->g.ptr.p_double[i], _state); + } + } + result = ae_sqrt(result, _state); + return result; +} + + +/************************************************************************* +Returns norm(D1(State.X)) + +For a meaning of D1 see 'NEW ACTIVE SET ALGORITHM FOR BOX CONSTRAINED +OPTIMIZATION' by WILLIAM W. HAGER AND HONGCHAO ZHANG. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +static double minasa_asad1norm(minasastate* state, ae_state *_state) +{ + ae_int_t i; + double result; + + + result = 0; + for(i=0; i<=state->n-1; i++) + { + result = result+ae_sqr(boundval(state->x.ptr.p_double[i]-state->g.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state)-state->x.ptr.p_double[i], _state); + } + result = ae_sqrt(result, _state); + return result; +} + + +/************************************************************************* +Returns True, if U set is empty. + +* State.X is used as point, +* State.G - as gradient, +* D is calculated within function (because State.D may have different + meaning depending on current optimization algorithm) + +For a meaning of U see 'NEW ACTIVE SET ALGORITHM FOR BOX CONSTRAINED +OPTIMIZATION' by WILLIAM W. HAGER AND HONGCHAO ZHANG. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +static ae_bool minasa_asauisempty(minasastate* state, ae_state *_state) +{ + ae_int_t i; + double d; + double d2; + double d32; + ae_bool result; + + + d = minasa_asad1norm(state, _state); + d2 = ae_sqrt(d, _state); + d32 = d*d2; + result = ae_true; + for(i=0; i<=state->n-1; i++) + { + if( ae_fp_greater_eq(ae_fabs(state->g.ptr.p_double[i], _state),d2)&&ae_fp_greater_eq(ae_minreal(state->x.ptr.p_double[i]-state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i]-state->x.ptr.p_double[i], _state),d32) ) + { + result = ae_false; + return result; + } + } + return result; +} + + +/************************************************************************* +Clears request fileds (to be sure that we don't forgot to clear something) +*************************************************************************/ +static void minasa_clearrequestfields(minasastate* state, + ae_state *_state) +{ + + + state->needfg = ae_false; + state->xupdated = ae_false; +} + + +ae_bool _minasastate_init(minasastate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->bndl, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->bndu, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->ak, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->dk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->an, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xn, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->dn, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->d, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->work, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->yk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->gc, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + if( !_linminstate_init(&p->lstate, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _minasastate_init_copy(minasastate* dst, minasastate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->n = src->n; + dst->epsg = src->epsg; + dst->epsf = src->epsf; + dst->epsx = src->epsx; + dst->maxits = src->maxits; + dst->xrep = src->xrep; + dst->stpmax = src->stpmax; + dst->cgtype = src->cgtype; + dst->k = src->k; + dst->nfev = src->nfev; + dst->mcstage = src->mcstage; + if( !ae_vector_init_copy(&dst->bndl, &src->bndl, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->bndu, &src->bndu, _state, make_automatic) ) + return ae_false; + dst->curalgo = src->curalgo; + dst->acount = src->acount; + dst->mu = src->mu; + dst->finit = src->finit; + dst->dginit = src->dginit; + if( !ae_vector_init_copy(&dst->ak, &src->ak, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->dk, &src->dk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->an, &src->an, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xn, &src->xn, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->dn, &src->dn, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic) ) + return ae_false; + dst->fold = src->fold; + dst->stp = src->stp; + if( !ae_vector_init_copy(&dst->work, &src->work, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->yk, &src->yk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->gc, &src->gc, _state, make_automatic) ) + return ae_false; + dst->laststep = src->laststep; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->f = src->f; + if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) ) + return ae_false; + dst->needfg = src->needfg; + dst->xupdated = src->xupdated; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + dst->repiterationscount = src->repiterationscount; + dst->repnfev = src->repnfev; + dst->repterminationtype = src->repterminationtype; + dst->debugrestartscount = src->debugrestartscount; + if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) ) + return ae_false; + dst->betahs = src->betahs; + dst->betady = src->betady; + return ae_true; +} + + +void _minasastate_clear(minasastate* p) +{ + ae_vector_clear(&p->bndl); + ae_vector_clear(&p->bndu); + ae_vector_clear(&p->ak); + ae_vector_clear(&p->xk); + ae_vector_clear(&p->dk); + ae_vector_clear(&p->an); + ae_vector_clear(&p->xn); + ae_vector_clear(&p->dn); + ae_vector_clear(&p->d); + ae_vector_clear(&p->work); + ae_vector_clear(&p->yk); + ae_vector_clear(&p->gc); + ae_vector_clear(&p->x); + ae_vector_clear(&p->g); + _rcommstate_clear(&p->rstate); + _linminstate_clear(&p->lstate); +} + + +ae_bool _minasareport_init(minasareport* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _minasareport_init_copy(minasareport* dst, minasareport* src, ae_state *_state, ae_bool make_automatic) +{ + dst->iterationscount = src->iterationscount; + dst->nfev = src->nfev; + dst->terminationtype = src->terminationtype; + dst->activeconstraints = src->activeconstraints; + return ae_true; +} + + +void _minasareport_clear(minasareport* p) +{ +} + + + + +/************************************************************************* + NONLINEAR CONJUGATE GRADIENT METHOD + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using one of the +nonlinear conjugate gradient methods. + +These CG methods are globally convergent (even on non-convex functions) as +long as grad(f) is Lipschitz continuous in a some neighborhood of the +L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinCGCreate() call +2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and + other functions +3. User calls MinCGOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinCGResults() to get solution +5. Optionally, user may call MinCGRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinCGRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - starting point, array[0..N-1]. + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgcreate(ae_int_t n, + /* Real */ ae_vector* x, + mincgstate* state, + ae_state *_state) +{ + + _mincgstate_clear(state); + + ae_assert(n>=1, "MinCGCreate: N too small!", _state); + ae_assert(x->cnt>=n, "MinCGCreate: Length(X)<N!", _state); + ae_assert(isfinitevector(x, n, _state), "MinCGCreate: X contains infinite or NaN values!", _state); + + /* + * Initialize + */ + state->n = n; + mincgsetcond(state, 0, 0, 0, 0, _state); + mincgsetxrep(state, ae_false, _state); + mincgsetdrep(state, ae_false, _state); + mincgsetstpmax(state, 0, _state); + mincgsetcgtype(state, -1, _state); + ae_vector_set_length(&state->xk, n, _state); + ae_vector_set_length(&state->dk, n, _state); + ae_vector_set_length(&state->xn, n, _state); + ae_vector_set_length(&state->dn, n, _state); + ae_vector_set_length(&state->x, n, _state); + ae_vector_set_length(&state->d, n, _state); + ae_vector_set_length(&state->g, n, _state); + ae_vector_set_length(&state->work, n, _state); + ae_vector_set_length(&state->yk, n, _state); + mincgrestartfrom(state, x, _state); +} + + +/************************************************************************* +This function sets stopping conditions for CG optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetcond(mincgstate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(epsg, _state), "MinCGSetCond: EpsG is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsg,0), "MinCGSetCond: negative EpsG!", _state); + ae_assert(ae_isfinite(epsf, _state), "MinCGSetCond: EpsF is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsf,0), "MinCGSetCond: negative EpsF!", _state); + ae_assert(ae_isfinite(epsx, _state), "MinCGSetCond: EpsX is not finite number!", _state); + ae_assert(ae_fp_greater_eq(epsx,0), "MinCGSetCond: negative EpsX!", _state); + ae_assert(maxits>=0, "MinCGSetCond: negative MaxIts!", _state); + if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 ) + { + epsx = 1.0E-6; + } + state->epsg = epsg; + state->epsf = epsf; + state->epsx = epsx; + state->maxits = maxits; +} + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinCGOptimize(). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetxrep(mincgstate* state, ae_bool needxrep, ae_state *_state) +{ + + + state->xrep = needxrep; +} + + +/************************************************************************* +This function turns on/off line search reports. +These reports are described in more details in developer-only comments on +MinCGState object. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedDRep- whether line search reports are needed or not + +This function is intended for private use only. Turning it on artificially +may cause program failure. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetdrep(mincgstate* state, ae_bool needdrep, ae_state *_state) +{ + + + state->drep = needdrep; +} + + +/************************************************************************* +This function sets CG algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + CGType - algorithm type: + * -1 automatic selection of the best algorithm + * 0 DY (Dai and Yuan) algorithm + * 1 Hybrid DY-HS algorithm + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetcgtype(mincgstate* state, ae_int_t cgtype, ae_state *_state) +{ + + + ae_assert(cgtype>=-1&&cgtype<=1, "MinCGSetCGType: incorrect CGType!", _state); + if( cgtype==-1 ) + { + cgtype = 1; + } + state->cgtype = cgtype; +} + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetstpmax(mincgstate* state, double stpmax, ae_state *_state) +{ + + + ae_assert(ae_isfinite(stpmax, _state), "MinCGSetStpMax: StpMax is not finite!", _state); + ae_assert(ae_fp_greater_eq(stpmax,0), "MinCGSetStpMax: StpMax<0!", _state); + state->stpmax = stpmax; +} + + +/************************************************************************* +This function allows to suggest initial step length to the CG algorithm. + +Suggested step length is used as starting point for the line search. It +can be useful when you have badly scaled problem, i.e. when ||grad|| +(which is used as initial estimate for the first step) is many orders of +magnitude different from the desired step. + +Line search may fail on such problems without good estimate of initial +step length. Imagine, for example, problem with ||grad||=10^50 and desired +step equal to 0.1 Line search function will use 10^50 as initial step, +then it will decrease step length by 2 (up to 20 attempts) and will get +10^44, which is still too large. + +This function allows us to tell than line search should be started from +some moderate step length, like 1.0, so algorithm will be able to detect +desired step length in a several searches. + +This function influences only first iteration of algorithm. It should be +called between MinCGCreate/MinCGRestartFrom() call and MinCGOptimize call. + +INPUT PARAMETERS: + State - structure used to store algorithm state. + Stp - initial estimate of the step length. + Can be zero (no estimate). + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsuggeststep(mincgstate* state, double stp, ae_state *_state) +{ + + + ae_assert(ae_isfinite(stp, _state), "MinCGSuggestStep: Stp is infinite or NAN", _state); + ae_assert(ae_fp_greater_eq(stp,0), "MinCGSuggestStep: Stp<0", _state); + state->suggestedstep = stp; +} + + +/************************************************************************* + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +ae_bool mincgiteration(mincgstate* state, ae_state *_state) +{ + ae_int_t n; + ae_int_t i; + double betak; + double v; + double vv; + ae_int_t mcinfo; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + i = state->rstate.ia.ptr.p_int[1]; + mcinfo = state->rstate.ia.ptr.p_int[2]; + betak = state->rstate.ra.ptr.p_double[0]; + v = state->rstate.ra.ptr.p_double[1]; + vv = state->rstate.ra.ptr.p_double[2]; + } + else + { + n = -983; + i = -989; + mcinfo = -834; + betak = 900; + v = -287; + vv = 364; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + if( state->rstate.stage==3 ) + { + goto lbl_3; + } + if( state->rstate.stage==4 ) + { + goto lbl_4; + } + if( state->rstate.stage==5 ) + { + goto lbl_5; + } + + /* + * Routine body + */ + + /* + * Prepare + */ + n = state->n; + state->repterminationtype = 0; + state->repiterationscount = 0; + state->repnfev = 0; + state->debugrestartscount = 0; + + /* + * Calculate F/G, XK and DK, initialize algorithm + */ + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + mincg_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + state->needfg = ae_false; + ae_v_moveneg(&state->dk.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( !state->xrep ) + { + goto lbl_6; + } + mincg_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + state->xupdated = ae_false; +lbl_6: + v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = ae_sqrt(v, _state); + if( ae_fp_eq(v,0) ) + { + ae_v_move(&state->xn.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->repterminationtype = 4; + result = ae_false; + return result; + } + state->repnfev = 1; + state->k = 0; + state->fold = state->f; + + /* + * Main cycle + */ + state->laststep = state->suggestedstep; + state->rstimer = mincg_rscountdownlen; +lbl_8: + if( ae_false ) + { + goto lbl_9; + } + + /* + * Store G[k] for later calculation of Y[k] + */ + ae_v_moveneg(&state->yk.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * Calculate X(k+1): minimize F(x+alpha*d) + */ + ae_v_move(&state->d.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->mcstage = 0; + state->stp = 1.0; + linminnormalized(&state->d, &state->stp, n, _state); + if( ae_fp_neq(state->laststep,0) ) + { + state->stp = state->laststep; + } + state->curstpmax = state->stpmax; + if( !state->drep ) + { + goto lbl_10; + } + + /* + * Report beginning of line search (if needed) + */ + mincg_clearrequestfields(state, _state); + state->lsstart = ae_true; + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + state->lsstart = ae_false; +lbl_10: + mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->curstpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state); +lbl_12: + if( state->mcstage==0 ) + { + goto lbl_13; + } + mincg_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 3; + goto lbl_rcomm; +lbl_3: + state->needfg = ae_false; + mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->curstpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state); + goto lbl_12; +lbl_13: + if( !state->drep ) + { + goto lbl_14; + } + + /* + * Report end of line search (if needed) + */ + mincg_clearrequestfields(state, _state); + state->lsend = ae_true; + state->rstate.stage = 4; + goto lbl_rcomm; +lbl_4: + state->lsend = ae_false; +lbl_14: + if( !state->xrep ) + { + goto lbl_16; + } + mincg_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 5; + goto lbl_rcomm; +lbl_5: + state->xupdated = ae_false; +lbl_16: + ae_v_move(&state->xn.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( mcinfo==1 ) + { + + /* + * Standard Wolfe conditions hold + * Calculate Y[K] and BetaK + */ + ae_v_add(&state->yk.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + vv = ae_v_dotproduct(&state->yk.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->betady = v/vv; + v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->yk.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->betahs = v/vv; + if( state->cgtype==0 ) + { + betak = state->betady; + } + if( state->cgtype==1 ) + { + betak = ae_maxreal(0, ae_minreal(state->betady, state->betahs, _state), _state); + } + } + else + { + + /* + * Something is wrong (may be function is too wild or too flat). + * + * We'll set BetaK=0, which will restart CG algorithm. + * We can stop later (during normal checks) if stopping conditions are met. + */ + betak = 0; + state->debugrestartscount = state->debugrestartscount+1; + } + if( mcinfo==1||mcinfo==5 ) + { + state->rstimer = mincg_rscountdownlen; + } + else + { + state->rstimer = state->rstimer-1; + } + + /* + * Calculate D(k+1) + */ + ae_v_moveneg(&state->dn.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_addd(&state->dn.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak); + + /* + * Update info about step length + */ + v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->laststep = ae_sqrt(v, _state)*state->stp; + + /* + * Update information and Hessian. + * Check stopping conditions. + */ + state->repnfev = state->repnfev+state->nfev; + state->repiterationscount = state->repiterationscount+1; + if( state->repiterationscount>=state->maxits&&state->maxits>0 ) + { + + /* + * Too many iterations + */ + state->repterminationtype = 5; + result = ae_false; + return result; + } + v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + if( ae_fp_less_eq(ae_sqrt(v, _state),state->epsg) ) + { + + /* + * Gradient is small enough + */ + state->repterminationtype = 4; + result = ae_false; + return result; + } + if( ae_fp_less_eq(state->fold-state->f,state->epsf*ae_maxreal(ae_fabs(state->fold, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) ) + { + + /* + * F(k+1)-F(k) is small enough + */ + state->repterminationtype = 1; + result = ae_false; + return result; + } + if( ae_fp_less_eq(state->laststep,state->epsx) ) + { + + /* + * X(k+1)-X(k) is small enough + */ + state->repterminationtype = 2; + result = ae_false; + return result; + } + if( state->rstimer<=0 ) + { + + /* + * Too many subsequent restarts + */ + state->repterminationtype = 7; + result = ae_false; + return result; + } + + /* + * Shift Xk/Dk, update other information + */ + ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->dk.ptr.p_double[0], 1, &state->dn.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->fold = state->f; + state->k = state->k+1; + goto lbl_8; +lbl_9: + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ia.ptr.p_int[1] = i; + state->rstate.ia.ptr.p_int[2] = mcinfo; + state->rstate.ra.ptr.p_double[0] = betak; + state->rstate.ra.ptr.p_double[1] = v; + state->rstate.ra.ptr.p_double[2] = vv; + return result; +} + + +/************************************************************************* +Conjugate gradient results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible, + we return best X found so far + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +void mincgresults(mincgstate* state, + /* Real */ ae_vector* x, + mincgreport* rep, + ae_state *_state) +{ + + ae_vector_clear(x); + _mincgreport_clear(rep); + + mincgresultsbuf(state, x, rep, _state); +} + + +/************************************************************************* +Conjugate gradient results + +Buffered implementation of MinCGResults(), which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +void mincgresultsbuf(mincgstate* state, + /* Real */ ae_vector* x, + mincgreport* rep, + ae_state *_state) +{ + + + if( x->cnt<state->n ) + { + ae_vector_set_length(x, state->n, _state); + } + ae_v_move(&x->ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + rep->iterationscount = state->repiterationscount; + rep->nfev = state->repnfev; + rep->terminationtype = state->repterminationtype; +} + + +/************************************************************************* +This subroutine restarts CG algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used to store algorithm state. + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgrestartfrom(mincgstate* state, + /* Real */ ae_vector* x, + ae_state *_state) +{ + + + ae_assert(x->cnt>=state->n, "MinCGRestartFrom: Length(X)<N!", _state); + ae_assert(isfinitevector(x, state->n, _state), "MinCGCreate: X contains infinite or NaN values!", _state); + ae_v_move(&state->x.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + mincgsuggeststep(state, 0.0, _state); + ae_vector_set_length(&state->rstate.ia, 2+1, _state); + ae_vector_set_length(&state->rstate.ra, 2+1, _state); + state->rstate.stage = -1; + mincg_clearrequestfields(state, _state); +} + + +/************************************************************************* +Clears request fileds (to be sure that we don't forgot to clear something) +*************************************************************************/ +static void mincg_clearrequestfields(mincgstate* state, ae_state *_state) +{ + + + state->needfg = ae_false; + state->xupdated = ae_false; + state->lsstart = ae_false; + state->lsend = ae_false; +} + + +ae_bool _mincgstate_init(mincgstate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->dk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xn, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->dn, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->d, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->work, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->yk, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + if( !_linminstate_init(&p->lstate, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _mincgstate_init_copy(mincgstate* dst, mincgstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->n = src->n; + dst->epsg = src->epsg; + dst->epsf = src->epsf; + dst->epsx = src->epsx; + dst->maxits = src->maxits; + dst->stpmax = src->stpmax; + dst->suggestedstep = src->suggestedstep; + dst->xrep = src->xrep; + dst->drep = src->drep; + dst->cgtype = src->cgtype; + dst->nfev = src->nfev; + dst->mcstage = src->mcstage; + dst->k = src->k; + if( !ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->dk, &src->dk, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xn, &src->xn, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->dn, &src->dn, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic) ) + return ae_false; + dst->fold = src->fold; + dst->stp = src->stp; + dst->curstpmax = src->curstpmax; + if( !ae_vector_init_copy(&dst->work, &src->work, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->yk, &src->yk, _state, make_automatic) ) + return ae_false; + dst->laststep = src->laststep; + dst->rstimer = src->rstimer; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->f = src->f; + if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) ) + return ae_false; + dst->needfg = src->needfg; + dst->xupdated = src->xupdated; + dst->lsstart = src->lsstart; + dst->lsend = src->lsend; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + dst->repiterationscount = src->repiterationscount; + dst->repnfev = src->repnfev; + dst->repterminationtype = src->repterminationtype; + dst->debugrestartscount = src->debugrestartscount; + if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) ) + return ae_false; + dst->betahs = src->betahs; + dst->betady = src->betady; + return ae_true; +} + + +void _mincgstate_clear(mincgstate* p) +{ + ae_vector_clear(&p->xk); + ae_vector_clear(&p->dk); + ae_vector_clear(&p->xn); + ae_vector_clear(&p->dn); + ae_vector_clear(&p->d); + ae_vector_clear(&p->work); + ae_vector_clear(&p->yk); + ae_vector_clear(&p->x); + ae_vector_clear(&p->g); + _rcommstate_clear(&p->rstate); + _linminstate_clear(&p->lstate); +} + + +ae_bool _mincgreport_init(mincgreport* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _mincgreport_init_copy(mincgreport* dst, mincgreport* src, ae_state *_state, ae_bool make_automatic) +{ + dst->iterationscount = src->iterationscount; + dst->nfev = src->nfev; + dst->terminationtype = src->terminationtype; + return ae_true; +} + + +void _mincgreport_clear(mincgreport* p) +{ +} + + + + +/************************************************************************* + BOUND CONSTRAINED OPTIMIZATION + WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments subject to any +combination of: +* bound constraints +* linear inequality constraints +* linear equality constraints + +REQUIREMENTS: +* function value and gradient +* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) } +* function must be defined even in the infeasible points (algorithm make take + steps in the infeasible area before converging to the feasible point) +* starting point X0 must be feasible or not too far away from the feasible set +* problem must satisfy strict complementary conditions + +USAGE: + +Constrained optimization if far more complex than the unconstrained one. +Here we give very brief outline of the BLEIC optimizer. We strongly recommend +you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide +on optimization, which is available at http://www.alglib.net/optimization/ + +1. User initializes algorithm state with MinBLEICCreate() call + +2. USer adds boundary and/or linear constraints by calling + MinBLEICSetBC() and MinBLEICSetLC() functions. + +3. User sets stopping conditions for underlying unconstrained solver + with MinBLEICSetInnerCond() call. + This function controls accuracy of underlying optimization algorithm. + +4. User sets stopping conditions for outer iteration by calling + MinBLEICSetOuterCond() function. + This function controls handling of boundary and inequality constraints. + +5. User tunes barrier parameters: + * barrier width with MinBLEICSetBarrierWidth() call + * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call + These functions control handling of boundary and inequality constraints. + +6. Additionally, user may set limit on number of internal iterations + by MinBLEICSetMaxIts() call. + This function allows to prevent algorithm from looping forever. + +7. User calls MinBLEICOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. + +8. User calls MinBLEICResults() to get solution + +9. Optionally user may call MinBLEICRestartFrom() to solve another problem + with same N but another starting point. + MinBLEICRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size ofX + X - starting point, array[N]: + * it is better to set X to a feasible point + * but X can be infeasible, in which case algorithm will try + to find feasible point first, using X as initial + approximation. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleiccreate(ae_int_t n, + /* Real */ ae_vector* x, + minbleicstate* state, + ae_state *_state) +{ + ae_frame _frame_block; + ae_int_t i; + ae_matrix c; + ae_vector ct; + + ae_frame_make(_state, &_frame_block); + _minbleicstate_clear(state); + ae_matrix_init(&c, 0, 0, DT_REAL, _state, ae_true); + ae_vector_init(&ct, 0, DT_INT, _state, ae_true); + + ae_assert(n>=1, "MinBLEICCreate: N<1", _state); + ae_assert(x->cnt>=n, "MinBLEICCreate: Length(X)<N", _state); + ae_assert(isfinitevector(x, n, _state), "MinBLEICCreate: X contains infinite or NaN values!", _state); + + /* + * Initialize. + */ + state->n = n; + ae_vector_set_length(&state->x, n, _state); + ae_vector_set_length(&state->g, n, _state); + ae_vector_set_length(&state->bndl, n, _state); + ae_vector_set_length(&state->hasbndl, n, _state); + ae_vector_set_length(&state->bndu, n, _state); + ae_vector_set_length(&state->hasbndu, n, _state); + ae_vector_set_length(&state->xcur, n, _state); + ae_vector_set_length(&state->xprev, n, _state); + ae_vector_set_length(&state->xstart, n, _state); + ae_vector_set_length(&state->xe, n, _state); + for(i=0; i<=n-1; i++) + { + state->bndl.ptr.p_double[i] = _state->v_neginf; + state->hasbndl.ptr.p_bool[i] = ae_false; + state->bndu.ptr.p_double[i] = _state->v_posinf; + state->hasbndu.ptr.p_bool[i] = ae_false; + } + minbleicsetlc(state, &c, &ct, 0, _state); + minbleicsetinnercond(state, 0.0, 0.0, 0.0, _state); + minbleicsetoutercond(state, 1.0E-6, 1.0E-6, _state); + minbleicsetbarrierwidth(state, 1.0E-3, _state); + minbleicsetbarrierdecay(state, 1.0, _state); + minbleicsetmaxits(state, 0, _state); + minbleicsetxrep(state, ae_false, _state); + minbleicsetstpmax(state, 0.0, _state); + minbleicrestartfrom(state, x, _state); + mincgcreate(n, x, &state->cgstate, _state); + ae_frame_leave(_state); +} + + +/************************************************************************* +This function sets boundary constraints for BLEIC optimizer. + +Boundary constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure stores algorithm state + BndL - lower bounds, array[N]. + If some (all) variables are unbounded, you may specify + very small number or -INF. + BndU - upper bounds, array[N]. + If some (all) variables are unbounded, you may specify + very large number or +INF. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbc(minbleicstate* state, + /* Real */ ae_vector* bndl, + /* Real */ ae_vector* bndu, + ae_state *_state) +{ + ae_int_t i; + ae_int_t n; + + + n = state->n; + ae_assert(bndl->cnt>=n, "MinBLEICSetBC: Length(BndL)<N", _state); + ae_assert(bndu->cnt>=n, "MinBLEICSetBC: Length(BndU)<N", _state); + for(i=0; i<=n-1; i++) + { + ae_assert(ae_isfinite(bndl->ptr.p_double[i], _state)||ae_isneginf(bndl->ptr.p_double[i], _state), "MinBLEICSetBC: BndL contains NAN or +INF", _state); + ae_assert(ae_isfinite(bndu->ptr.p_double[i], _state)||ae_isposinf(bndu->ptr.p_double[i], _state), "MinBLEICSetBC: BndL contains NAN or -INF", _state); + state->bndl.ptr.p_double[i] = bndl->ptr.p_double[i]; + state->hasbndl.ptr.p_bool[i] = ae_isfinite(bndl->ptr.p_double[i], _state); + state->bndu.ptr.p_double[i] = bndu->ptr.p_double[i]; + state->hasbndu.ptr.p_bool[i] = ae_isfinite(bndu->ptr.p_double[i], _state); + } +} + + +/************************************************************************* +This function sets linear constraints for BLEIC optimizer. + +Linear constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + C - linear constraints, array[K,N+1]. + Each row of C represents one constraint, either equality + or inequality (see below): + * first N elements correspond to coefficients, + * last element corresponds to the right part. + All elements of C (including right part) must be finite. + CT - type of constraints, array[K]: + * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1] + * if CT[i]=0, then I-th constraint is C[i,*]*x = C[i,n+1] + * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1] + K - number of equality/inequality constraints, K>=0: + * if given, only leading K elements of C/CT are used + * if not given, automatically determined from sizes of C/CT + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetlc(minbleicstate* state, + /* Real */ ae_matrix* c, + /* Integer */ ae_vector* ct, + ae_int_t k, + ae_state *_state) +{ + ae_int_t n; + ae_int_t m; + ae_int_t i; + ae_int_t idx; + ae_bool b; + double v; + + + n = state->n; + + /* + * First, check for errors in the inputs + */ + ae_assert(k>=0, "MinBLEICSetLC: K<0", _state); + ae_assert(c->cols>=n+1||k==0, "MinBLEICSetLC: Cols(C)<N+1", _state); + ae_assert(c->rows>=k, "MinBLEICSetLC: Rows(C)<K", _state); + ae_assert(ct->cnt>=k, "MinBLEICSetLC: Length(CT)<K", _state); + ae_assert(apservisfinitematrix(c, k, n+1, _state), "MinBLEICSetLC: C contains infinite or NaN values!", _state); + + /* + * Determine number of constraints, + * allocate space and copy + */ + state->cecnt = 0; + state->cicnt = 0; + for(i=0; i<=k-1; i++) + { + if( ct->ptr.p_int[i]!=0 ) + { + state->cicnt = state->cicnt+1; + } + else + { + state->cecnt = state->cecnt+1; + } + } + rmatrixsetlengthatleast(&state->ci, state->cicnt, n+1, _state); + rmatrixsetlengthatleast(&state->ce, state->cecnt, n+1, _state); + idx = 0; + for(i=0; i<=k-1; i++) + { + if( ct->ptr.p_int[i]!=0 ) + { + ae_v_move(&state->ci.ptr.pp_double[idx][0], 1, &c->ptr.pp_double[i][0], 1, ae_v_len(0,n)); + if( ct->ptr.p_int[i]<0 ) + { + ae_v_muld(&state->ci.ptr.pp_double[idx][0], 1, ae_v_len(0,n), -1); + } + idx = idx+1; + } + } + idx = 0; + for(i=0; i<=k-1; i++) + { + if( ct->ptr.p_int[i]==0 ) + { + ae_v_move(&state->ce.ptr.pp_double[idx][0], 1, &c->ptr.pp_double[i][0], 1, ae_v_len(0,n)); + idx = idx+1; + } + } + + /* + * Calculate ortohognal basis of row space of CE. + * Determine actual basis size, drop vectors corresponding to + * small singular values. + * + * NOTE: it is important to use "W[I]>W[0]*Tol" form (strict + * inequality instead of non-strict) because it allows us to + * handle situations with zero CE in a natural and elegant way: + * all singular values are zero, and even W[0] itself is not + * greater than W[0]*tol. + */ + if( state->cecnt>0 ) + { + b = rmatrixsvd(&state->ce, state->cecnt, n, 1, 1, 2, &state->w, &state->cesvl, &state->cebasis, _state); + ae_assert(b, "MinBLEIC: inconvergence of internal SVD", _state); + state->cedim = 0; + m = ae_minint(state->cecnt, n, _state); + for(i=0; i<=m-1; i++) + { + if( ae_fp_greater(state->w.ptr.p_double[i],state->w.ptr.p_double[0]*minbleic_svdtol*ae_machineepsilon) ) + { + state->cedim = state->cedim+1; + } + } + } + else + { + state->cedim = 0; + } + + /* + * Calculate XE: solution of CE*x = b. + * Fill it with zeros if CEDim=0 + */ + if( state->cedim>0 ) + { + rvectorsetlengthatleast(&state->tmp0, state->cedim, _state); + for(i=0; i<=state->cedim-1; i++) + { + state->tmp0.ptr.p_double[i] = 0; + } + for(i=0; i<=state->cecnt-1; i++) + { + v = state->ce.ptr.pp_double[i][n]; + ae_v_addd(&state->tmp0.ptr.p_double[0], 1, &state->cesvl.ptr.pp_double[i][0], 1, ae_v_len(0,state->cedim-1), v); + } + for(i=0; i<=state->cedim-1; i++) + { + state->tmp0.ptr.p_double[i] = state->tmp0.ptr.p_double[i]/state->w.ptr.p_double[i]; + } + for(i=0; i<=n-1; i++) + { + state->xe.ptr.p_double[i] = 0; + } + for(i=0; i<=state->cedim-1; i++) + { + v = state->tmp0.ptr.p_double[i]; + ae_v_addd(&state->xe.ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + } + } + else + { + + /* + * no constraints, fill with zeros + */ + for(i=0; i<=n-1; i++) + { + state->xe.ptr.p_double[i] = 0; + } + } +} + + +/************************************************************************* +This function sets stopping conditions for the underlying nonlinear CG +optimizer. It controls overall accuracy of solution. These conditions +should be strict enough in order for algorithm to converge. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + Algorithm finishes its work if 2-norm of the Lagrangian + gradient is less than or equal to EpsG. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + +Passing EpsG=0, EpsF=0 and EpsX=0 (simultaneously) will lead to +automatic stopping criterion selection. + +These conditions are used to terminate inner iterations. However, you +need to tune termination conditions for outer iterations too. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetinnercond(minbleicstate* state, + double epsg, + double epsf, + double epsx, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(epsg, _state), "MinBLEICSetInnerCond: EpsG is not finite number", _state); + ae_assert(ae_fp_greater_eq(epsg,0), "MinBLEICSetInnerCond: negative EpsG", _state); + ae_assert(ae_isfinite(epsf, _state), "MinBLEICSetInnerCond: EpsF is not finite number", _state); + ae_assert(ae_fp_greater_eq(epsf,0), "MinBLEICSetInnerCond: negative EpsF", _state); + ae_assert(ae_isfinite(epsx, _state), "MinBLEICSetInnerCond: EpsX is not finite number", _state); + ae_assert(ae_fp_greater_eq(epsx,0), "MinBLEICSetInnerCond: negative EpsX", _state); + state->innerepsg = epsg; + state->innerepsf = epsf; + state->innerepsx = epsx; +} + + +/************************************************************************* +This function sets stopping conditions for outer iteration of BLEIC algo. + +These conditions control accuracy of constraint handling and amount of +infeasibility allowed in the solution. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsX - >0, stopping condition on outer iteration step length + EpsI - >0, stopping condition on infeasibility + +Both EpsX and EpsI must be non-zero. + +MEANING OF EpsX + +EpsX is a stopping condition for outer iterations. Algorithm will stop +when solution of the current modified subproblem will be within EpsX +(using 2-norm) of the previous solution. + +MEANING OF EpsI + +EpsI controls feasibility properties - algorithm won't stop until all +inequality constraints will be satisfied with error (distance from current +point to the feasible area) at most EpsI. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetoutercond(minbleicstate* state, + double epsx, + double epsi, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(epsx, _state), "MinBLEICSetOuterCond: EpsX is not finite number", _state); + ae_assert(ae_fp_greater(epsx,0), "MinBLEICSetOuterCond: non-positive EpsX", _state); + ae_assert(ae_isfinite(epsi, _state), "MinBLEICSetOuterCond: EpsI is not finite number", _state); + ae_assert(ae_fp_greater(epsi,0), "MinBLEICSetOuterCond: non-positive EpsI", _state); + state->outerepsx = epsx; + state->outerepsi = epsi; +} + + +/************************************************************************* +This function sets initial barrier width. + +BLEIC optimizer uses modified barrier functions to handle inequality +constraints. These functions are almost constant in the inner parts of the +feasible area, but grow rapidly to the infinity OUTSIDE of the feasible +area. Barrier width is a distance from feasible area to the point where +modified barrier function becomes infinite. + +Barrier width must be: +* small enough (below some problem-dependent value) in order for algorithm + to converge. Necessary condition is that the target function must be + well described by linear model in the areas as small as barrier width. +* not VERY small (in order to avoid difficulties associated with rapid + changes in the modified function, ill-conditioning, round-off issues). + +Choosing appropriate barrier width is very important for efficient +optimization, and it often requires error and trial. You can use two +strategies when choosing barrier width: +* set barrier width with MinBLEICSetBarrierWidth() call. In this case you + should try different barrier widths and examine results. +* set decreasing barrier width by combining MinBLEICSetBarrierWidth() and + MinBLEICSetBarrierDecay() calls. In this case algorithm will decrease + barrier width after each outer iteration until it encounters optimal + barrier width. + +INPUT PARAMETERS: + State - structure which stores algorithm state + Mu - >0, initial barrier width + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbarrierwidth(minbleicstate* state, + double mu, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(mu, _state), "MinBLEICSetBarrierWidth: Mu is not finite number", _state); + ae_assert(ae_fp_greater(mu,0), "MinBLEICSetBarrierWidth: non-positive Mu", _state); + state->mustart = mu; +} + + +/************************************************************************* +This function sets decay coefficient for barrier width. + +By default, no barrier decay is used (Decay=1.0). + +BLEIC optimizer uses modified barrier functions to handle inequality +constraints. These functions are almost constant in the inner parts of the +feasible area, but grow rapidly to the infinity OUTSIDE of the feasible +area. Barrier width is a distance from feasible area to the point where +modified barrier function becomes infinite. Decay coefficient allows us to +decrease barrier width from the initial (suboptimial) value until +optimal value will be met. + +We recommend you either to set MuDecay=1.0 (no decay) or use some moderate +value like 0.5-0.7 + +INPUT PARAMETERS: + State - structure which stores algorithm state + MuDecay - 0<MuDecay<=1, decay coefficient + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbarrierdecay(minbleicstate* state, + double mudecay, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(mudecay, _state), "MinBLEICSetBarrierDecay: MuDecay is not finite number", _state); + ae_assert(ae_fp_greater(mudecay,0), "MinBLEICSetBarrierDecay: non-positive MuDecay", _state); + ae_assert(ae_fp_less_eq(mudecay,1), "MinBLEICSetBarrierDecay: MuDecay>1", _state); + state->mudecay = mudecay; +} + + +/************************************************************************* +This function allows to stop algorithm after specified number of inner +iterations. + +INPUT PARAMETERS: + State - structure which stores algorithm state + MaxIts - maximum number of inner iterations. + If MaxIts=0, the number of iterations is unlimited. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetmaxits(minbleicstate* state, + ae_int_t maxits, + ae_state *_state) +{ + + + ae_assert(maxits>=0, "MinBLEICSetCond: negative MaxIts!", _state); + state->maxits = maxits; +} + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinBLEICOptimize(). + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetxrep(minbleicstate* state, + ae_bool needxrep, + ae_state *_state) +{ + + + state->xrep = needxrep; +} + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which lead to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetstpmax(minbleicstate* state, + double stpmax, + ae_state *_state) +{ + + + ae_assert(ae_isfinite(stpmax, _state), "MinBLEICSetStpMax: StpMax is not finite!", _state); + ae_assert(ae_fp_greater_eq(stpmax,0), "MinBLEICSetStpMax: StpMax<0!", _state); + state->stpmax = stpmax; +} + + +/************************************************************************* + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +ae_bool minbleiciteration(minbleicstate* state, ae_state *_state) +{ + ae_int_t n; + ae_int_t m; + ae_int_t i; + double v; + double vv; + ae_bool b; + ae_bool result; + + + + /* + * Reverse communication preparations + * I know it looks ugly, but it works the same way + * anywhere from C++ to Python. + * + * This code initializes locals by: + * * random values determined during code + * generation - on first subroutine call + * * values from previous call - on subsequent calls + */ + if( state->rstate.stage>=0 ) + { + n = state->rstate.ia.ptr.p_int[0]; + m = state->rstate.ia.ptr.p_int[1]; + i = state->rstate.ia.ptr.p_int[2]; + b = state->rstate.ba.ptr.p_bool[0]; + v = state->rstate.ra.ptr.p_double[0]; + vv = state->rstate.ra.ptr.p_double[1]; + } + else + { + n = -983; + m = -989; + i = -834; + b = ae_false; + v = -287; + vv = 364; + } + if( state->rstate.stage==0 ) + { + goto lbl_0; + } + if( state->rstate.stage==1 ) + { + goto lbl_1; + } + if( state->rstate.stage==2 ) + { + goto lbl_2; + } + if( state->rstate.stage==3 ) + { + goto lbl_3; + } + if( state->rstate.stage==4 ) + { + goto lbl_4; + } + + /* + * Routine body + */ + + /* + * Prepare. + */ + n = state->n; + state->repterminationtype = 0; + state->repinneriterationscount = 0; + state->repouteriterationscount = 0; + state->repnfev = 0; + state->repdebugeqerr = 0.0; + state->repdebugfs = _state->v_nan; + state->repdebugff = _state->v_nan; + state->repdebugdx = _state->v_nan; + rvectorsetlengthatleast(&state->r, n, _state); + rvectorsetlengthatleast(&state->tmp1, n, _state); + + /* + * Check that equality constraints are consistent within EpsC. + * If not - premature termination. + */ + if( state->cedim>0 ) + { + state->repdebugeqerr = 0.0; + for(i=0; i<=state->cecnt-1; i++) + { + v = ae_v_dotproduct(&state->ce.ptr.pp_double[i][0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->repdebugeqerr = state->repdebugeqerr+ae_sqr(v-state->ce.ptr.pp_double[i][n], _state); + } + state->repdebugeqerr = ae_sqrt(state->repdebugeqerr, _state); + if( ae_fp_greater(state->repdebugeqerr,state->outerepsi) ) + { + state->repterminationtype = -3; + result = ae_false; + return result; + } + } + + /* + * Find feasible point. + * + * We make up to 16*N iterations of nonlinear CG trying to + * minimize penalty for violation of inequality constraints. + * + * if P is magnitude of violation, then penalty function has form: + * * P*P+P for non-zero violation (P>=0) + * * 0.0 for absence of violation (P<0) + * Such function is non-smooth at P=0.0, but its nonsmoothness + * allows us to rapidly converge to the feasible point. + */ + minbleic_makeprojection(state, &state->xcur, &state->r, &vv, _state); + mincgrestartfrom(&state->cgstate, &state->xcur, _state); + mincgsetcond(&state->cgstate, 0.0, 0.0, 0.0, 16*n, _state); + mincgsetxrep(&state->cgstate, ae_false, _state); + while(mincgiteration(&state->cgstate, _state)) + { + if( state->cgstate.needfg ) + { + ae_v_move(&state->x.ptr.p_double[0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_penaltyfunction(state, &state->x, &state->cgstate.f, &state->cgstate.g, &state->r, &state->mba, &state->errfeas, _state); + continue; + } + } + mincgresults(&state->cgstate, &state->xcur, &state->cgrep, _state); + ae_v_move(&state->tmp1.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_penaltyfunction(state, &state->tmp1, &state->f, &state->g, &state->r, &state->mba, &state->errfeas, _state); + if( ae_fp_greater(state->errfeas,state->outerepsi) ) + { + state->repterminationtype = -3; + result = ae_false; + return result; + } + + /* + * Initialize RepDebugFS with function value at initial point + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 0; + goto lbl_rcomm; +lbl_0: + state->needfg = ae_false; + state->repnfev = state->repnfev+1; + state->repdebugfs = state->f; + + /* + * Calculate number of inequality constraints and allocate + * array for Lagrange multipliers. + * + * Initialize Lagrange multipliers and penalty term + */ + state->lmcnt = state->cicnt; + for(i=0; i<=n-1; i++) + { + if( state->hasbndl.ptr.p_bool[i] ) + { + state->lmcnt = state->lmcnt+1; + } + if( state->hasbndu.ptr.p_bool[i] ) + { + state->lmcnt = state->lmcnt+1; + } + } + if( state->lmcnt>0 ) + { + rvectorsetlengthatleast(&state->lm, state->lmcnt, _state); + } + for(i=0; i<=state->lmcnt-1; i++) + { + state->lm.ptr.p_double[i] = 1.0; + } + state->mu = ae_maxreal(state->mustart, state->outerepsi, _state); + + /* + * BndMax is a maximum over right parts of inequality constraints. + * It is used later to bound Mu from below. + */ + state->bndmax = 0.0; + for(i=0; i<=n-1; i++) + { + if( state->hasbndl.ptr.p_bool[i] ) + { + state->bndmax = ae_maxreal(state->bndmax, ae_fabs(state->bndl.ptr.p_double[i], _state), _state); + } + if( state->hasbndu.ptr.p_bool[i] ) + { + state->bndmax = ae_maxreal(state->bndmax, ae_fabs(state->bndu.ptr.p_double[i], _state), _state); + } + } + for(i=0; i<=state->cicnt-1; i++) + { + state->bndmax = ae_maxreal(state->bndmax, ae_fabs(state->ci.ptr.pp_double[i][n], _state), _state); + } + + /* + * External cycle: optimization subject to current + * estimate of Lagrange multipliers and penalty term + */ + state->itsleft = state->maxits; + state->mucounter = minbleic_mucountdownlen; + ae_v_move(&state->xprev.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + ae_v_move(&state->xstart.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); +lbl_5: + if( ae_false ) + { + goto lbl_6; + } + + /* + * Inner cycle: CG with projections and penalty functions + */ + mincgrestartfrom(&state->cgstate, &state->xcur, _state); + mincgsetcond(&state->cgstate, state->innerepsg, state->innerepsf, state->innerepsx, state->itsleft, _state); + mincgsetxrep(&state->cgstate, state->xrep, _state); + mincgsetdrep(&state->cgstate, ae_true, _state); + mincgsetstpmax(&state->cgstate, state->stpmax, _state); +lbl_7: + if( !mincgiteration(&state->cgstate, _state) ) + { + goto lbl_8; + } + if( state->cgstate.lsstart ) + { + + /* + * Beginning of the line search: set upper limit on step size + * to prevent algo from leaving area where barrier function + * is defined. + * + * We calculate State.CGState.CurStpMax in two steps: + * * first, we calculate it as the distance from the current + * point to the boundary where modified barrier function + * overflows; distance is taken along State.CGState.D + * * then we multiply it by 0.999 to make sure that we won't + * make step into the boundary due to the rounding noise + */ + if( ae_fp_eq(state->cgstate.curstpmax,0) ) + { + state->cgstate.curstpmax = 1.0E50; + } + state->boundary = -0.9*state->mu; + state->closetobarrier = ae_false; + for(i=0; i<=n-1; i++) + { + if( state->hasbndl.ptr.p_bool[i] ) + { + v = state->cgstate.x.ptr.p_double[i]-state->bndl.ptr.p_double[i]; + if( ae_fp_less(state->cgstate.d.ptr.p_double[i],0) ) + { + state->cgstate.curstpmax = safeminposrv(v+state->mu, -state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state); + } + if( ae_fp_greater(state->cgstate.d.ptr.p_double[i],0)&&ae_fp_less_eq(v,state->boundary) ) + { + state->cgstate.curstpmax = safeminposrv(-v, state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state); + state->closetobarrier = ae_true; + } + } + if( state->hasbndu.ptr.p_bool[i] ) + { + v = state->bndu.ptr.p_double[i]-state->cgstate.x.ptr.p_double[i]; + if( ae_fp_greater(state->cgstate.d.ptr.p_double[i],0) ) + { + state->cgstate.curstpmax = safeminposrv(v+state->mu, state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state); + } + if( ae_fp_less(state->cgstate.d.ptr.p_double[i],0)&&ae_fp_less_eq(v,state->boundary) ) + { + state->cgstate.curstpmax = safeminposrv(-v, -state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state); + state->closetobarrier = ae_true; + } + } + } + for(i=0; i<=state->cicnt-1; i++) + { + v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &state->cgstate.d.ptr.p_double[0], 1, ae_v_len(0,n-1)); + vv = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + vv = vv-state->ci.ptr.pp_double[i][n]; + if( ae_fp_less(v,0) ) + { + state->cgstate.curstpmax = safeminposrv(vv+state->mu, -v, state->cgstate.curstpmax, _state); + } + if( ae_fp_greater(v,0)&&ae_fp_less_eq(vv,state->boundary) ) + { + state->cgstate.curstpmax = safeminposrv(-vv, v, state->cgstate.curstpmax, _state); + state->closetobarrier = ae_true; + } + } + state->cgstate.curstpmax = 0.999*state->cgstate.curstpmax; + if( state->closetobarrier ) + { + state->cgstate.stp = 0.5*state->cgstate.curstpmax; + } + goto lbl_7; + } + if( !state->cgstate.needfg ) + { + goto lbl_9; + } + + /* + * * get X from CG + * * project X into equality constrained subspace + * * RComm (note: X is stored in Tmp1 to prevent accidental corruption by user) + * * modify target function with barriers and penalties + * * pass F/G back to nonlinear CG + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_makeprojection(state, &state->x, &state->r, &vv, _state); + ae_v_move(&state->tmp1.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 1; + goto lbl_rcomm; +lbl_1: + state->needfg = ae_false; + ae_v_move(&state->x.ptr.p_double[0], 1, &state->tmp1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_modifytargetfunction(state, &state->x, &state->r, vv, &state->f, &state->g, &state->gnorm, &state->mpgnorm, &state->mba, &state->errfeas, &state->errslack, _state); + state->cgstate.f = state->f; + ae_v_move(&state->cgstate.g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1)); + goto lbl_7; +lbl_9: + if( !state->cgstate.xupdated ) + { + goto lbl_11; + } + + /* + * Report + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->f = state->cgstate.f; + minbleic_clearrequestfields(state, _state); + state->xupdated = ae_true; + state->rstate.stage = 2; + goto lbl_rcomm; +lbl_2: + state->xupdated = ae_false; + goto lbl_7; +lbl_11: + goto lbl_7; +lbl_8: + mincgresults(&state->cgstate, &state->xcur, &state->cgrep, _state); + state->repinneriterationscount = state->repinneriterationscount+state->cgrep.iterationscount; + state->repouteriterationscount = state->repouteriterationscount+1; + state->repnfev = state->repnfev+state->cgrep.nfev; + + /* + * Update RepDebugFF with function value at current point + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 3; + goto lbl_rcomm; +lbl_3: + state->needfg = ae_false; + state->repnfev = state->repnfev+1; + state->repdebugff = state->f; + + /* + * Update Lagrange multipliers and Mu + * + * We calculate three values during update: + * * LMDif - absolute differense between new and old multipliers + * * LMNorm - inf-norm of Lagrange vector + * * LMGrowth - maximum componentwise relative growth of Lagrange vector + * + * We limit growth of Lagrange multipliers by MaxLMGrowth, + * it allows us to stabilize algorithm when it moves deep into + * the infeasible area. + * + * We calculate modified target function at XCur in order to get + * information about overall problem properties. Some values + * calculated here will be used later: + * * ErrFeas - feasibility error + * * ErrSlack - complementary slackness error + */ + ae_v_move(&state->x.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_makeprojection(state, &state->x, &state->r, &vv, _state); + ae_v_move(&state->tmp1.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_clearrequestfields(state, _state); + state->needfg = ae_true; + state->rstate.stage = 4; + goto lbl_rcomm; +lbl_4: + state->needfg = ae_false; + ae_v_move(&state->x.ptr.p_double[0], 1, &state->tmp1.ptr.p_double[0], 1, ae_v_len(0,n-1)); + minbleic_modifytargetfunction(state, &state->x, &state->r, vv, &state->f, &state->g, &state->gnorm, &state->mpgnorm, &state->mba, &state->errfeas, &state->errslack, _state); + m = 0; + state->lmdif = 0; + state->lmnorm = 0; + state->lmgrowth = 0; + for(i=0; i<=n-1; i++) + { + if( state->hasbndl.ptr.p_bool[i] ) + { + barrierfunc(state->xcur.ptr.p_double[i]-state->bndl.ptr.p_double[i], state->mu, &state->v0, &state->v1, &state->v2, _state); + v = state->lm.ptr.p_double[m]; + vv = ae_minreal(minbleic_maxlmgrowth, -state->mu*state->v1, _state); + state->lm.ptr.p_double[m] = vv*state->lm.ptr.p_double[m]; + state->lmnorm = ae_maxreal(state->lmnorm, ae_fabs(v, _state), _state); + state->lmdif = ae_maxreal(state->lmdif, ae_fabs(state->lm.ptr.p_double[m]-v, _state), _state); + state->lmgrowth = ae_maxreal(state->lmgrowth, vv, _state); + m = m+1; + } + if( state->hasbndu.ptr.p_bool[i] ) + { + barrierfunc(state->bndu.ptr.p_double[i]-state->xcur.ptr.p_double[i], state->mu, &state->v0, &state->v1, &state->v2, _state); + v = state->lm.ptr.p_double[m]; + vv = ae_minreal(minbleic_maxlmgrowth, -state->mu*state->v1, _state); + state->lm.ptr.p_double[m] = vv*state->lm.ptr.p_double[m]; + state->lmnorm = ae_maxreal(state->lmnorm, ae_fabs(v, _state), _state); + state->lmdif = ae_maxreal(state->lmdif, ae_fabs(state->lm.ptr.p_double[m]-v, _state), _state); + state->lmgrowth = ae_maxreal(state->lmgrowth, vv, _state); + m = m+1; + } + } + for(i=0; i<=state->cicnt-1; i++) + { + v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = v-state->ci.ptr.pp_double[i][n]; + barrierfunc(v, state->mu, &state->v0, &state->v1, &state->v2, _state); + v = state->lm.ptr.p_double[m]; + vv = ae_minreal(minbleic_maxlmgrowth, -state->mu*state->v1, _state); + state->lm.ptr.p_double[m] = vv*state->lm.ptr.p_double[m]; + state->lmnorm = ae_maxreal(state->lmnorm, ae_fabs(v, _state), _state); + state->lmdif = ae_maxreal(state->lmdif, ae_fabs(state->lm.ptr.p_double[m]-v, _state), _state); + state->lmgrowth = ae_maxreal(state->lmgrowth, vv, _state); + m = m+1; + } + if( ae_fp_greater(state->mba,-0.2*state->mudecay*state->mu) ) + { + state->mu = ae_maxreal(state->mudecay*state->mu, 1.0E6*ae_machineepsilon*state->bndmax, _state); + } + if( ae_fp_less(state->mu,state->outerepsi) ) + { + state->mucounter = state->mucounter-1; + state->mu = state->outerepsi; + } + + /* + * Check for stopping: + * * "normal", outer step size is small enough, infeasibility is within bounds + * * "inconsistent", if Lagrange multipliers increased beyond threshold given by MaxLagrangeMul + * * "too stringent", in other cases + */ + v = 0; + for(i=0; i<=n-1; i++) + { + v = v+ae_sqr(state->xcur.ptr.p_double[i]-state->xprev.ptr.p_double[i], _state); + } + v = ae_sqrt(v, _state); + if( ae_fp_less_eq(state->errfeas,state->outerepsi)&&ae_fp_less_eq(v,state->outerepsx) ) + { + state->repterminationtype = 4; + goto lbl_6; + } + if( state->maxits>0 ) + { + state->itsleft = state->itsleft-state->cgrep.iterationscount; + if( state->itsleft<=0 ) + { + state->repterminationtype = 5; + goto lbl_6; + } + } + if( ae_fp_greater_eq(state->repouteriterationscount,minbleic_maxouterits) ) + { + state->repterminationtype = 5; + goto lbl_6; + } + if( ae_fp_less(state->lmdif,minbleic_lmtol*ae_machineepsilon*state->lmnorm)||ae_fp_less(state->lmnorm,minbleic_minlagrangemul) ) + { + state->repterminationtype = 7; + goto lbl_6; + } + if( state->mucounter<=0 ) + { + state->repterminationtype = 7; + goto lbl_6; + } + if( ae_fp_greater(state->lmnorm,minbleic_maxlagrangemul) ) + { + state->repterminationtype = -3; + goto lbl_6; + } + + /* + * Next iteration + */ + ae_v_move(&state->xprev.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1)); + goto lbl_5; +lbl_6: + + /* + * We've stopped, fill debug information + */ + state->repdebugeqerr = 0.0; + for(i=0; i<=state->cecnt-1; i++) + { + v = ae_v_dotproduct(&state->ce.ptr.pp_double[i][0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1)); + state->repdebugeqerr = state->repdebugeqerr+ae_sqr(v-state->ce.ptr.pp_double[i][n], _state); + } + state->repdebugeqerr = ae_sqrt(state->repdebugeqerr, _state); + state->repdebugdx = 0; + for(i=0; i<=n-1; i++) + { + state->repdebugdx = state->repdebugdx+ae_sqr(state->xcur.ptr.p_double[i]-state->xstart.ptr.p_double[i], _state); + } + state->repdebugdx = ae_sqrt(state->repdebugdx, _state); + result = ae_false; + return result; + + /* + * Saving state + */ +lbl_rcomm: + result = ae_true; + state->rstate.ia.ptr.p_int[0] = n; + state->rstate.ia.ptr.p_int[1] = m; + state->rstate.ia.ptr.p_int[2] = i; + state->rstate.ba.ptr.p_bool[0] = b; + state->rstate.ra.ptr.p_double[0] = v; + state->rstate.ra.ptr.p_double[1] = vv; + return result; +} + + +/************************************************************************* +BLEIC results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -3 inconsistent constraints. Feasible point is + either nonexistent or too hard to find. Try to + restart optimizer with better initial + approximation + * -2 rounding errors prevent further improvement. + X contains best point found. + * 4 conditions on constraints are fulfilled + with error less than or equal to EpsC + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible, + X contains best point found so far. + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicresults(minbleicstate* state, + /* Real */ ae_vector* x, + minbleicreport* rep, + ae_state *_state) +{ + + ae_vector_clear(x); + _minbleicreport_clear(rep); + + minbleicresultsbuf(state, x, rep, _state); +} + + +/************************************************************************* +BLEIC results + +Buffered implementation of MinBLEICResults() which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicresultsbuf(minbleicstate* state, + /* Real */ ae_vector* x, + minbleicreport* rep, + ae_state *_state) +{ + + + if( x->cnt<state->n ) + { + ae_vector_set_length(x, state->n, _state); + } + ae_v_move(&x->ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,state->n-1)); + rep->inneriterationscount = state->repinneriterationscount; + rep->outeriterationscount = state->repouteriterationscount; + rep->nfev = state->repnfev; + rep->terminationtype = state->repterminationtype; + rep->debugeqerr = state->repdebugeqerr; + rep->debugfs = state->repdebugfs; + rep->debugff = state->repdebugff; + rep->debugdx = state->repdebugdx; +} + + +/************************************************************************* +This subroutine restarts algorithm from new point. +All optimization parameters (including constraints) are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + X - new starting point. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicrestartfrom(minbleicstate* state, + /* Real */ ae_vector* x, + ae_state *_state) +{ + ae_int_t n; + + + n = state->n; + + /* + * First, check for errors in the inputs + */ + ae_assert(x->cnt>=n, "MinBLEICRestartFrom: Length(X)<N", _state); + ae_assert(isfinitevector(x, n, _state), "MinBLEICRestartFrom: X contains infinite or NaN values!", _state); + + /* + * Set XC + */ + ae_v_move(&state->xcur.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * prepare RComm facilities + */ + ae_vector_set_length(&state->rstate.ia, 2+1, _state); + ae_vector_set_length(&state->rstate.ba, 0+1, _state); + ae_vector_set_length(&state->rstate.ra, 1+1, _state); + state->rstate.stage = -1; + minbleic_clearrequestfields(state, _state); +} + + +/************************************************************************* +Modified barrier function calculated at point X with respect to the +barrier parameter Mu. + +Functions, its first and second derivatives are calculated. +*************************************************************************/ +void barrierfunc(double x, + double mu, + double* f, + double* df, + double* d2f, + ae_state *_state) +{ + double c0; + double c1; + double c2; + double xpmu; + + *f = 0; + *df = 0; + *d2f = 0; + + xpmu = x+mu; + if( ae_fp_greater(xpmu,0.5*mu) ) + { + *f = -ae_log(x/mu+1, _state); + *df = -1/(x+mu); + *d2f = 1/(xpmu*xpmu); + return; + } + if( ae_fp_greater(xpmu,0) ) + { + c0 = -ae_log(0.5, _state)-0.5; + c1 = -1/mu; + c2 = mu/4; + *f = c0+c1*(x+0.5*mu)+c2/xpmu; + *df = c1-c2/((x+mu)*(x+mu)); + *d2f = 2*c2/((x+mu)*(x+mu)*(x+mu)); + return; + } + *f = ae_maxrealnumber; + *df = 0; + *d2f = 0; +} + + +/************************************************************************* +Clears request fileds (to be sure that we don't forget to clear something) +*************************************************************************/ +static void minbleic_clearrequestfields(minbleicstate* state, + ae_state *_state) +{ + + + state->needfg = ae_false; + state->xupdated = ae_false; +} + + +/************************************************************************* +This function makes projection of X into equality constrained subspace. +It calculates set of additional values which are used later for +modification of the target function F. + +INPUT PARAMETERS: + State - optimizer state (we use its fields to get information + about constraints) + X - vector being projected + R - preallocated buffer, used to store residual from projection + +OUTPUT PARAMETERS: + X - projection of input X + R - residual + RNorm - residual norm squared, used later to modify target function +*************************************************************************/ +static void minbleic_makeprojection(minbleicstate* state, + /* Real */ ae_vector* x, + /* Real */ ae_vector* r, + double* rnorm2, + ae_state *_state) +{ + double v; + ae_int_t i; + ae_int_t n; + + *rnorm2 = 0; + + n = state->n; + + /* + * * subtract XE from X + * * project X + * * calculate norm of deviation from null space, store it in VV + * * calculate residual from projection, store it in R + * * add XE to X + */ + ae_v_sub(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1)); + *rnorm2 = 0; + for(i=0; i<=n-1; i++) + { + r->ptr.p_double[i] = 0; + } + for(i=0; i<=state->cedim-1; i++) + { + v = ae_v_dotproduct(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1)); + ae_v_subd(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + ae_v_addd(&r->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + *rnorm2 = *rnorm2+ae_sqr(v, _state); + } + ae_v_add(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1)); +} + + +/************************************************************************* +This subroutine applies modifications to the target function given by +its value F and gradient G at the projected point X which lies in the +equality constrained subspace. + +Following modifications are applied: +* modified barrier functions to handle inequality constraints + (both F and G are modified) +* projection of gradient into equality constrained subspace + (only G is modified) +* quadratic penalty for deviations from equality constrained subspace + (both F and G are modified) + +It also calculates gradient norm (three different norms for three +different types of gradient), feasibility and complementary slackness +errors. + +INPUT PARAMETERS: + State - optimizer state (we use its fields to get information + about constraints) + X - point (projected into equality constrained subspace) + R - residual from projection + RNorm2 - residual norm squared + F - function value at X + G - function gradient at X + +OUTPUT PARAMETERS: + F - modified function value at X + G - modified function gradient at X + GNorm - 2-norm of unmodified G + MPGNorm - 2-norm of modified G + MBA - minimum argument of barrier functions. + If X is strictly feasible, it is greater than zero. + If X lies on a boundary, it is zero. + It is negative for infeasible X. + FIErr - 2-norm of feasibility error with respect to + inequality/bound constraints + CSErr - 2-norm of complementarity slackness error +*************************************************************************/ +static void minbleic_modifytargetfunction(minbleicstate* state, + /* Real */ ae_vector* x, + /* Real */ ae_vector* r, + double rnorm2, + double* f, + /* Real */ ae_vector* g, + double* gnorm, + double* mpgnorm, + double* mba, + double* fierr, + double* cserr, + ae_state *_state) +{ + double v; + double vv; + double t; + ae_int_t i; + ae_int_t n; + ae_int_t m; + double v0; + double v1; + double v2; + ae_bool hasconstraints; + + *gnorm = 0; + *mpgnorm = 0; + *mba = 0; + *fierr = 0; + *cserr = 0; + + n = state->n; + *mba = ae_maxrealnumber; + hasconstraints = ae_false; + + /* + * GNorm + */ + v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &g->ptr.p_double[0], 1, ae_v_len(0,n-1)); + *gnorm = ae_sqrt(v, _state); + + /* + * Process bound and inequality constraints. + * Bound constraints with +-INF are ignored. + * Here M is used to store number of constraints processed. + */ + m = 0; + *fierr = 0; + *cserr = 0; + for(i=0; i<=n-1; i++) + { + if( state->hasbndl.ptr.p_bool[i] ) + { + v = x->ptr.p_double[i]-state->bndl.ptr.p_double[i]; + *mba = ae_minreal(v, *mba, _state); + barrierfunc(v, state->mu, &v0, &v1, &v2, _state); + *f = *f+state->mu*state->lm.ptr.p_double[m]*v0; + g->ptr.p_double[i] = g->ptr.p_double[i]+state->mu*state->lm.ptr.p_double[m]*v1; + if( ae_fp_less(v,0) ) + { + *fierr = *fierr+v*v; + } + t = -state->lm.ptr.p_double[m]*v; + *cserr = *cserr+t*t; + m = m+1; + hasconstraints = ae_true; + } + if( state->hasbndu.ptr.p_bool[i] ) + { + v = state->bndu.ptr.p_double[i]-x->ptr.p_double[i]; + *mba = ae_minreal(v, *mba, _state); + barrierfunc(v, state->mu, &v0, &v1, &v2, _state); + *f = *f+state->mu*state->lm.ptr.p_double[m]*v0; + g->ptr.p_double[i] = g->ptr.p_double[i]-state->mu*state->lm.ptr.p_double[m]*v1; + if( ae_fp_less(v,0) ) + { + *fierr = *fierr+v*v; + } + t = -state->lm.ptr.p_double[m]*v; + *cserr = *cserr+t*t; + m = m+1; + hasconstraints = ae_true; + } + } + for(i=0; i<=state->cicnt-1; i++) + { + v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = v-state->ci.ptr.pp_double[i][n]; + *mba = ae_minreal(v, *mba, _state); + barrierfunc(v, state->mu, &v0, &v1, &v2, _state); + *f = *f+state->mu*state->lm.ptr.p_double[m]*v0; + vv = state->mu*state->lm.ptr.p_double[m]*v1; + ae_v_addd(&g->ptr.p_double[0], 1, &state->ci.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), vv); + if( ae_fp_less(v,0) ) + { + *fierr = *fierr+v*v; + } + t = -state->lm.ptr.p_double[m]*v; + *cserr = *cserr+t*t; + m = m+1; + hasconstraints = ae_true; + } + *fierr = ae_sqrt(*fierr, _state); + *cserr = ae_sqrt(*cserr, _state); + if( !hasconstraints ) + { + *mba = 0.0; + } + + /* + * Process equality constraints: + * * modify F to handle penalty term for equality constraints + * * project gradient on null space of equality constraints + * * add penalty term for equality constraints to gradient + */ + *f = *f+rnorm2; + for(i=0; i<=state->cedim-1; i++) + { + v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1)); + ae_v_subd(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + } + ae_v_addd(&g->ptr.p_double[0], 1, &r->ptr.p_double[0], 1, ae_v_len(0,n-1), 2); + + /* + * MPGNorm + */ + v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &g->ptr.p_double[0], 1, ae_v_len(0,n-1)); + *mpgnorm = ae_sqrt(v, _state); +} + + +/************************************************************************* +This subroutine calculates penalty for violation of equality/inequality +constraints. It is used to find feasible point. + +Following modifications are applied: +* quadratic penalty for deviations from inequality constrained subspace + (both F and G are modified) +* projection of gradient into equality constrained subspace + (only G is modified) +* quadratic penalty for deviations from equality constrained subspace + (both F and G are modified) + +INPUT PARAMETERS: + State - optimizer state (we use its fields to get information + about constraints) + X - point (modified by function) + G - preallocated array[N] + R - preallocated array[N] + +OUTPUT PARAMETERS: + X - projection of X into equality constrained subspace + F - modified function value at X + G - modified function gradient at X + MBA - minimum argument of barrier functions. + If X is strictly feasible, it is greater than zero. + If X lies on a boundary, it is zero. + It is negative for infeasible X. + FIErr - 2-norm of feasibility error with respect to + inequality/bound constraints +*************************************************************************/ +static void minbleic_penaltyfunction(minbleicstate* state, + /* Real */ ae_vector* x, + double* f, + /* Real */ ae_vector* g, + /* Real */ ae_vector* r, + double* mba, + double* fierr, + ae_state *_state) +{ + double v; + double vv; + ae_int_t i; + ae_int_t m; + ae_int_t n; + double rnorm2; + ae_bool hasconstraints; + + *mba = 0; + *fierr = 0; + + n = state->n; + *mba = ae_maxrealnumber; + hasconstraints = ae_false; + + /* + * Initialize F/G + */ + *f = 0.0; + for(i=0; i<=n-1; i++) + { + g->ptr.p_double[i] = 0.0; + } + + /* + * Calculate projection of X: + * * subtract XE from X + * * project X + * * calculate norm of deviation from null space, store it in VV + * * calculate residual from projection, store it in R + * * add XE to X + */ + ae_v_sub(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1)); + rnorm2 = 0; + for(i=0; i<=n-1; i++) + { + r->ptr.p_double[i] = 0; + } + for(i=0; i<=state->cedim-1; i++) + { + v = ae_v_dotproduct(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1)); + ae_v_subd(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + ae_v_addd(&r->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + rnorm2 = rnorm2+ae_sqr(v, _state); + } + ae_v_add(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1)); + + /* + * Process bound and inequality constraints. + * Bound constraints with +-INF are ignored. + * Here M is used to store number of constraints processed. + */ + m = 0; + *fierr = 0; + for(i=0; i<=n-1; i++) + { + if( state->hasbndl.ptr.p_bool[i] ) + { + v = x->ptr.p_double[i]-state->bndl.ptr.p_double[i]; + *mba = ae_minreal(v, *mba, _state); + if( ae_fp_less(v,0) ) + { + *f = *f+v*v; + g->ptr.p_double[i] = g->ptr.p_double[i]+2*v; + *fierr = *fierr+v*v; + } + m = m+1; + hasconstraints = ae_true; + } + if( state->hasbndu.ptr.p_bool[i] ) + { + v = state->bndu.ptr.p_double[i]-x->ptr.p_double[i]; + *mba = ae_minreal(v, *mba, _state); + if( ae_fp_less(v,0) ) + { + *f = *f+v*v; + g->ptr.p_double[i] = g->ptr.p_double[i]-2*v; + *fierr = *fierr+v*v; + } + m = m+1; + hasconstraints = ae_true; + } + } + for(i=0; i<=state->cicnt-1; i++) + { + v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1)); + v = v-state->ci.ptr.pp_double[i][n]; + *mba = ae_minreal(v, *mba, _state); + if( ae_fp_less(v,0) ) + { + *f = *f+v*v; + vv = 2*v; + ae_v_addd(&g->ptr.p_double[0], 1, &state->ci.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), vv); + *fierr = *fierr+v*v; + } + m = m+1; + hasconstraints = ae_true; + } + *fierr = ae_sqrt(*fierr, _state); + if( !hasconstraints ) + { + *mba = 0.0; + } + + /* + * Process equality constraints: + * * modify F to handle penalty term for equality constraints + * * project gradient on null space of equality constraints + * * add penalty term for equality constraints to gradient + */ + *f = *f+rnorm2; + for(i=0; i<=state->cedim-1; i++) + { + v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1)); + ae_v_subd(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v); + } + ae_v_addd(&g->ptr.p_double[0], 1, &r->ptr.p_double[0], 1, ae_v_len(0,n-1), 2); +} + + +ae_bool _minbleicstate_init(minbleicstate* p, ae_state *_state, ae_bool make_automatic) +{ + if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_rcommstate_init(&p->rstate, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xcur, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xprev, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xstart, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->ce, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->ci, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->cebasis, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init(&p->cesvl, 0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->xe, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->bndl, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->hasbndl, 0, DT_BOOL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->bndu, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->hasbndu, 0, DT_BOOL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->lm, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->w, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->tmp1, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init(&p->r, 0, DT_REAL, _state, make_automatic) ) + return ae_false; + if( !_linminstate_init(&p->lstate, _state, make_automatic) ) + return ae_false; + if( !_mincgstate_init(&p->cgstate, _state, make_automatic) ) + return ae_false; + if( !_mincgreport_init(&p->cgrep, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +ae_bool _minbleicstate_init_copy(minbleicstate* dst, minbleicstate* src, ae_state *_state, ae_bool make_automatic) +{ + dst->n = src->n; + dst->innerepsg = src->innerepsg; + dst->innerepsf = src->innerepsf; + dst->innerepsx = src->innerepsx; + dst->outerepsx = src->outerepsx; + dst->outerepsi = src->outerepsi; + dst->maxits = src->maxits; + dst->xrep = src->xrep; + dst->stpmax = src->stpmax; + dst->cgtype = src->cgtype; + dst->mustart = src->mustart; + dst->mudecay = src->mudecay; + if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) ) + return ae_false; + dst->f = src->f; + if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) ) + return ae_false; + dst->needfg = src->needfg; + dst->xupdated = src->xupdated; + if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) ) + return ae_false; + dst->repinneriterationscount = src->repinneriterationscount; + dst->repouteriterationscount = src->repouteriterationscount; + dst->repnfev = src->repnfev; + dst->repterminationtype = src->repterminationtype; + dst->repdebugeqerr = src->repdebugeqerr; + dst->repdebugfs = src->repdebugfs; + dst->repdebugff = src->repdebugff; + dst->repdebugdx = src->repdebugdx; + if( !ae_vector_init_copy(&dst->xcur, &src->xcur, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xprev, &src->xprev, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xstart, &src->xstart, _state, make_automatic) ) + return ae_false; + dst->itsleft = src->itsleft; + dst->mucounter = src->mucounter; + if( !ae_matrix_init_copy(&dst->ce, &src->ce, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->ci, &src->ci, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->cebasis, &src->cebasis, _state, make_automatic) ) + return ae_false; + if( !ae_matrix_init_copy(&dst->cesvl, &src->cesvl, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->xe, &src->xe, _state, make_automatic) ) + return ae_false; + dst->cecnt = src->cecnt; + dst->cicnt = src->cicnt; + dst->cedim = src->cedim; + if( !ae_vector_init_copy(&dst->bndl, &src->bndl, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->hasbndl, &src->hasbndl, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->bndu, &src->bndu, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->hasbndu, &src->hasbndu, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->lm, &src->lm, _state, make_automatic) ) + return ae_false; + dst->lmcnt = src->lmcnt; + dst->mu = src->mu; + if( !ae_vector_init_copy(&dst->w, &src->w, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic) ) + return ae_false; + if( !ae_vector_init_copy(&dst->r, &src->r, _state, make_automatic) ) + return ae_false; + dst->v0 = src->v0; + dst->v1 = src->v1; + dst->v2 = src->v2; + dst->t = src->t; + dst->errfeas = src->errfeas; + dst->errslack = src->errslack; + dst->gnorm = src->gnorm; + dst->mpgnorm = src->mpgnorm; + dst->lmdif = src->lmdif; + dst->lmnorm = src->lmnorm; + dst->lmgrowth = src->lmgrowth; + dst->mba = src->mba; + dst->boundary = src->boundary; + dst->closetobarrier = src->closetobarrier; + dst->bndmax = src->bndmax; + if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) ) + return ae_false; + if( !_mincgstate_init_copy(&dst->cgstate, &src->cgstate, _state, make_automatic) ) + return ae_false; + if( !_mincgreport_init_copy(&dst->cgrep, &src->cgrep, _state, make_automatic) ) + return ae_false; + return ae_true; +} + + +void _minbleicstate_clear(minbleicstate* p) +{ + ae_vector_clear(&p->x); + ae_vector_clear(&p->g); + _rcommstate_clear(&p->rstate); + ae_vector_clear(&p->xcur); + ae_vector_clear(&p->xprev); + ae_vector_clear(&p->xstart); + ae_matrix_clear(&p->ce); + ae_matrix_clear(&p->ci); + ae_matrix_clear(&p->cebasis); + ae_matrix_clear(&p->cesvl); + ae_vector_clear(&p->xe); + ae_vector_clear(&p->bndl); + ae_vector_clear(&p->hasbndl); + ae_vector_clear(&p->bndu); + ae_vector_clear(&p->hasbndu); + ae_vector_clear(&p->lm); + ae_vector_clear(&p->w); + ae_vector_clear(&p->tmp0); + ae_vector_clear(&p->tmp1); + ae_vector_clear(&p->r); + _linminstate_clear(&p->lstate); + _mincgstate_clear(&p->cgstate); + _mincgreport_clear(&p->cgrep); +} + + +ae_bool _minbleicreport_init(minbleicreport* p, ae_state *_state, ae_bool make_automatic) +{ + return ae_true; +} + + +ae_bool _minbleicreport_init_copy(minbleicreport* dst, minbleicreport* src, ae_state *_state, ae_bool make_automatic) +{ + dst->inneriterationscount = src->inneriterationscount; + dst->outeriterationscount = src->outeriterationscount; + dst->nfev = src->nfev; + dst->terminationtype = src->terminationtype; + dst->debugeqerr = src->debugeqerr; + dst->debugfs = src->debugfs; + dst->debugff = src->debugff; + dst->debugdx = src->debugdx; + return ae_true; +} + + +void _minbleicreport_clear(minbleicreport* p) +{ +} + + + +} + diff --git a/contrib/lbfgs/optimization.h b/contrib/lbfgs/optimization.h new file mode 100755 index 0000000000..86a6c0a0ad --- /dev/null +++ b/contrib/lbfgs/optimization.h @@ -0,0 +1,2649 @@ +/************************************************************************* +Copyright (c) Sergey Bochkanov (ALGLIB project). + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses +>>> END OF LICENSE >>> +*************************************************************************/ +#ifndef _optimization_pkg_h +#define _optimization_pkg_h +#include "ap.h" +#include "alglibinternal.h" +#include "linalg.h" +#include "alglibmisc.h" + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +typedef struct +{ + ae_int_t n; + ae_int_t m; + double epsg; + double epsf; + double epsx; + ae_int_t maxits; + ae_int_t flags; + ae_bool xrep; + double stpmax; + ae_int_t nfev; + ae_int_t mcstage; + ae_int_t k; + ae_int_t q; + ae_int_t p; + ae_vector rho; + ae_matrix y; + ae_matrix s; + ae_vector theta; + ae_vector d; + double stp; + ae_vector work; + double fold; + ae_int_t prectype; + double gammak; + ae_matrix denseh; + ae_vector autobuf; + ae_vector x; + double f; + ae_vector g; + ae_bool needfg; + ae_bool xupdated; + rcommstate rstate; + ae_int_t repiterationscount; + ae_int_t repnfev; + ae_int_t repterminationtype; + linminstate lstate; +} minlbfgsstate; +typedef struct +{ + ae_int_t iterationscount; + ae_int_t nfev; + ae_int_t terminationtype; +} minlbfgsreport; +typedef struct +{ + ae_int_t n; + ae_int_t m; + double diffstep; + double epsg; + double epsf; + double epsx; + ae_int_t maxits; + ae_bool xrep; + double stpmax; + ae_int_t maxmodelage; + ae_bool makeadditers; + ae_vector x; + double f; + ae_vector fi; + ae_matrix j; + ae_matrix h; + ae_vector g; + ae_bool needf; + ae_bool needfg; + ae_bool needfgh; + ae_bool needfij; + ae_bool needfi; + ae_bool xupdated; + ae_int_t algomode; + ae_bool hasf; + ae_bool hasfi; + ae_bool hasg; + ae_vector xbase; + double fbase; + ae_vector fibase; + ae_vector gbase; + ae_matrix quadraticmodel; + double lambdav; + double nu; + ae_matrix dampedmodel; + ae_int_t modelage; + ae_vector xdir; + ae_vector deltax; + ae_vector deltaf; + ae_bool deltaxready; + ae_bool deltafready; + ae_int_t repiterationscount; + ae_int_t repterminationtype; + ae_int_t repnfunc; + ae_int_t repnjac; + ae_int_t repngrad; + ae_int_t repnhess; + ae_int_t repncholesky; + rcommstate rstate; + ae_vector choleskybuf; + double actualdecrease; + double predicteddecrease; + ae_vector fm2; + ae_vector fm1; + ae_vector fp2; + ae_vector fp1; + minlbfgsstate internalstate; + minlbfgsreport internalrep; +} minlmstate; +typedef struct +{ + ae_int_t iterationscount; + ae_int_t terminationtype; + ae_int_t nfunc; + ae_int_t njac; + ae_int_t ngrad; + ae_int_t nhess; + ae_int_t ncholesky; +} minlmreport; +typedef struct +{ + ae_int_t n; + double epsg; + double epsf; + double epsx; + ae_int_t maxits; + ae_bool xrep; + double stpmax; + ae_int_t cgtype; + ae_int_t k; + ae_int_t nfev; + ae_int_t mcstage; + ae_vector bndl; + ae_vector bndu; + ae_int_t curalgo; + ae_int_t acount; + double mu; + double finit; + double dginit; + ae_vector ak; + ae_vector xk; + ae_vector dk; + ae_vector an; + ae_vector xn; + ae_vector dn; + ae_vector d; + double fold; + double stp; + ae_vector work; + ae_vector yk; + ae_vector gc; + double laststep; + ae_vector x; + double f; + ae_vector g; + ae_bool needfg; + ae_bool xupdated; + rcommstate rstate; + ae_int_t repiterationscount; + ae_int_t repnfev; + ae_int_t repterminationtype; + ae_int_t debugrestartscount; + linminstate lstate; + double betahs; + double betady; +} minasastate; +typedef struct +{ + ae_int_t iterationscount; + ae_int_t nfev; + ae_int_t terminationtype; + ae_int_t activeconstraints; +} minasareport; +typedef struct +{ + ae_int_t n; + double epsg; + double epsf; + double epsx; + ae_int_t maxits; + double stpmax; + double suggestedstep; + ae_bool xrep; + ae_bool drep; + ae_int_t cgtype; + ae_int_t nfev; + ae_int_t mcstage; + ae_int_t k; + ae_vector xk; + ae_vector dk; + ae_vector xn; + ae_vector dn; + ae_vector d; + double fold; + double stp; + double curstpmax; + ae_vector work; + ae_vector yk; + double laststep; + ae_int_t rstimer; + ae_vector x; + double f; + ae_vector g; + ae_bool needfg; + ae_bool xupdated; + ae_bool lsstart; + ae_bool lsend; + rcommstate rstate; + ae_int_t repiterationscount; + ae_int_t repnfev; + ae_int_t repterminationtype; + ae_int_t debugrestartscount; + linminstate lstate; + double betahs; + double betady; +} mincgstate; +typedef struct +{ + ae_int_t iterationscount; + ae_int_t nfev; + ae_int_t terminationtype; +} mincgreport; +typedef struct +{ + ae_int_t n; + double innerepsg; + double innerepsf; + double innerepsx; + double outerepsx; + double outerepsi; + ae_int_t maxits; + ae_bool xrep; + double stpmax; + ae_int_t cgtype; + double mustart; + double mudecay; + ae_vector x; + double f; + ae_vector g; + ae_bool needfg; + ae_bool xupdated; + rcommstate rstate; + ae_int_t repinneriterationscount; + ae_int_t repouteriterationscount; + ae_int_t repnfev; + ae_int_t repterminationtype; + double repdebugeqerr; + double repdebugfs; + double repdebugff; + double repdebugdx; + ae_vector xcur; + ae_vector xprev; + ae_vector xstart; + ae_int_t itsleft; + ae_int_t mucounter; + ae_matrix ce; + ae_matrix ci; + ae_matrix cebasis; + ae_matrix cesvl; + ae_vector xe; + ae_int_t cecnt; + ae_int_t cicnt; + ae_int_t cedim; + ae_vector bndl; + ae_vector hasbndl; + ae_vector bndu; + ae_vector hasbndu; + ae_vector lm; + ae_int_t lmcnt; + double mu; + ae_vector w; + ae_vector tmp0; + ae_vector tmp1; + ae_vector r; + double v0; + double v1; + double v2; + double t; + double errfeas; + double errslack; + double gnorm; + double mpgnorm; + double lmdif; + double lmnorm; + double lmgrowth; + double mba; + double boundary; + ae_bool closetobarrier; + double bndmax; + linminstate lstate; + mincgstate cgstate; + mincgreport cgrep; +} minbleicstate; +typedef struct +{ + ae_int_t inneriterationscount; + ae_int_t outeriterationscount; + ae_int_t nfev; + ae_int_t terminationtype; + double debugeqerr; + double debugfs; + double debugff; + double debugdx; +} minbleicreport; + +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS C++ INTERFACE +// +///////////////////////////////////////////////////////////////////////// +namespace alglib +{ + +/************************************************************************* + +*************************************************************************/ +class _minlbfgsstate_owner +{ +public: + _minlbfgsstate_owner(); + _minlbfgsstate_owner(const _minlbfgsstate_owner &rhs); + _minlbfgsstate_owner& operator=(const _minlbfgsstate_owner &rhs); + virtual ~_minlbfgsstate_owner(); + alglib_impl::minlbfgsstate* c_ptr(); + alglib_impl::minlbfgsstate* c_ptr() const; +protected: + alglib_impl::minlbfgsstate *p_struct; +}; +class minlbfgsstate : public _minlbfgsstate_owner +{ +public: + minlbfgsstate(); + minlbfgsstate(const minlbfgsstate &rhs); + minlbfgsstate& operator=(const minlbfgsstate &rhs); + virtual ~minlbfgsstate(); + ae_bool &needfg; + ae_bool &xupdated; + double &f; + real_1d_array g; + real_1d_array x; + +}; + + +/************************************************************************* + +*************************************************************************/ +class _minlbfgsreport_owner +{ +public: + _minlbfgsreport_owner(); + _minlbfgsreport_owner(const _minlbfgsreport_owner &rhs); + _minlbfgsreport_owner& operator=(const _minlbfgsreport_owner &rhs); + virtual ~_minlbfgsreport_owner(); + alglib_impl::minlbfgsreport* c_ptr(); + alglib_impl::minlbfgsreport* c_ptr() const; +protected: + alglib_impl::minlbfgsreport *p_struct; +}; +class minlbfgsreport : public _minlbfgsreport_owner +{ +public: + minlbfgsreport(); + minlbfgsreport(const minlbfgsreport &rhs); + minlbfgsreport& operator=(const minlbfgsreport &rhs); + virtual ~minlbfgsreport(); + ae_int_t &iterationscount; + ae_int_t &nfev; + ae_int_t &terminationtype; + +}; + +/************************************************************************* +Levenberg-Marquardt optimizer. + +This structure should be created using one of the MinLMCreate???() +functions. You should not access its fields directly; use ALGLIB functions +to work with it. +*************************************************************************/ +class _minlmstate_owner +{ +public: + _minlmstate_owner(); + _minlmstate_owner(const _minlmstate_owner &rhs); + _minlmstate_owner& operator=(const _minlmstate_owner &rhs); + virtual ~_minlmstate_owner(); + alglib_impl::minlmstate* c_ptr(); + alglib_impl::minlmstate* c_ptr() const; +protected: + alglib_impl::minlmstate *p_struct; +}; +class minlmstate : public _minlmstate_owner +{ +public: + minlmstate(); + minlmstate(const minlmstate &rhs); + minlmstate& operator=(const minlmstate &rhs); + virtual ~minlmstate(); + ae_bool &needf; + ae_bool &needfg; + ae_bool &needfgh; + ae_bool &needfi; + ae_bool &needfij; + ae_bool &xupdated; + double &f; + real_1d_array fi; + real_1d_array g; + real_2d_array h; + real_2d_array j; + real_1d_array x; + +}; + + +/************************************************************************* +Optimization report, filled by MinLMResults() function + +FIELDS: +* TerminationType, completetion code: + * -9 derivative correctness check failed; + see Rep.WrongNum, Rep.WrongI, Rep.WrongJ for + more information. + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient is no more than EpsG. + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible +* IterationsCount, contains iterations count +* NFunc, number of function calculations +* NJac, number of Jacobi matrix calculations +* NGrad, number of gradient calculations +* NHess, number of Hessian calculations +* NCholesky, number of Cholesky decomposition calculations +*************************************************************************/ +class _minlmreport_owner +{ +public: + _minlmreport_owner(); + _minlmreport_owner(const _minlmreport_owner &rhs); + _minlmreport_owner& operator=(const _minlmreport_owner &rhs); + virtual ~_minlmreport_owner(); + alglib_impl::minlmreport* c_ptr(); + alglib_impl::minlmreport* c_ptr() const; +protected: + alglib_impl::minlmreport *p_struct; +}; +class minlmreport : public _minlmreport_owner +{ +public: + minlmreport(); + minlmreport(const minlmreport &rhs); + minlmreport& operator=(const minlmreport &rhs); + virtual ~minlmreport(); + ae_int_t &iterationscount; + ae_int_t &terminationtype; + ae_int_t &nfunc; + ae_int_t &njac; + ae_int_t &ngrad; + ae_int_t &nhess; + ae_int_t &ncholesky; + +}; + +/************************************************************************* + +*************************************************************************/ +class _minasastate_owner +{ +public: + _minasastate_owner(); + _minasastate_owner(const _minasastate_owner &rhs); + _minasastate_owner& operator=(const _minasastate_owner &rhs); + virtual ~_minasastate_owner(); + alglib_impl::minasastate* c_ptr(); + alglib_impl::minasastate* c_ptr() const; +protected: + alglib_impl::minasastate *p_struct; +}; +class minasastate : public _minasastate_owner +{ +public: + minasastate(); + minasastate(const minasastate &rhs); + minasastate& operator=(const minasastate &rhs); + virtual ~minasastate(); + ae_bool &needfg; + ae_bool &xupdated; + double &f; + real_1d_array g; + real_1d_array x; + +}; + + +/************************************************************************* + +*************************************************************************/ +class _minasareport_owner +{ +public: + _minasareport_owner(); + _minasareport_owner(const _minasareport_owner &rhs); + _minasareport_owner& operator=(const _minasareport_owner &rhs); + virtual ~_minasareport_owner(); + alglib_impl::minasareport* c_ptr(); + alglib_impl::minasareport* c_ptr() const; +protected: + alglib_impl::minasareport *p_struct; +}; +class minasareport : public _minasareport_owner +{ +public: + minasareport(); + minasareport(const minasareport &rhs); + minasareport& operator=(const minasareport &rhs); + virtual ~minasareport(); + ae_int_t &iterationscount; + ae_int_t &nfev; + ae_int_t &terminationtype; + ae_int_t &activeconstraints; + +}; + +/************************************************************************* +This object stores state of the nonlinear CG optimizer. + +You should use ALGLIB functions to work with this object. +*************************************************************************/ +class _mincgstate_owner +{ +public: + _mincgstate_owner(); + _mincgstate_owner(const _mincgstate_owner &rhs); + _mincgstate_owner& operator=(const _mincgstate_owner &rhs); + virtual ~_mincgstate_owner(); + alglib_impl::mincgstate* c_ptr(); + alglib_impl::mincgstate* c_ptr() const; +protected: + alglib_impl::mincgstate *p_struct; +}; +class mincgstate : public _mincgstate_owner +{ +public: + mincgstate(); + mincgstate(const mincgstate &rhs); + mincgstate& operator=(const mincgstate &rhs); + virtual ~mincgstate(); + ae_bool &needfg; + ae_bool &xupdated; + double &f; + real_1d_array g; + real_1d_array x; + +}; + + +/************************************************************************* + +*************************************************************************/ +class _mincgreport_owner +{ +public: + _mincgreport_owner(); + _mincgreport_owner(const _mincgreport_owner &rhs); + _mincgreport_owner& operator=(const _mincgreport_owner &rhs); + virtual ~_mincgreport_owner(); + alglib_impl::mincgreport* c_ptr(); + alglib_impl::mincgreport* c_ptr() const; +protected: + alglib_impl::mincgreport *p_struct; +}; +class mincgreport : public _mincgreport_owner +{ +public: + mincgreport(); + mincgreport(const mincgreport &rhs); + mincgreport& operator=(const mincgreport &rhs); + virtual ~mincgreport(); + ae_int_t &iterationscount; + ae_int_t &nfev; + ae_int_t &terminationtype; + +}; + +/************************************************************************* +This object stores nonlinear optimizer state. +You should use functions provided by MinBLEIC subpackage to work with this +object +*************************************************************************/ +class _minbleicstate_owner +{ +public: + _minbleicstate_owner(); + _minbleicstate_owner(const _minbleicstate_owner &rhs); + _minbleicstate_owner& operator=(const _minbleicstate_owner &rhs); + virtual ~_minbleicstate_owner(); + alglib_impl::minbleicstate* c_ptr(); + alglib_impl::minbleicstate* c_ptr() const; +protected: + alglib_impl::minbleicstate *p_struct; +}; +class minbleicstate : public _minbleicstate_owner +{ +public: + minbleicstate(); + minbleicstate(const minbleicstate &rhs); + minbleicstate& operator=(const minbleicstate &rhs); + virtual ~minbleicstate(); + ae_bool &needfg; + ae_bool &xupdated; + double &f; + real_1d_array g; + real_1d_array x; + +}; + + +/************************************************************************* +This structure stores optimization report: +* InnerIterationsCount number of inner iterations +* OuterIterationsCount number of outer iterations +* NFEV number of gradient evaluations + +There are additional fields which can be used for debugging: +* DebugEqErr error in the equality constraints (2-norm) +* DebugFS f, calculated at projection of initial point + to the feasible set +* DebugFF f, calculated at the final point +* DebugDX |X_start-X_final| +*************************************************************************/ +class _minbleicreport_owner +{ +public: + _minbleicreport_owner(); + _minbleicreport_owner(const _minbleicreport_owner &rhs); + _minbleicreport_owner& operator=(const _minbleicreport_owner &rhs); + virtual ~_minbleicreport_owner(); + alglib_impl::minbleicreport* c_ptr(); + alglib_impl::minbleicreport* c_ptr() const; +protected: + alglib_impl::minbleicreport *p_struct; +}; +class minbleicreport : public _minbleicreport_owner +{ +public: + minbleicreport(); + minbleicreport(const minbleicreport &rhs); + minbleicreport& operator=(const minbleicreport &rhs); + virtual ~minbleicreport(); + ae_int_t &inneriterationscount; + ae_int_t &outeriterationscount; + ae_int_t &nfev; + ae_int_t &terminationtype; + double &debugeqerr; + double &debugfs; + double &debugff; + double &debugdx; + +}; + +/************************************************************************* + LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using a quasi- +Newton method (LBFGS scheme) which is optimized to use a minimum amount +of memory. +The subroutine generates the approximation of an inverse Hessian matrix by +using information about the last M steps of the algorithm (instead of N). +It lessens a required amount of memory from a value of order N^2 to a +value of order 2*N*M. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinLBFGSCreate() call +2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax() + and other functions +3. User calls MinLBFGSOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinLBFGSResults() to get solution +5. Optionally user may call MinLBFGSRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLBFGSRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension. N>0 + M - number of corrections in the BFGS scheme of Hessian + approximation update. Recommended value: 3<=M<=7. The smaller + value causes worse convergence, the bigger will not cause a + considerably better convergence, but will cause a fall in the + performance. M<=N. + X - initial solution approximation, array[0..N-1]. + + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + +NOTES: +1. you may tune stopping conditions with MinLBFGSSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLBFGSSetStpMax() function to bound algorithm's steps. However, + L-BFGS rarely needs such a tuning. + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgscreate(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlbfgsstate &state); +void minlbfgscreate(const ae_int_t m, const real_1d_array &x, minlbfgsstate &state); + + +/************************************************************************* +This function sets stopping conditions for L-BFGS optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetcond(const minlbfgsstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits); + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinLBFGSOptimize(). + + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetxrep(const minlbfgsstate &state, const bool needxrep); + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0 (default), if + you don't want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetstpmax(const minlbfgsstate &state, const double stpmax); + + +/************************************************************************* +Modification of the preconditioner: +default preconditioner (simple scaling) is used. + +INPUT PARAMETERS: + State - structure which stores algorithm state + +After call to this function preconditioner is changed to the default one. + +NOTE: you can change preconditioner "on the fly", during algorithm +iterations. + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetdefaultpreconditioner(const minlbfgsstate &state); + + +/************************************************************************* +Modification of the preconditioner: +Cholesky factorization of approximate Hessian is used. + +INPUT PARAMETERS: + State - structure which stores algorithm state + P - triangular preconditioner, Cholesky factorization of + the approximate Hessian. array[0..N-1,0..N-1], + (if larger, only leading N elements are used). + IsUpper - whether upper or lower triangle of P is given + (other triangle is not referenced) + +After call to this function preconditioner is changed to P (P is copied +into the internal buffer). + +NOTE: you can change preconditioner "on the fly", during algorithm +iterations. + +NOTE 2: P should be nonsingular. Exception will be thrown otherwise. It +also should be well conditioned, although only strict non-singularity is +tested. + + -- ALGLIB -- + Copyright 13.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgssetcholeskypreconditioner(const minlbfgsstate &state, const real_2d_array &p, const bool isupper); + + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minlbfgsiteration(const minlbfgsstate &state); + + +/************************************************************************* +This family of functions is used to launcn iterations of nonlinear optimizer + +These functions accept following parameters: + state - algorithm state + grad - callback which calculates function (or merit function) + value func and gradient grad at given point x + rep - optional callback which is called after each iteration + can be NULL + ptr - optional pointer which is passed to func/grad/hess/jac/rep + can be NULL + + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey + +*************************************************************************/ +void minlbfgsoptimize(minlbfgsstate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); + + +/************************************************************************* +L-BFGS algorithm results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -2 rounding errors prevent further improvement. + X contains best point found. + * -1 incorrect parameters were specified + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsresults(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep); + + +/************************************************************************* +L-BFGS algorithm results + +Buffered implementation of MinLBFGSResults which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.08.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsresultsbuf(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep); + + +/************************************************************************* +This subroutine restarts LBFGS algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used to store algorithm state + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minlbfgsrestartfrom(const minlbfgsstate &state, const real_1d_array &x); + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] and Jacobian of f[]. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() and jac() callbacks. +First one is used to calculate f[] at given point, second one calculates +f[] and Jacobian df[i]/dx[j]. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state); +void minlmcreatevj(const ae_int_t m, const real_1d_array &x, minlmstate &state); + + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of function vector f[] only. Finite differences are used to +calculate Jacobian. + + +REQUIREMENTS: +This algorithm will request following information during its operation: +* function vector f[] at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec() callback. + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not accept function vector), but +it will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateV() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + DiffStep- differentiation step, >0 + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +See also MinLMIteration, MinLMResults. + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatev(const ae_int_t n, const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state); +void minlmcreatev(const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state); + + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of general form (not "sum-of- +-squares") function + F = F(x[0], ..., x[n-1]) +using its gradient and Hessian. Levenberg-Marquardt modification with +L-BFGS pre-optimization and internal pre-conditioned L-BFGS optimization +after each Levenberg-Marquardt step is used. + + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function value F at given point X +* F and gradient G (simultaneously) at given point X +* F, G and Hessian H (simultaneously) at given point X + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts func(), grad() and hess() +function pointers. First pointer is used to calculate F at given point, +second one calculates F(x) and grad F(x), third one calculates F(x), +grad F(x), hess F(x). + +You can try to initialize MinLMState structure with FGH-function and then +use incorrect version of MinLMOptimize() (for example, version which does +not provide Hessian matrix), but it will lead to exception being thrown +after first attempt to calculate Hessian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateFGH() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + pointers (delegates, etc.) to callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgh(const ae_int_t n, const real_1d_array &x, minlmstate &state); +void minlmcreatefgh(const real_1d_array &x, minlmstate &state); + + +/************************************************************************* + IMPROVED LEVENBERG-MARQUARDT METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using: +* value of function vector f[] +* value of Jacobian of f[] +* gradient of merit function F(x) + +This function creates optimizer which uses acceleration strategy 2. Cheap +gradient of merit function (which is twice the product of function vector +and Jacobian) is used for accelerated iterations (see User Guide for more +info on this subject). + +REQUIREMENTS: +This algorithm will request following information during its operation: + +* function vector f[] at given point X +* function vector f[] and Jacobian of f[] (simultaneously) at given point +* gradient of + +There are several overloaded versions of MinLMOptimize() function which +correspond to different LM-like optimization algorithms provided by this +unit. You should choose version which accepts fvec(), jac() and grad() +callbacks. First one is used to calculate f[] at given point, second one +calculates f[] and Jacobian df[i]/dx[j], last one calculates gradient of +merit function F(x). + +You can try to initialize MinLMState structure with VJ function and then +use incorrect version of MinLMOptimize() (for example, version which +works with general form function and does not provide Jacobian), but it +will lead to exception being thrown after first attempt to calculate +Jacobian. + + +USAGE: +1. User initializes algorithm state with MinLMCreateVGJ() call +2. User tunes solver parameters with MinLMSetCond(), MinLMSetStpMax() and + other functions +3. User calls MinLMOptimize() function which takes algorithm state and + callback functions. +4. User calls MinLMResults() to get solution +5. Optionally, user may call MinLMRestartFrom() to solve another problem + with same N/M but another starting point and/or another function. + MinLMRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - dimension, N>1 + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + M - number of functions f[i] + X - initial solution, array[0..N-1] + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + +NOTES: +1. you may tune stopping conditions with MinLMSetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinLMSetStpMax() function to bound algorithm's steps. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatevgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state); +void minlmcreatevgj(const ae_int_t m, const real_1d_array &x, minlmstate &state); + + +/************************************************************************* + LEVENBERG-MARQUARDT-LIKE METHOD FOR + NON-LINEAR LEAST SQUARES OPTIMIZATION + +DESCRIPTION: + +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), gradient of F(), function vector f[] and Jacobian of +f[]. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVGJ, which +provides similar, but more consistent interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state); +void minlmcreatefgj(const ae_int_t m, const real_1d_array &x, minlmstate &state); + + +/************************************************************************* + CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION + +DESCRIPTION: +This function is used to find minimum of function which is represented as +sum of squares: + F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1]) +using value of F(), function vector f[] and Jacobian of f[]. Classic +Levenberg-Marquardt method is used. + +This function is considered obsolete since ALGLIB 3.1.0 and is present for +backward compatibility only. We recommend to use MinLMCreateVJ, which +provides similar, but more consistent and feature-rich interface. + + -- ALGLIB -- + Copyright 30.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmcreatefj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state); +void minlmcreatefj(const ae_int_t m, const real_1d_array &x, minlmstate &state); + + +/************************************************************************* +This function sets stopping conditions for Levenberg-Marquardt optimization +algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. Only Levenberg-Marquardt + iterations are counted (L-BFGS/CG iterations are NOT + counted because their cost is very low compared to that of + LM). + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetcond(const minlmstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits); + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinLMOptimize(). Both Levenberg-Marquardt and internal L-BFGS +iterations are reported. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetxrep(const minlmstate &state, const bool needxrep); + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + +NOTE: non-zero StpMax leads to moderate performance degradation because +intermediate step of preconditioned L-BFGS optimization is incompatible +with limits on step size. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetstpmax(const minlmstate &state, const double stpmax); + + +/************************************************************************* +This function is used to change acceleration settings + +You can choose between three acceleration strategies: +* AccType=0, no acceleration. +* AccType=1, secant updates are used to update quadratic model after each + iteration. After fixed number of iterations (or after model breakdown) + we recalculate quadratic model using analytic Jacobian or finite + differences. Number of secant-based iterations depends on optimization + settings: about 3 iterations - when we have analytic Jacobian, up to 2*N + iterations - when we use finite differences to calculate Jacobian. +* AccType=2, after quadratic model is built and LM step is made, we use it + as preconditioner for several (5-10) iterations of L-BFGS algorithm. + +AccType=1 is recommended when Jacobian calculation cost is prohibitive +high (several Mx1 function vector calculations followed by several NxN +Cholesky factorizations are faster than calculation of one M*N Jacobian). +It should also be used when we have no Jacobian, because finite difference +approximation takes too much time to compute. + +AccType=2 is recommended when Jacobian is cheap - much more cheaper than +one Cholesky factorization. We can reduce number of Cholesky +factorizations at the cost of increased number of Jacobian calculations. +Sometimes it helps. + +Table below list optimization protocols (XYZ protocol corresponds to +MinLMCreateXYZ) and acceleration types they support (and use by default). + +ACCELERATION TYPES SUPPORTED BY OPTIMIZATION PROTOCOLS: + +protocol 0 1 2 comment +V + + +VJ + + + +FGH + + +VGJ + + + special protocol, not for widespread use +FJ + + obsolete protocol, not recommended +FGJ + + obsolete protocol, not recommended + +DAFAULT VALUES: + +protocol 0 1 2 comment +V x without acceleration it is so slooooooooow +VJ x +FGH x +VGJ x we've implicitly turned (2) by passing gradient +FJ x obsolete protocol, not recommended +FGJ x obsolete protocol, not recommended + +NOTE: this function should be called before optimization. Attempt to call +it during algorithm iterations may result in unexpected behavior. + +NOTE: attempt to call this function with unsupported protocol/acceleration +combination will result in exception being thrown. + + -- ALGLIB -- + Copyright 14.10.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmsetacctype(const minlmstate &state, const ae_int_t acctype); + + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minlmiteration(const minlmstate &state); + + +/************************************************************************* +This family of functions is used to launcn iterations of nonlinear optimizer + +These functions accept following parameters: + state - algorithm state + func - callback which calculates function (or merit function) + value func at given point x + grad - callback which calculates function (or merit function) + value func and gradient grad at given point x + hess - callback which calculates function (or merit function) + value func, gradient grad and Hessian hess at given point x + fvec - callback which calculates function vector fi[] + at given point x + jac - callback which calculates function vector fi[] + and Jacobian jac at given point x + rep - optional callback which is called after each iteration + can be NULL + ptr - optional pointer which is passed to func/grad/hess/jac/rep + can be NULL + +NOTES: + +1. Depending on function used to create state structure, this algorithm + may accept Jacobian and/or Hessian and/or gradient. According to the + said above, there ase several versions of this function, which accept + different sets of callbacks. + + This flexibility opens way to subtle errors - you may create state with + MinLMCreateFGH() (optimization using Hessian), but call function which + does not accept Hessian. So when algorithm will request Hessian, there + will be no callback to call. In this case exception will be thrown. + + Be careful to avoid such errors because there is no way to find them at + compile time - you can see them at runtime only. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey + +*************************************************************************/ +void minlmoptimize(minlmstate &state, + void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); +void minlmoptimize(minlmstate &state, + void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr), + void (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); +void minlmoptimize(minlmstate &state, + void (*func)(const real_1d_array &x, double &func, void *ptr), + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*hess)(const real_1d_array &x, double &func, real_1d_array &grad, real_2d_array &hess, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); +void minlmoptimize(minlmstate &state, + void (*func)(const real_1d_array &x, double &func, void *ptr), + void (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); +void minlmoptimize(minlmstate &state, + void (*func)(const real_1d_array &x, double &func, void *ptr), + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); + + +/************************************************************************* +Levenberg-Marquardt algorithm results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report; + see comments for this structure for more info. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmresults(const minlmstate &state, real_1d_array &x, minlmreport &rep); + + +/************************************************************************* +Levenberg-Marquardt algorithm results + +Buffered implementation of MinLMResults(), which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 10.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minlmresultsbuf(const minlmstate &state, real_1d_array &x, minlmreport &rep); + + +/************************************************************************* +This subroutine restarts LM algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used for reverse communication previously + allocated with MinLMCreateXXX call. + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minlmrestartfrom(const minlmstate &state, const real_1d_array &x); + +/************************************************************************* + NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING + MODIFIED ACTIVE SET ALGORITHM + WILLIAM W. HAGER AND HONGCHAO ZHANG + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments with bound +constraints: BndL[i] <= x[i] <= BndU[i] + +This method is globally convergent as long as grad(f) is Lipschitz +continuous on a level set: L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinASACreate() call +2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and + other functions +3. User calls MinASAOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinASAResults() to get solution +5. Optionally, user may call MinASARestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinASARestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from sizes of + X/BndL/BndU. + X - starting point, array[0..N-1]. + BndL - lower bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very small number as bound: -1000, -1.0E6 + or -1.0E300, or something like that. + BndU - upper bounds, array[0..N-1]. + all elements MUST be specified, i.e. all variables are + bounded. However, if some (all) variables are unbounded, + you may specify very large number as bound: +1000, +1.0E6 + or +1.0E300, or something like that. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + +NOTES: + +1. you may tune stopping conditions with MinASASetCond() function +2. if target function contains exp() or other fast growing functions, and + optimization algorithm makes too large steps which leads to overflow, + use MinASASetStpMax() function to bound algorithm's steps. +3. this function does NOT support infinite/NaN values in X, BndL, BndU. + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void minasacreate(const ae_int_t n, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state); +void minasacreate(const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state); + + +/************************************************************************* +This function sets stopping conditions for the ASA optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetcond(const minasastate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits); + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinASAOptimize(). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetxrep(const minasastate &state, const bool needxrep); + + +/************************************************************************* +This function sets optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm stat + UAType - algorithm type: + * -1 automatic selection of the best algorithm + * 0 DY (Dai and Yuan) algorithm + * 1 Hybrid DY-HS algorithm + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetalgorithm(const minasastate &state, const ae_int_t algotype); + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length (zero by default). + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minasasetstpmax(const minasastate &state, const double stpmax); + + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minasaiteration(const minasastate &state); + + +/************************************************************************* +This family of functions is used to launcn iterations of nonlinear optimizer + +These functions accept following parameters: + state - algorithm state + grad - callback which calculates function (or merit function) + value func and gradient grad at given point x + rep - optional callback which is called after each iteration + can be NULL + ptr - optional pointer which is passed to func/grad/hess/jac/rep + can be NULL + + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey + +*************************************************************************/ +void minasaoptimize(minasastate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); + + +/************************************************************************* +ASA results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -2 rounding errors prevent further improvement. + X contains best point found. + * -1 incorrect parameters were specified + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + * ActiveConstraints contains number of active constraints + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minasaresults(const minasastate &state, real_1d_array &x, minasareport &rep); + + +/************************************************************************* +ASA results + +Buffered implementation of MinASAResults() which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.03.2009 by Bochkanov Sergey +*************************************************************************/ +void minasaresultsbuf(const minasastate &state, real_1d_array &x, minasareport &rep); + + +/************************************************************************* +This subroutine restarts CG algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure previously allocated with MinCGCreate call. + X - new starting point. + BndL - new lower bounds + BndU - new upper bounds + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void minasarestartfrom(const minasastate &state, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu); + +/************************************************************************* + NONLINEAR CONJUGATE GRADIENT METHOD + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments by using one of the +nonlinear conjugate gradient methods. + +These CG methods are globally convergent (even on non-convex functions) as +long as grad(f) is Lipschitz continuous in a some neighborhood of the +L = { x : f(x)<=f(x0) }. + + +REQUIREMENTS: +Algorithm will request following information during its operation: +* function value F and its gradient G (simultaneously) at given point X + + +USAGE: +1. User initializes algorithm state with MinCGCreate() call +2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and + other functions +3. User calls MinCGOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. +4. User calls MinCGResults() to get solution +5. Optionally, user may call MinCGRestartFrom() to solve another problem + with same N but another starting point and/or another function. + MinCGRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size of X + X - starting point, array[0..N-1]. + +OUTPUT PARAMETERS: + State - structure which stores algorithm state + + -- ALGLIB -- + Copyright 25.03.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgcreate(const ae_int_t n, const real_1d_array &x, mincgstate &state); +void mincgcreate(const real_1d_array &x, mincgstate &state); + + +/************************************************************************* +This function sets stopping conditions for CG optimization algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + The subroutine finishes its work if the condition + ||G||<EpsG is satisfied, where ||.|| means Euclidian norm, + G - gradient. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + MaxIts - maximum number of iterations. If MaxIts=0, the number of + iterations is unlimited. + +Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to +automatic stopping criterion selection (small EpsX). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetcond(const mincgstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits); + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinCGOptimize(). + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetxrep(const mincgstate &state, const bool needxrep); + + +/************************************************************************* +This function sets CG algorithm. + +INPUT PARAMETERS: + State - structure which stores algorithm state + CGType - algorithm type: + * -1 automatic selection of the best algorithm + * 0 DY (Dai and Yuan) algorithm + * 1 Hybrid DY-HS algorithm + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetcgtype(const mincgstate &state, const ae_int_t cgtype); + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which leads to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsetstpmax(const mincgstate &state, const double stpmax); + + +/************************************************************************* +This function allows to suggest initial step length to the CG algorithm. + +Suggested step length is used as starting point for the line search. It +can be useful when you have badly scaled problem, i.e. when ||grad|| +(which is used as initial estimate for the first step) is many orders of +magnitude different from the desired step. + +Line search may fail on such problems without good estimate of initial +step length. Imagine, for example, problem with ||grad||=10^50 and desired +step equal to 0.1 Line search function will use 10^50 as initial step, +then it will decrease step length by 2 (up to 20 attempts) and will get +10^44, which is still too large. + +This function allows us to tell than line search should be started from +some moderate step length, like 1.0, so algorithm will be able to detect +desired step length in a several searches. + +This function influences only first iteration of algorithm. It should be +called between MinCGCreate/MinCGRestartFrom() call and MinCGOptimize call. + +INPUT PARAMETERS: + State - structure used to store algorithm state. + Stp - initial estimate of the step length. + Can be zero (no estimate). + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgsuggeststep(const mincgstate &state, const double stp); + + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool mincgiteration(const mincgstate &state); + + +/************************************************************************* +This family of functions is used to launcn iterations of nonlinear optimizer + +These functions accept following parameters: + state - algorithm state + grad - callback which calculates function (or merit function) + value func and gradient grad at given point x + rep - optional callback which is called after each iteration + can be NULL + ptr - optional pointer which is passed to func/grad/hess/jac/rep + can be NULL + + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey + +*************************************************************************/ +void mincgoptimize(mincgstate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); + + +/************************************************************************* +Conjugate gradient results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * 1 relative function improvement is no more than + EpsF. + * 2 relative step is no more than EpsX. + * 4 gradient norm is no more than EpsG + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible, + we return best X found so far + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +void mincgresults(const mincgstate &state, real_1d_array &x, mincgreport &rep); + + +/************************************************************************* +Conjugate gradient results + +Buffered implementation of MinCGResults(), which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 20.04.2009 by Bochkanov Sergey +*************************************************************************/ +void mincgresultsbuf(const mincgstate &state, real_1d_array &x, mincgreport &rep); + + +/************************************************************************* +This subroutine restarts CG algorithm from new point. All optimization +parameters are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure used to store algorithm state. + X - new starting point. + + -- ALGLIB -- + Copyright 30.07.2010 by Bochkanov Sergey +*************************************************************************/ +void mincgrestartfrom(const mincgstate &state, const real_1d_array &x); + +/************************************************************************* + BOUND CONSTRAINED OPTIMIZATION + WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS + +DESCRIPTION: +The subroutine minimizes function F(x) of N arguments subject to any +combination of: +* bound constraints +* linear inequality constraints +* linear equality constraints + +REQUIREMENTS: +* function value and gradient +* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) } +* function must be defined even in the infeasible points (algorithm make take + steps in the infeasible area before converging to the feasible point) +* starting point X0 must be feasible or not too far away from the feasible set +* problem must satisfy strict complementary conditions + +USAGE: + +Constrained optimization if far more complex than the unconstrained one. +Here we give very brief outline of the BLEIC optimizer. We strongly recommend +you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide +on optimization, which is available at http://www.alglib.net/optimization/ + +1. User initializes algorithm state with MinBLEICCreate() call + +2. USer adds boundary and/or linear constraints by calling + MinBLEICSetBC() and MinBLEICSetLC() functions. + +3. User sets stopping conditions for underlying unconstrained solver + with MinBLEICSetInnerCond() call. + This function controls accuracy of underlying optimization algorithm. + +4. User sets stopping conditions for outer iteration by calling + MinBLEICSetOuterCond() function. + This function controls handling of boundary and inequality constraints. + +5. User tunes barrier parameters: + * barrier width with MinBLEICSetBarrierWidth() call + * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call + These functions control handling of boundary and inequality constraints. + +6. Additionally, user may set limit on number of internal iterations + by MinBLEICSetMaxIts() call. + This function allows to prevent algorithm from looping forever. + +7. User calls MinBLEICOptimize() function which takes algorithm state and + pointer (delegate, etc.) to callback function which calculates F/G. + +8. User calls MinBLEICResults() to get solution + +9. Optionally user may call MinBLEICRestartFrom() to solve another problem + with same N but another starting point. + MinBLEICRestartFrom() allows to reuse already initialized structure. + + +INPUT PARAMETERS: + N - problem dimension, N>0: + * if given, only leading N elements of X are used + * if not given, automatically determined from size ofX + X - starting point, array[N]: + * it is better to set X to a feasible point + * but X can be infeasible, in which case algorithm will try + to find feasible point first, using X as initial + approximation. + +OUTPUT PARAMETERS: + State - structure stores algorithm state + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleiccreate(const ae_int_t n, const real_1d_array &x, minbleicstate &state); +void minbleiccreate(const real_1d_array &x, minbleicstate &state); + + +/************************************************************************* +This function sets boundary constraints for BLEIC optimizer. + +Boundary constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure stores algorithm state + BndL - lower bounds, array[N]. + If some (all) variables are unbounded, you may specify + very small number or -INF. + BndU - upper bounds, array[N]. + If some (all) variables are unbounded, you may specify + very large number or +INF. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbc(const minbleicstate &state, const real_1d_array &bndl, const real_1d_array &bndu); + + +/************************************************************************* +This function sets linear constraints for BLEIC optimizer. + +Linear constraints are inactive by default (after initial creation). +They are preserved after algorithm restart with MinBLEICRestartFrom(). + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + C - linear constraints, array[K,N+1]. + Each row of C represents one constraint, either equality + or inequality (see below): + * first N elements correspond to coefficients, + * last element corresponds to the right part. + All elements of C (including right part) must be finite. + CT - type of constraints, array[K]: + * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1] + * if CT[i]=0, then I-th constraint is C[i,*]*x = C[i,n+1] + * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1] + K - number of equality/inequality constraints, K>=0: + * if given, only leading K elements of C/CT are used + * if not given, automatically determined from sizes of C/CT + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k); +void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct); + + +/************************************************************************* +This function sets stopping conditions for the underlying nonlinear CG +optimizer. It controls overall accuracy of solution. These conditions +should be strict enough in order for algorithm to converge. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsG - >=0 + Algorithm finishes its work if 2-norm of the Lagrangian + gradient is less than or equal to EpsG. + EpsF - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1} + is satisfied. + EpsX - >=0 + The subroutine finishes its work if on k+1-th iteration + the condition |X(k+1)-X(k)| <= EpsX is fulfilled. + +Passing EpsG=0, EpsF=0 and EpsX=0 (simultaneously) will lead to +automatic stopping criterion selection. + +These conditions are used to terminate inner iterations. However, you +need to tune termination conditions for outer iterations too. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetinnercond(const minbleicstate &state, const double epsg, const double epsf, const double epsx); + + +/************************************************************************* +This function sets stopping conditions for outer iteration of BLEIC algo. + +These conditions control accuracy of constraint handling and amount of +infeasibility allowed in the solution. + +INPUT PARAMETERS: + State - structure which stores algorithm state + EpsX - >0, stopping condition on outer iteration step length + EpsI - >0, stopping condition on infeasibility + +Both EpsX and EpsI must be non-zero. + +MEANING OF EpsX + +EpsX is a stopping condition for outer iterations. Algorithm will stop +when solution of the current modified subproblem will be within EpsX +(using 2-norm) of the previous solution. + +MEANING OF EpsI + +EpsI controls feasibility properties - algorithm won't stop until all +inequality constraints will be satisfied with error (distance from current +point to the feasible area) at most EpsI. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetoutercond(const minbleicstate &state, const double epsx, const double epsi); + + +/************************************************************************* +This function sets initial barrier width. + +BLEIC optimizer uses modified barrier functions to handle inequality +constraints. These functions are almost constant in the inner parts of the +feasible area, but grow rapidly to the infinity OUTSIDE of the feasible +area. Barrier width is a distance from feasible area to the point where +modified barrier function becomes infinite. + +Barrier width must be: +* small enough (below some problem-dependent value) in order for algorithm + to converge. Necessary condition is that the target function must be + well described by linear model in the areas as small as barrier width. +* not VERY small (in order to avoid difficulties associated with rapid + changes in the modified function, ill-conditioning, round-off issues). + +Choosing appropriate barrier width is very important for efficient +optimization, and it often requires error and trial. You can use two +strategies when choosing barrier width: +* set barrier width with MinBLEICSetBarrierWidth() call. In this case you + should try different barrier widths and examine results. +* set decreasing barrier width by combining MinBLEICSetBarrierWidth() and + MinBLEICSetBarrierDecay() calls. In this case algorithm will decrease + barrier width after each outer iteration until it encounters optimal + barrier width. + +INPUT PARAMETERS: + State - structure which stores algorithm state + Mu - >0, initial barrier width + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbarrierwidth(const minbleicstate &state, const double mu); + + +/************************************************************************* +This function sets decay coefficient for barrier width. + +By default, no barrier decay is used (Decay=1.0). + +BLEIC optimizer uses modified barrier functions to handle inequality +constraints. These functions are almost constant in the inner parts of the +feasible area, but grow rapidly to the infinity OUTSIDE of the feasible +area. Barrier width is a distance from feasible area to the point where +modified barrier function becomes infinite. Decay coefficient allows us to +decrease barrier width from the initial (suboptimial) value until +optimal value will be met. + +We recommend you either to set MuDecay=1.0 (no decay) or use some moderate +value like 0.5-0.7 + +INPUT PARAMETERS: + State - structure which stores algorithm state + MuDecay - 0<MuDecay<=1, decay coefficient + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetbarrierdecay(const minbleicstate &state, const double mudecay); + + +/************************************************************************* +This function allows to stop algorithm after specified number of inner +iterations. + +INPUT PARAMETERS: + State - structure which stores algorithm state + MaxIts - maximum number of inner iterations. + If MaxIts=0, the number of iterations is unlimited. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetmaxits(const minbleicstate &state, const ae_int_t maxits); + + +/************************************************************************* +This function turns on/off reporting. + +INPUT PARAMETERS: + State - structure which stores algorithm state + NeedXRep- whether iteration reports are needed or not + +If NeedXRep is True, algorithm will call rep() callback function if it is +provided to MinBLEICOptimize(). + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetxrep(const minbleicstate &state, const bool needxrep); + + +/************************************************************************* +This function sets maximum step length + +INPUT PARAMETERS: + State - structure which stores algorithm state + StpMax - maximum step length, >=0. Set StpMax to 0.0, if you don't + want to limit step length. + +Use this subroutine when you optimize target function which contains exp() +or other fast growing functions, and optimization algorithm makes too +large steps which lead to overflow. This function allows us to reject +steps that are too large (and therefore expose us to the possible +overflow) without actually calculating function value at the x+stp*d. + + -- ALGLIB -- + Copyright 02.04.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicsetstpmax(const minbleicstate &state, const double stpmax); + + +/************************************************************************* +This function provides reverse communication interface +Reverse communication interface is not documented or recommended to use. +See below for functions which provide better documented API +*************************************************************************/ +bool minbleiciteration(const minbleicstate &state); + + +/************************************************************************* +This family of functions is used to launcn iterations of nonlinear optimizer + +These functions accept following parameters: + state - algorithm state + grad - callback which calculates function (or merit function) + value func and gradient grad at given point x + rep - optional callback which is called after each iteration + can be NULL + ptr - optional pointer which is passed to func/grad/hess/jac/rep + can be NULL + + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey + +*************************************************************************/ +void minbleicoptimize(minbleicstate &state, + void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr), + void (*rep)(const real_1d_array &x, double func, void *ptr) = NULL, + void *ptr = NULL); + + +/************************************************************************* +BLEIC results + +INPUT PARAMETERS: + State - algorithm state + +OUTPUT PARAMETERS: + X - array[0..N-1], solution + Rep - optimization report: + * Rep.TerminationType completetion code: + * -3 inconsistent constraints. Feasible point is + either nonexistent or too hard to find. Try to + restart optimizer with better initial + approximation + * -2 rounding errors prevent further improvement. + X contains best point found. + * 4 conditions on constraints are fulfilled + with error less than or equal to EpsC + * 5 MaxIts steps was taken + * 7 stopping conditions are too stringent, + further improvement is impossible, + X contains best point found so far. + * Rep.IterationsCount contains iterations count + * NFEV countains number of function calculations + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicresults(const minbleicstate &state, real_1d_array &x, minbleicreport &rep); + + +/************************************************************************* +BLEIC results + +Buffered implementation of MinBLEICResults() which uses pre-allocated buffer +to store X[]. If buffer size is too small, it resizes buffer. It is +intended to be used in the inner cycles of performance critical algorithms +where array reallocation penalty is too large to be ignored. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicresultsbuf(const minbleicstate &state, real_1d_array &x, minbleicreport &rep); + + +/************************************************************************* +This subroutine restarts algorithm from new point. +All optimization parameters (including constraints) are left unchanged. + +This function allows to solve multiple optimization problems (which +must have same number of dimensions) without object reallocation penalty. + +INPUT PARAMETERS: + State - structure previously allocated with MinBLEICCreate call. + X - new starting point. + + -- ALGLIB -- + Copyright 28.11.2010 by Bochkanov Sergey +*************************************************************************/ +void minbleicrestartfrom(const minbleicstate &state, const real_1d_array &x); +} + +///////////////////////////////////////////////////////////////////////// +// +// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS) +// +///////////////////////////////////////////////////////////////////////// +namespace alglib_impl +{ +void minlbfgscreate(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlbfgsstate* state, + ae_state *_state); +void minlbfgssetcond(minlbfgsstate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state); +void minlbfgssetxrep(minlbfgsstate* state, + ae_bool needxrep, + ae_state *_state); +void minlbfgssetstpmax(minlbfgsstate* state, + double stpmax, + ae_state *_state); +void minlbfgscreatex(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + ae_int_t flags, + minlbfgsstate* state, + ae_state *_state); +void minlbfgssetdefaultpreconditioner(minlbfgsstate* state, + ae_state *_state); +void minlbfgssetcholeskypreconditioner(minlbfgsstate* state, + /* Real */ ae_matrix* p, + ae_bool isupper, + ae_state *_state); +ae_bool minlbfgsiteration(minlbfgsstate* state, ae_state *_state); +void minlbfgsresults(minlbfgsstate* state, + /* Real */ ae_vector* x, + minlbfgsreport* rep, + ae_state *_state); +void minlbfgsresultsbuf(minlbfgsstate* state, + /* Real */ ae_vector* x, + minlbfgsreport* rep, + ae_state *_state); +void minlbfgsrestartfrom(minlbfgsstate* state, + /* Real */ ae_vector* x, + ae_state *_state); +ae_bool _minlbfgsstate_init(minlbfgsstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minlbfgsstate_init_copy(minlbfgsstate* dst, minlbfgsstate* src, ae_state *_state, ae_bool make_automatic); +void _minlbfgsstate_clear(minlbfgsstate* p); +ae_bool _minlbfgsreport_init(minlbfgsreport* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minlbfgsreport_init_copy(minlbfgsreport* dst, minlbfgsreport* src, ae_state *_state, ae_bool make_automatic); +void _minlbfgsreport_clear(minlbfgsreport* p); +void minlmcreatevj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state); +void minlmcreatev(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + double diffstep, + minlmstate* state, + ae_state *_state); +void minlmcreatefgh(ae_int_t n, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state); +void minlmcreatevgj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state); +void minlmcreatefgj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state); +void minlmcreatefj(ae_int_t n, + ae_int_t m, + /* Real */ ae_vector* x, + minlmstate* state, + ae_state *_state); +void minlmsetcond(minlmstate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state); +void minlmsetxrep(minlmstate* state, ae_bool needxrep, ae_state *_state); +void minlmsetstpmax(minlmstate* state, double stpmax, ae_state *_state); +void minlmsetacctype(minlmstate* state, + ae_int_t acctype, + ae_state *_state); +ae_bool minlmiteration(minlmstate* state, ae_state *_state); +void minlmresults(minlmstate* state, + /* Real */ ae_vector* x, + minlmreport* rep, + ae_state *_state); +void minlmresultsbuf(minlmstate* state, + /* Real */ ae_vector* x, + minlmreport* rep, + ae_state *_state); +void minlmrestartfrom(minlmstate* state, + /* Real */ ae_vector* x, + ae_state *_state); +ae_bool _minlmstate_init(minlmstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minlmstate_init_copy(minlmstate* dst, minlmstate* src, ae_state *_state, ae_bool make_automatic); +void _minlmstate_clear(minlmstate* p); +ae_bool _minlmreport_init(minlmreport* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minlmreport_init_copy(minlmreport* dst, minlmreport* src, ae_state *_state, ae_bool make_automatic); +void _minlmreport_clear(minlmreport* p); +void minasacreate(ae_int_t n, + /* Real */ ae_vector* x, + /* Real */ ae_vector* bndl, + /* Real */ ae_vector* bndu, + minasastate* state, + ae_state *_state); +void minasasetcond(minasastate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state); +void minasasetxrep(minasastate* state, ae_bool needxrep, ae_state *_state); +void minasasetalgorithm(minasastate* state, + ae_int_t algotype, + ae_state *_state); +void minasasetstpmax(minasastate* state, double stpmax, ae_state *_state); +ae_bool minasaiteration(minasastate* state, ae_state *_state); +void minasaresults(minasastate* state, + /* Real */ ae_vector* x, + minasareport* rep, + ae_state *_state); +void minasaresultsbuf(minasastate* state, + /* Real */ ae_vector* x, + minasareport* rep, + ae_state *_state); +void minasarestartfrom(minasastate* state, + /* Real */ ae_vector* x, + /* Real */ ae_vector* bndl, + /* Real */ ae_vector* bndu, + ae_state *_state); +ae_bool _minasastate_init(minasastate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minasastate_init_copy(minasastate* dst, minasastate* src, ae_state *_state, ae_bool make_automatic); +void _minasastate_clear(minasastate* p); +ae_bool _minasareport_init(minasareport* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minasareport_init_copy(minasareport* dst, minasareport* src, ae_state *_state, ae_bool make_automatic); +void _minasareport_clear(minasareport* p); +void mincgcreate(ae_int_t n, + /* Real */ ae_vector* x, + mincgstate* state, + ae_state *_state); +void mincgsetcond(mincgstate* state, + double epsg, + double epsf, + double epsx, + ae_int_t maxits, + ae_state *_state); +void mincgsetxrep(mincgstate* state, ae_bool needxrep, ae_state *_state); +void mincgsetdrep(mincgstate* state, ae_bool needdrep, ae_state *_state); +void mincgsetcgtype(mincgstate* state, ae_int_t cgtype, ae_state *_state); +void mincgsetstpmax(mincgstate* state, double stpmax, ae_state *_state); +void mincgsuggeststep(mincgstate* state, double stp, ae_state *_state); +ae_bool mincgiteration(mincgstate* state, ae_state *_state); +void mincgresults(mincgstate* state, + /* Real */ ae_vector* x, + mincgreport* rep, + ae_state *_state); +void mincgresultsbuf(mincgstate* state, + /* Real */ ae_vector* x, + mincgreport* rep, + ae_state *_state); +void mincgrestartfrom(mincgstate* state, + /* Real */ ae_vector* x, + ae_state *_state); +ae_bool _mincgstate_init(mincgstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _mincgstate_init_copy(mincgstate* dst, mincgstate* src, ae_state *_state, ae_bool make_automatic); +void _mincgstate_clear(mincgstate* p); +ae_bool _mincgreport_init(mincgreport* p, ae_state *_state, ae_bool make_automatic); +ae_bool _mincgreport_init_copy(mincgreport* dst, mincgreport* src, ae_state *_state, ae_bool make_automatic); +void _mincgreport_clear(mincgreport* p); +void minbleiccreate(ae_int_t n, + /* Real */ ae_vector* x, + minbleicstate* state, + ae_state *_state); +void minbleicsetbc(minbleicstate* state, + /* Real */ ae_vector* bndl, + /* Real */ ae_vector* bndu, + ae_state *_state); +void minbleicsetlc(minbleicstate* state, + /* Real */ ae_matrix* c, + /* Integer */ ae_vector* ct, + ae_int_t k, + ae_state *_state); +void minbleicsetinnercond(minbleicstate* state, + double epsg, + double epsf, + double epsx, + ae_state *_state); +void minbleicsetoutercond(minbleicstate* state, + double epsx, + double epsi, + ae_state *_state); +void minbleicsetbarrierwidth(minbleicstate* state, + double mu, + ae_state *_state); +void minbleicsetbarrierdecay(minbleicstate* state, + double mudecay, + ae_state *_state); +void minbleicsetmaxits(minbleicstate* state, + ae_int_t maxits, + ae_state *_state); +void minbleicsetxrep(minbleicstate* state, + ae_bool needxrep, + ae_state *_state); +void minbleicsetstpmax(minbleicstate* state, + double stpmax, + ae_state *_state); +ae_bool minbleiciteration(minbleicstate* state, ae_state *_state); +void minbleicresults(minbleicstate* state, + /* Real */ ae_vector* x, + minbleicreport* rep, + ae_state *_state); +void minbleicresultsbuf(minbleicstate* state, + /* Real */ ae_vector* x, + minbleicreport* rep, + ae_state *_state); +void minbleicrestartfrom(minbleicstate* state, + /* Real */ ae_vector* x, + ae_state *_state); +void barrierfunc(double x, + double mu, + double* f, + double* df, + double* d2f, + ae_state *_state); +ae_bool _minbleicstate_init(minbleicstate* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minbleicstate_init_copy(minbleicstate* dst, minbleicstate* src, ae_state *_state, ae_bool make_automatic); +void _minbleicstate_clear(minbleicstate* p); +ae_bool _minbleicreport_init(minbleicreport* p, ae_state *_state, ae_bool make_automatic); +ae_bool _minbleicreport_init_copy(minbleicreport* dst, minbleicreport* src, ae_state *_state, ae_bool make_automatic); +void _minbleicreport_clear(minbleicreport* p); + +} +#endif + diff --git a/contrib/lbfgs/stdafx.h b/contrib/lbfgs/stdafx.h new file mode 100755 index 0000000000..99a8091366 --- /dev/null +++ b/contrib/lbfgs/stdafx.h @@ -0,0 +1,2 @@ + + -- GitLab