From 8ae178e66d150d12d718f2d055102ef911bfedd3 Mon Sep 17 00:00:00 2001
From: Tristan Carrier Baudouin <tristan.carrier@uclouvain.be>
Date: Fri, 4 Mar 2011 11:11:30 +0000
Subject: [PATCH] lbfgs library

---
 contrib/lbfgs/CMakeLists.txt     |    10 +
 contrib/lbfgs/alglibinternal.cpp | 10712 ++++++++++
 contrib/lbfgs/alglibinternal.h   |   707 +
 contrib/lbfgs/alglibmisc.cpp     |  3083 +++
 contrib/lbfgs/alglibmisc.h       |   685 +
 contrib/lbfgs/ap.cpp             |  8952 +++++++++
 contrib/lbfgs/ap.h               |  1203 ++
 contrib/lbfgs/linalg.cpp         | 30199 +++++++++++++++++++++++++++++
 contrib/lbfgs/linalg.h           |  4101 ++++
 contrib/lbfgs/optimization.cpp   | 11827 +++++++++++
 contrib/lbfgs/optimization.h     |  2649 +++
 contrib/lbfgs/stdafx.h           |     2 +
 12 files changed, 74130 insertions(+)
 create mode 100644 contrib/lbfgs/CMakeLists.txt
 create mode 100755 contrib/lbfgs/alglibinternal.cpp
 create mode 100755 contrib/lbfgs/alglibinternal.h
 create mode 100755 contrib/lbfgs/alglibmisc.cpp
 create mode 100755 contrib/lbfgs/alglibmisc.h
 create mode 100755 contrib/lbfgs/ap.cpp
 create mode 100755 contrib/lbfgs/ap.h
 create mode 100755 contrib/lbfgs/linalg.cpp
 create mode 100755 contrib/lbfgs/linalg.h
 create mode 100755 contrib/lbfgs/optimization.cpp
 create mode 100755 contrib/lbfgs/optimization.h
 create mode 100755 contrib/lbfgs/stdafx.h

diff --git a/contrib/lbfgs/CMakeLists.txt b/contrib/lbfgs/CMakeLists.txt
new file mode 100644
index 0000000000..5c8988732e
--- /dev/null
+++ b/contrib/lbfgs/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(SRC
+  ap.cpp
+  alglibinternal.cpp
+  alglibmisc.cpp
+  linalg.cpp
+  optimization.cpp
+)
+
+file(GLOB_RECURSE HDR RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.h)
+append_gmsh_src(contrib/lbfgs "${SRC};${HDR}")
diff --git a/contrib/lbfgs/alglibinternal.cpp b/contrib/lbfgs/alglibinternal.cpp
new file mode 100755
index 0000000000..4d0ddef2a5
--- /dev/null
+++ b/contrib/lbfgs/alglibinternal.cpp
@@ -0,0 +1,10712 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#include "stdafx.h"
+#include "alglibinternal.h"
+
+// disable some irrelevant warnings
+#if (AE_COMPILER==AE_MSVC)
+#pragma warning(disable:4100)
+#pragma warning(disable:4127)
+#pragma warning(disable:4702)
+#pragma warning(disable:4996)
+#endif
+using namespace std;
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+static void tsort_tagsortfastirec(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Integer */ ae_vector* bufb,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state);
+static void tsort_tagsortfastrrec(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Real    */ ae_vector* bufb,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state);
+static void tsort_tagsortfastrec(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* bufa,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+static void hsschur_internalauxschur(ae_bool wantt,
+     ae_bool wantz,
+     ae_int_t n,
+     ae_int_t ilo,
+     ae_int_t ihi,
+     /* Real    */ ae_matrix* h,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     ae_int_t iloz,
+     ae_int_t ihiz,
+     /* Real    */ ae_matrix* z,
+     /* Real    */ ae_vector* work,
+     /* Real    */ ae_vector* workv3,
+     /* Real    */ ae_vector* workc1,
+     /* Real    */ ae_vector* works1,
+     ae_int_t* info,
+     ae_state *_state);
+static void hsschur_aux2x2schur(double* a,
+     double* b,
+     double* c,
+     double* d,
+     double* rt1r,
+     double* rt1i,
+     double* rt2r,
+     double* rt2i,
+     double* cs,
+     double* sn,
+     ae_state *_state);
+static double hsschur_extschursign(double a, double b, ae_state *_state);
+static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state);
+
+
+
+
+static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
+     ae_complex beta,
+     double lnmax,
+     double bnorm,
+     double maxgrowth,
+     double* xnorm,
+     ae_complex* x,
+     ae_state *_state);
+
+
+static void xblas_xsum(/* Real    */ ae_vector* w,
+     double mx,
+     ae_int_t n,
+     double* r,
+     double* rerr,
+     ae_state *_state);
+static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state);
+
+
+static double linmin_ftol = 0.001;
+static double linmin_xtol = 100*ae_machineepsilon;
+static double linmin_gtol = 0.3;
+static ae_int_t linmin_maxfev = 20;
+static double linmin_stpmin = 1.0E-50;
+static double linmin_defstpmax = 1.0E+50;
+static double linmin_armijofactor = 1.3;
+static void linmin_mcstep(double* stx,
+     double* fx,
+     double* dx,
+     double* sty,
+     double* fy,
+     double* dy,
+     double* stp,
+     double fp,
+     double dp,
+     ae_bool* brackt,
+     double stmin,
+     double stmax,
+     ae_int_t* info,
+     ae_state *_state);
+
+
+static ae_int_t ftbase_ftbaseplanentrysize = 8;
+static ae_int_t ftbase_ftbasecffttask = 0;
+static ae_int_t ftbase_ftbaserfhttask = 1;
+static ae_int_t ftbase_ftbaserffttask = 2;
+static ae_int_t ftbase_fftcooleytukeyplan = 0;
+static ae_int_t ftbase_fftbluesteinplan = 1;
+static ae_int_t ftbase_fftcodeletplan = 2;
+static ae_int_t ftbase_fhtcooleytukeyplan = 3;
+static ae_int_t ftbase_fhtcodeletplan = 4;
+static ae_int_t ftbase_fftrealcooleytukeyplan = 5;
+static ae_int_t ftbase_fftemptyplan = 6;
+static ae_int_t ftbase_fhtn2plan = 999;
+static ae_int_t ftbase_ftbaseupdatetw = 4;
+static ae_int_t ftbase_ftbasecodeletrecommended = 5;
+static double ftbase_ftbaseinefficiencyfactor = 1.3;
+static ae_int_t ftbase_ftbasemaxsmoothfactor = 5;
+static void ftbase_ftbasegenerateplanrec(ae_int_t n,
+     ae_int_t tasktype,
+     ftplan* plan,
+     ae_int_t* plansize,
+     ae_int_t* precomputedsize,
+     ae_int_t* planarraysize,
+     ae_int_t* tmpmemsize,
+     ae_int_t* stackmemsize,
+     ae_int_t stackptr,
+     ae_state *_state);
+static void ftbase_ftbaseprecomputeplanrec(ftplan* plan,
+     ae_int_t entryoffset,
+     ae_int_t stackptr,
+     ae_state *_state);
+static void ftbase_ffttwcalc(/* Real    */ ae_vector* a,
+     ae_int_t aoffset,
+     ae_int_t n1,
+     ae_int_t n2,
+     ae_state *_state);
+static void ftbase_internalcomplexlintranspose(/* Real    */ ae_vector* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_int_t astart,
+     /* Real    */ ae_vector* buf,
+     ae_state *_state);
+static void ftbase_internalreallintranspose(/* Real    */ ae_vector* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_int_t astart,
+     /* Real    */ ae_vector* buf,
+     ae_state *_state);
+static void ftbase_ffticltrec(/* Real    */ ae_vector* a,
+     ae_int_t astart,
+     ae_int_t astride,
+     /* Real    */ ae_vector* b,
+     ae_int_t bstart,
+     ae_int_t bstride,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+static void ftbase_fftirltrec(/* Real    */ ae_vector* a,
+     ae_int_t astart,
+     ae_int_t astride,
+     /* Real    */ ae_vector* b,
+     ae_int_t bstart,
+     ae_int_t bstride,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+static void ftbase_ftbasefindsmoothrec(ae_int_t n,
+     ae_int_t seed,
+     ae_int_t leastfactor,
+     ae_int_t* best,
+     ae_state *_state);
+static void ftbase_fftarrayresize(/* Integer */ ae_vector* a,
+     ae_int_t* asize,
+     ae_int_t newasize,
+     ae_state *_state);
+static void ftbase_reffht(/* Real    */ ae_vector* a,
+     ae_int_t n,
+     ae_int_t offs,
+     ae_state *_state);
+
+
+
+
+
+
+
+
+
+/*************************************************************************
+This function sorts array of real keys by ascending.
+
+Its results are:
+* sorted array A
+* permutation tables P1, P2
+
+Algorithm outputs permutation tables using two formats:
+* as usual permutation of [0..N-1]. If P1[i]=j, then sorted A[i]  contains
+  value which was moved there from J-th position.
+* as a sequence of pairwise permutations. Sorted A[] may  be  obtained  by
+  swaping A[i] and A[P2[i]] for all i from 0 to N-1.
+  
+INPUT PARAMETERS:
+    A       -   unsorted array
+    N       -   array size
+
+OUPUT PARAMETERS:
+    A       -   sorted array
+    P1, P2  -   permutation tables, array[N]
+    
+NOTES:
+    this function assumes that A[] is finite; it doesn't checks that
+    condition. All other conditions (size of input arrays, etc.) are not
+    checked too.
+
+  -- ALGLIB --
+     Copyright 14.05.2008 by Bochkanov Sergey
+*************************************************************************/
+void tagsort(/* Real    */ ae_vector* a,
+     ae_int_t n,
+     /* Integer */ ae_vector* p1,
+     /* Integer */ ae_vector* p2,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_vector pv;
+    ae_vector vp;
+    ae_vector bufa;
+    ae_vector bufb;
+    ae_int_t lv;
+    ae_int_t lp;
+    ae_int_t rv;
+    ae_int_t rp;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(p1);
+    ae_vector_clear(p2);
+    ae_vector_init(&pv, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&vp, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&bufa, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&bufb, 0, DT_INT, _state, ae_true);
+
+    
+    /*
+     * Special cases
+     */
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        ae_vector_set_length(p1, 0+1, _state);
+        ae_vector_set_length(p2, 0+1, _state);
+        p1->ptr.p_int[0] = 0;
+        p2->ptr.p_int[0] = 0;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * General case, N>1: prepare permutations table P1
+     */
+    ae_vector_set_length(p1, n-1+1, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        p1->ptr.p_int[i] = i;
+    }
+    
+    /*
+     * General case, N>1: sort, update P1
+     */
+    ae_vector_set_length(&bufa, n, _state);
+    ae_vector_set_length(&bufb, n, _state);
+    tagsortfasti(a, p1, &bufa, &bufb, n, _state);
+    
+    /*
+     * General case, N>1: fill permutations table P2
+     *
+     * To fill P2 we maintain two arrays:
+     * * PV, Position(Value). PV[i] contains position of I-th key at the moment
+     * * VP, Value(Position). VP[i] contains key which has position I at the moment
+     *
+     * At each step we making permutation of two items:
+     *   Left, which is given by position/value pair LP/LV
+     *   and Right, which is given by RP/RV
+     * and updating PV[] and VP[] correspondingly.
+     */
+    ae_vector_set_length(&pv, n-1+1, _state);
+    ae_vector_set_length(&vp, n-1+1, _state);
+    ae_vector_set_length(p2, n-1+1, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        pv.ptr.p_int[i] = i;
+        vp.ptr.p_int[i] = i;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        
+        /*
+         * calculate LP, LV, RP, RV
+         */
+        lp = i;
+        lv = vp.ptr.p_int[lp];
+        rv = p1->ptr.p_int[i];
+        rp = pv.ptr.p_int[rv];
+        
+        /*
+         * Fill P2
+         */
+        p2->ptr.p_int[i] = rp;
+        
+        /*
+         * update PV and VP
+         */
+        vp.ptr.p_int[lp] = rv;
+        vp.ptr.p_int[rp] = lv;
+        pv.ptr.p_int[lv] = rp;
+        pv.ptr.p_int[rv] = lp;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Same as TagSort, but optimized for real keys and integer labels.
+
+A is sorted, and same permutations are applied to B.
+
+NOTES:
+1.  this function assumes that A[] is finite; it doesn't checks that
+    condition. All other conditions (size of input arrays, etc.) are not
+    checked too.
+2.  this function uses two buffers, BufA and BufB, each is N elements large.
+    They may be preallocated (which will save some time) or not, in which
+    case function will automatically allocate memory.
+
+  -- ALGLIB --
+     Copyright 11.12.2008 by Bochkanov Sergey
+*************************************************************************/
+void tagsortfasti(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Integer */ ae_vector* bufb,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool isascending;
+    ae_bool isdescending;
+    double tmpr;
+    ae_int_t tmpi;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n<=1 )
+    {
+        return;
+    }
+    
+    /*
+     * Test for already sorted set
+     */
+    isascending = ae_true;
+    isdescending = ae_true;
+    for(i=1; i<=n-1; i++)
+    {
+        isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
+        isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
+    }
+    if( isascending )
+    {
+        return;
+    }
+    if( isdescending )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            j = n-1-i;
+            if( j<=i )
+            {
+                break;
+            }
+            tmpr = a->ptr.p_double[i];
+            a->ptr.p_double[i] = a->ptr.p_double[j];
+            a->ptr.p_double[j] = tmpr;
+            tmpi = b->ptr.p_int[i];
+            b->ptr.p_int[i] = b->ptr.p_int[j];
+            b->ptr.p_int[j] = tmpi;
+        }
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( bufa->cnt<n )
+    {
+        ae_vector_set_length(bufa, n, _state);
+    }
+    if( bufb->cnt<n )
+    {
+        ae_vector_set_length(bufb, n, _state);
+    }
+    tsort_tagsortfastirec(a, b, bufa, bufb, 0, n-1, _state);
+}
+
+
+/*************************************************************************
+Same as TagSort, but optimized for real keys and real labels.
+
+A is sorted, and same permutations are applied to B.
+
+NOTES:
+1.  this function assumes that A[] is finite; it doesn't checks that
+    condition. All other conditions (size of input arrays, etc.) are not
+    checked too.
+2.  this function uses two buffers, BufA and BufB, each is N elements large.
+    They may be preallocated (which will save some time) or not, in which
+    case function will automatically allocate memory.
+
+  -- ALGLIB --
+     Copyright 11.12.2008 by Bochkanov Sergey
+*************************************************************************/
+void tagsortfastr(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Real    */ ae_vector* bufb,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool isascending;
+    ae_bool isdescending;
+    double tmpr;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n<=1 )
+    {
+        return;
+    }
+    
+    /*
+     * Test for already sorted set
+     */
+    isascending = ae_true;
+    isdescending = ae_true;
+    for(i=1; i<=n-1; i++)
+    {
+        isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
+        isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
+    }
+    if( isascending )
+    {
+        return;
+    }
+    if( isdescending )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            j = n-1-i;
+            if( j<=i )
+            {
+                break;
+            }
+            tmpr = a->ptr.p_double[i];
+            a->ptr.p_double[i] = a->ptr.p_double[j];
+            a->ptr.p_double[j] = tmpr;
+            tmpr = b->ptr.p_double[i];
+            b->ptr.p_double[i] = b->ptr.p_double[j];
+            b->ptr.p_double[j] = tmpr;
+        }
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( bufa->cnt<n )
+    {
+        ae_vector_set_length(bufa, n, _state);
+    }
+    if( bufb->cnt<n )
+    {
+        ae_vector_set_length(bufb, n, _state);
+    }
+    tsort_tagsortfastrrec(a, b, bufa, bufb, 0, n-1, _state);
+}
+
+
+/*************************************************************************
+Same as TagSort, but optimized for real keys without labels.
+
+A is sorted, and that's all.
+
+NOTES:
+1.  this function assumes that A[] is finite; it doesn't checks that
+    condition. All other conditions (size of input arrays, etc.) are not
+    checked too.
+2.  this function uses buffer, BufA, which is N elements large. It may be
+    preallocated (which will save some time) or not, in which case
+    function will automatically allocate memory.
+
+  -- ALGLIB --
+     Copyright 11.12.2008 by Bochkanov Sergey
+*************************************************************************/
+void tagsortfast(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* bufa,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool isascending;
+    ae_bool isdescending;
+    double tmpr;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n<=1 )
+    {
+        return;
+    }
+    
+    /*
+     * Test for already sorted set
+     */
+    isascending = ae_true;
+    isdescending = ae_true;
+    for(i=1; i<=n-1; i++)
+    {
+        isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
+        isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
+    }
+    if( isascending )
+    {
+        return;
+    }
+    if( isdescending )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            j = n-1-i;
+            if( j<=i )
+            {
+                break;
+            }
+            tmpr = a->ptr.p_double[i];
+            a->ptr.p_double[i] = a->ptr.p_double[j];
+            a->ptr.p_double[j] = tmpr;
+        }
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( bufa->cnt<n )
+    {
+        ae_vector_set_length(bufa, n, _state);
+    }
+    tsort_tagsortfastrec(a, bufa, 0, n-1, _state);
+}
+
+
+/*************************************************************************
+Heap operations: adds element to the heap
+
+PARAMETERS:
+    A       -   heap itself, must be at least array[0..N]
+    B       -   array of integer tags, which are updated according to
+                permutations in the heap
+    N       -   size of the heap (without new element).
+                updated on output
+    VA      -   value of the element being added
+    VB      -   value of the tag
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void tagheappushi(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     ae_int_t* n,
+     double va,
+     ae_int_t vb,
+     ae_state *_state)
+{
+    ae_int_t j;
+    ae_int_t k;
+    double v;
+
+
+    if( *n<0 )
+    {
+        return;
+    }
+    
+    /*
+     * N=0 is a special case
+     */
+    if( *n==0 )
+    {
+        a->ptr.p_double[0] = va;
+        b->ptr.p_int[0] = vb;
+        *n = *n+1;
+        return;
+    }
+    
+    /*
+     * add current point to the heap
+     * (add to the bottom, then move up)
+     *
+     * we don't write point to the heap
+     * until its final position is determined
+     * (it allow us to reduce number of array access operations)
+     */
+    j = *n;
+    *n = *n+1;
+    while(j>0)
+    {
+        k = (j-1)/2;
+        v = a->ptr.p_double[k];
+        if( ae_fp_less(v,va) )
+        {
+            
+            /*
+             * swap with higher element
+             */
+            a->ptr.p_double[j] = v;
+            b->ptr.p_int[j] = b->ptr.p_int[k];
+            j = k;
+        }
+        else
+        {
+            
+            /*
+             * element in its place. terminate.
+             */
+            break;
+        }
+    }
+    a->ptr.p_double[j] = va;
+    b->ptr.p_int[j] = vb;
+}
+
+
+/*************************************************************************
+Heap operations: replaces top element with new element
+(which is moved down)
+
+PARAMETERS:
+    A       -   heap itself, must be at least array[0..N-1]
+    B       -   array of integer tags, which are updated according to
+                permutations in the heap
+    N       -   size of the heap
+    VA      -   value of the element which replaces top element
+    VB      -   value of the tag
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void tagheapreplacetopi(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     ae_int_t n,
+     double va,
+     ae_int_t vb,
+     ae_state *_state)
+{
+    ae_int_t j;
+    ae_int_t k1;
+    ae_int_t k2;
+    double v;
+    double v1;
+    double v2;
+
+
+    if( n<1 )
+    {
+        return;
+    }
+    
+    /*
+     * N=1 is a special case
+     */
+    if( n==1 )
+    {
+        a->ptr.p_double[0] = va;
+        b->ptr.p_int[0] = vb;
+        return;
+    }
+    
+    /*
+     * move down through heap:
+     * * J  -   current element
+     * * K1 -   first child (always exists)
+     * * K2 -   second child (may not exists)
+     *
+     * we don't write point to the heap
+     * until its final position is determined
+     * (it allow us to reduce number of array access operations)
+     */
+    j = 0;
+    k1 = 1;
+    k2 = 2;
+    while(k1<n)
+    {
+        if( k2>=n )
+        {
+            
+            /*
+             * only one child.
+             *
+             * swap and terminate (because this child
+             * have no siblings due to heap structure)
+             */
+            v = a->ptr.p_double[k1];
+            if( ae_fp_greater(v,va) )
+            {
+                a->ptr.p_double[j] = v;
+                b->ptr.p_int[j] = b->ptr.p_int[k1];
+                j = k1;
+            }
+            break;
+        }
+        else
+        {
+            
+            /*
+             * two childs
+             */
+            v1 = a->ptr.p_double[k1];
+            v2 = a->ptr.p_double[k2];
+            if( ae_fp_greater(v1,v2) )
+            {
+                if( ae_fp_less(va,v1) )
+                {
+                    a->ptr.p_double[j] = v1;
+                    b->ptr.p_int[j] = b->ptr.p_int[k1];
+                    j = k1;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            else
+            {
+                if( ae_fp_less(va,v2) )
+                {
+                    a->ptr.p_double[j] = v2;
+                    b->ptr.p_int[j] = b->ptr.p_int[k2];
+                    j = k2;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            k1 = 2*j+1;
+            k2 = 2*j+2;
+        }
+    }
+    a->ptr.p_double[j] = va;
+    b->ptr.p_int[j] = vb;
+}
+
+
+/*************************************************************************
+Heap operations: pops top element from the heap
+
+PARAMETERS:
+    A       -   heap itself, must be at least array[0..N-1]
+    B       -   array of integer tags, which are updated according to
+                permutations in the heap
+    N       -   size of the heap, N>=1
+
+On output top element is moved to A[N-1], B[N-1], heap is reordered, N is
+decreased by 1.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void tagheappopi(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     ae_int_t* n,
+     ae_state *_state)
+{
+    double va;
+    ae_int_t vb;
+
+
+    if( *n<1 )
+    {
+        return;
+    }
+    
+    /*
+     * N=1 is a special case
+     */
+    if( *n==1 )
+    {
+        *n = 0;
+        return;
+    }
+    
+    /*
+     * swap top element and last element,
+     * then reorder heap
+     */
+    va = a->ptr.p_double[*n-1];
+    vb = b->ptr.p_int[*n-1];
+    a->ptr.p_double[*n-1] = a->ptr.p_double[0];
+    b->ptr.p_int[*n-1] = b->ptr.p_int[0];
+    *n = *n-1;
+    tagheapreplacetopi(a, b, *n, va, vb, _state);
+}
+
+
+/*************************************************************************
+Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
+applies same permutations to B.
+
+  -- ALGLIB --
+     Copyright 06.09.2010 by Bochkanov Sergey
+*************************************************************************/
+static void tsort_tagsortfastirec(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Integer */ ae_vector* bufb,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t cntless;
+    ae_int_t cnteq;
+    ae_int_t cntgreater;
+    double tmpr;
+    ae_int_t tmpi;
+    double v0;
+    double v1;
+    double v2;
+    double vp;
+
+
+    
+    /*
+     * Fast exit
+     */
+    if( i2<=i1 )
+    {
+        return;
+    }
+    
+    /*
+     * Non-recursive sort for small arrays
+     */
+    if( i2-i1<=16 )
+    {
+        for(j=i1+1; j<=i2; j++)
+        {
+            
+            /*
+             * Search elements [I1..J-1] for place to insert Jth element.
+             *
+             * This code stops immediately if we can leave A[J] at J-th position
+             * (all elements have same value of A[J] larger than any of them)
+             */
+            tmpr = a->ptr.p_double[j];
+            tmpi = j;
+            for(k=j-1; k>=i1; k--)
+            {
+                if( a->ptr.p_double[k]<=tmpr )
+                {
+                    break;
+                }
+                tmpi = k;
+            }
+            k = tmpi;
+            
+            /*
+             * Insert Jth element into Kth position
+             */
+            if( k!=j )
+            {
+                tmpr = a->ptr.p_double[j];
+                tmpi = b->ptr.p_int[j];
+                for(i=j-1; i>=k; i--)
+                {
+                    a->ptr.p_double[i+1] = a->ptr.p_double[i];
+                    b->ptr.p_int[i+1] = b->ptr.p_int[i];
+                }
+                a->ptr.p_double[k] = tmpr;
+                b->ptr.p_int[k] = tmpi;
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Quicksort: choose pivot
+     * Here we assume that I2-I1>=2
+     */
+    v0 = a->ptr.p_double[i1];
+    v1 = a->ptr.p_double[i1+(i2-i1)/2];
+    v2 = a->ptr.p_double[i2];
+    if( v0>v1 )
+    {
+        tmpr = v1;
+        v1 = v0;
+        v0 = tmpr;
+    }
+    if( v1>v2 )
+    {
+        tmpr = v2;
+        v2 = v1;
+        v1 = tmpr;
+    }
+    if( v0>v1 )
+    {
+        tmpr = v1;
+        v1 = v0;
+        v0 = tmpr;
+    }
+    vp = v1;
+    
+    /*
+     * now pass through A/B and:
+     * * move elements that are LESS than VP to the left of A/B
+     * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
+     * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
+     * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
+     * * move elements from the left of BufA/BufB to the end of A/B
+     */
+    cntless = 0;
+    cnteq = 0;
+    cntgreater = 0;
+    for(i=i1; i<=i2; i++)
+    {
+        v0 = a->ptr.p_double[i];
+        if( v0<vp )
+        {
+            
+            /*
+             * LESS
+             */
+            k = i1+cntless;
+            if( i!=k )
+            {
+                a->ptr.p_double[k] = v0;
+                b->ptr.p_int[k] = b->ptr.p_int[i];
+            }
+            cntless = cntless+1;
+            continue;
+        }
+        if( v0==vp )
+        {
+            
+            /*
+             * EQUAL
+             */
+            k = i2-cnteq;
+            bufa->ptr.p_double[k] = v0;
+            bufb->ptr.p_int[k] = b->ptr.p_int[i];
+            cnteq = cnteq+1;
+            continue;
+        }
+        
+        /*
+         * GREATER
+         */
+        k = i1+cntgreater;
+        bufa->ptr.p_double[k] = v0;
+        bufb->ptr.p_int[k] = b->ptr.p_int[i];
+        cntgreater = cntgreater+1;
+    }
+    for(i=0; i<=cnteq-1; i++)
+    {
+        j = i1+cntless+cnteq-1-i;
+        k = i2+i-(cnteq-1);
+        a->ptr.p_double[j] = bufa->ptr.p_double[k];
+        b->ptr.p_int[j] = bufb->ptr.p_int[k];
+    }
+    for(i=0; i<=cntgreater-1; i++)
+    {
+        j = i1+cntless+cnteq+i;
+        k = i1+i;
+        a->ptr.p_double[j] = bufa->ptr.p_double[k];
+        b->ptr.p_int[j] = bufb->ptr.p_int[k];
+    }
+    
+    /*
+     * Sort left and right parts of the array (ignoring middle part)
+     */
+    tsort_tagsortfastirec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
+    tsort_tagsortfastirec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
+}
+
+
+/*************************************************************************
+Internal TagSortFastR: sorts A[I1...I2] (both bounds are included),
+applies same permutations to B.
+
+  -- ALGLIB --
+     Copyright 06.09.2010 by Bochkanov Sergey
+*************************************************************************/
+static void tsort_tagsortfastrrec(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Real    */ ae_vector* bufb,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    double tmpr;
+    double tmpr2;
+    ae_int_t tmpi;
+    ae_int_t cntless;
+    ae_int_t cnteq;
+    ae_int_t cntgreater;
+    double v0;
+    double v1;
+    double v2;
+    double vp;
+
+
+    
+    /*
+     * Fast exit
+     */
+    if( i2<=i1 )
+    {
+        return;
+    }
+    
+    /*
+     * Non-recursive sort for small arrays
+     */
+    if( i2-i1<=16 )
+    {
+        for(j=i1+1; j<=i2; j++)
+        {
+            
+            /*
+             * Search elements [I1..J-1] for place to insert Jth element.
+             *
+             * This code stops immediatly if we can leave A[J] at J-th position
+             * (all elements have same value of A[J] larger than any of them)
+             */
+            tmpr = a->ptr.p_double[j];
+            tmpi = j;
+            for(k=j-1; k>=i1; k--)
+            {
+                if( a->ptr.p_double[k]<=tmpr )
+                {
+                    break;
+                }
+                tmpi = k;
+            }
+            k = tmpi;
+            
+            /*
+             * Insert Jth element into Kth position
+             */
+            if( k!=j )
+            {
+                tmpr = a->ptr.p_double[j];
+                tmpr2 = b->ptr.p_double[j];
+                for(i=j-1; i>=k; i--)
+                {
+                    a->ptr.p_double[i+1] = a->ptr.p_double[i];
+                    b->ptr.p_double[i+1] = b->ptr.p_double[i];
+                }
+                a->ptr.p_double[k] = tmpr;
+                b->ptr.p_double[k] = tmpr2;
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Quicksort: choose pivot
+     * Here we assume that I2-I1>=16
+     */
+    v0 = a->ptr.p_double[i1];
+    v1 = a->ptr.p_double[i1+(i2-i1)/2];
+    v2 = a->ptr.p_double[i2];
+    if( v0>v1 )
+    {
+        tmpr = v1;
+        v1 = v0;
+        v0 = tmpr;
+    }
+    if( v1>v2 )
+    {
+        tmpr = v2;
+        v2 = v1;
+        v1 = tmpr;
+    }
+    if( v0>v1 )
+    {
+        tmpr = v1;
+        v1 = v0;
+        v0 = tmpr;
+    }
+    vp = v1;
+    
+    /*
+     * now pass through A/B and:
+     * * move elements that are LESS than VP to the left of A/B
+     * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
+     * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
+     * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
+     * * move elements from the left of BufA/BufB to the end of A/B
+     */
+    cntless = 0;
+    cnteq = 0;
+    cntgreater = 0;
+    for(i=i1; i<=i2; i++)
+    {
+        v0 = a->ptr.p_double[i];
+        if( v0<vp )
+        {
+            
+            /*
+             * LESS
+             */
+            k = i1+cntless;
+            if( i!=k )
+            {
+                a->ptr.p_double[k] = v0;
+                b->ptr.p_double[k] = b->ptr.p_double[i];
+            }
+            cntless = cntless+1;
+            continue;
+        }
+        if( v0==vp )
+        {
+            
+            /*
+             * EQUAL
+             */
+            k = i2-cnteq;
+            bufa->ptr.p_double[k] = v0;
+            bufb->ptr.p_double[k] = b->ptr.p_double[i];
+            cnteq = cnteq+1;
+            continue;
+        }
+        
+        /*
+         * GREATER
+         */
+        k = i1+cntgreater;
+        bufa->ptr.p_double[k] = v0;
+        bufb->ptr.p_double[k] = b->ptr.p_double[i];
+        cntgreater = cntgreater+1;
+    }
+    for(i=0; i<=cnteq-1; i++)
+    {
+        j = i1+cntless+cnteq-1-i;
+        k = i2+i-(cnteq-1);
+        a->ptr.p_double[j] = bufa->ptr.p_double[k];
+        b->ptr.p_double[j] = bufb->ptr.p_double[k];
+    }
+    for(i=0; i<=cntgreater-1; i++)
+    {
+        j = i1+cntless+cnteq+i;
+        k = i1+i;
+        a->ptr.p_double[j] = bufa->ptr.p_double[k];
+        b->ptr.p_double[j] = bufb->ptr.p_double[k];
+    }
+    
+    /*
+     * Sort left and right parts of the array (ignoring middle part)
+     */
+    tsort_tagsortfastrrec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
+    tsort_tagsortfastrrec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
+}
+
+
+/*************************************************************************
+Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
+applies same permutations to B.
+
+  -- ALGLIB --
+     Copyright 06.09.2010 by Bochkanov Sergey
+*************************************************************************/
+static void tsort_tagsortfastrec(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* bufa,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state)
+{
+    ae_int_t cntless;
+    ae_int_t cnteq;
+    ae_int_t cntgreater;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    double tmpr;
+    ae_int_t tmpi;
+    double v0;
+    double v1;
+    double v2;
+    double vp;
+
+
+    
+    /*
+     * Fast exit
+     */
+    if( i2<=i1 )
+    {
+        return;
+    }
+    
+    /*
+     * Non-recursive sort for small arrays
+     */
+    if( i2-i1<=16 )
+    {
+        for(j=i1+1; j<=i2; j++)
+        {
+            
+            /*
+             * Search elements [I1..J-1] for place to insert Jth element.
+             *
+             * This code stops immediatly if we can leave A[J] at J-th position
+             * (all elements have same value of A[J] larger than any of them)
+             */
+            tmpr = a->ptr.p_double[j];
+            tmpi = j;
+            for(k=j-1; k>=i1; k--)
+            {
+                if( a->ptr.p_double[k]<=tmpr )
+                {
+                    break;
+                }
+                tmpi = k;
+            }
+            k = tmpi;
+            
+            /*
+             * Insert Jth element into Kth position
+             */
+            if( k!=j )
+            {
+                tmpr = a->ptr.p_double[j];
+                for(i=j-1; i>=k; i--)
+                {
+                    a->ptr.p_double[i+1] = a->ptr.p_double[i];
+                }
+                a->ptr.p_double[k] = tmpr;
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Quicksort: choose pivot
+     * Here we assume that I2-I1>=16
+     */
+    v0 = a->ptr.p_double[i1];
+    v1 = a->ptr.p_double[i1+(i2-i1)/2];
+    v2 = a->ptr.p_double[i2];
+    if( v0>v1 )
+    {
+        tmpr = v1;
+        v1 = v0;
+        v0 = tmpr;
+    }
+    if( v1>v2 )
+    {
+        tmpr = v2;
+        v2 = v1;
+        v1 = tmpr;
+    }
+    if( v0>v1 )
+    {
+        tmpr = v1;
+        v1 = v0;
+        v0 = tmpr;
+    }
+    vp = v1;
+    
+    /*
+     * now pass through A/B and:
+     * * move elements that are LESS than VP to the left of A/B
+     * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
+     * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
+     * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
+     * * move elements from the left of BufA/BufB to the end of A/B
+     */
+    cntless = 0;
+    cnteq = 0;
+    cntgreater = 0;
+    for(i=i1; i<=i2; i++)
+    {
+        v0 = a->ptr.p_double[i];
+        if( v0<vp )
+        {
+            
+            /*
+             * LESS
+             */
+            k = i1+cntless;
+            if( i!=k )
+            {
+                a->ptr.p_double[k] = v0;
+            }
+            cntless = cntless+1;
+            continue;
+        }
+        if( v0==vp )
+        {
+            
+            /*
+             * EQUAL
+             */
+            k = i2-cnteq;
+            bufa->ptr.p_double[k] = v0;
+            cnteq = cnteq+1;
+            continue;
+        }
+        
+        /*
+         * GREATER
+         */
+        k = i1+cntgreater;
+        bufa->ptr.p_double[k] = v0;
+        cntgreater = cntgreater+1;
+    }
+    for(i=0; i<=cnteq-1; i++)
+    {
+        j = i1+cntless+cnteq-1-i;
+        k = i2+i-(cnteq-1);
+        a->ptr.p_double[j] = bufa->ptr.p_double[k];
+    }
+    for(i=0; i<=cntgreater-1; i++)
+    {
+        j = i1+cntless+cnteq+i;
+        k = i1+i;
+        a->ptr.p_double[j] = bufa->ptr.p_double[k];
+    }
+    
+    /*
+     * Sort left and right parts of the array (ignoring middle part)
+     */
+    tsort_tagsortfastrec(a, bufa, i1, i1+cntless-1, _state);
+    tsort_tagsortfastrec(a, bufa, i1+cntless+cnteq, i2, _state);
+}
+
+
+
+
+/*************************************************************************
+This  function  generates  1-dimensional  general  interpolation task with
+moderate Lipshitz constant (close to 1.0)
+
+If N=1 then suborutine generates only one point at the middle of [A,B]
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void taskgenint1d(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double h;
+
+    ae_vector_clear(x);
+    ae_vector_clear(y);
+
+    ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
+    ae_vector_set_length(x, n, _state);
+    ae_vector_set_length(y, n, _state);
+    if( n>1 )
+    {
+        x->ptr.p_double[0] = a;
+        y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
+        h = (b-a)/(n-1);
+        for(i=1; i<=n-1; i++)
+        {
+            if( i!=n-1 )
+            {
+                x->ptr.p_double[i] = a+(i+0.2*(2*ae_randomreal(_state)-1))*h;
+            }
+            else
+            {
+                x->ptr.p_double[i] = b;
+            }
+            y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
+        }
+    }
+    else
+    {
+        x->ptr.p_double[0] = 0.5*(a+b);
+        y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
+    }
+}
+
+
+/*************************************************************************
+This function generates  1-dimensional equidistant interpolation task with
+moderate Lipshitz constant (close to 1.0)
+
+If N=1 then suborutine generates only one point at the middle of [A,B]
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void taskgenint1dequidist(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double h;
+
+    ae_vector_clear(x);
+    ae_vector_clear(y);
+
+    ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
+    ae_vector_set_length(x, n, _state);
+    ae_vector_set_length(y, n, _state);
+    if( n>1 )
+    {
+        x->ptr.p_double[0] = a;
+        y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
+        h = (b-a)/(n-1);
+        for(i=1; i<=n-1; i++)
+        {
+            x->ptr.p_double[i] = a+i*h;
+            y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*h;
+        }
+    }
+    else
+    {
+        x->ptr.p_double[0] = 0.5*(a+b);
+        y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
+    }
+}
+
+
+/*************************************************************************
+This function generates  1-dimensional Chebyshev-1 interpolation task with
+moderate Lipshitz constant (close to 1.0)
+
+If N=1 then suborutine generates only one point at the middle of [A,B]
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void taskgenint1dcheb1(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+    ae_vector_clear(x);
+    ae_vector_clear(y);
+
+    ae_assert(n>=1, "TaskGenInterpolation1DCheb1: N<1!", _state);
+    ae_vector_set_length(x, n, _state);
+    ae_vector_set_length(y, n, _state);
+    if( n>1 )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*(2*i+1)/(2*n), _state);
+            if( i==0 )
+            {
+                y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
+            }
+            else
+            {
+                y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
+            }
+        }
+    }
+    else
+    {
+        x->ptr.p_double[0] = 0.5*(a+b);
+        y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
+    }
+}
+
+
+/*************************************************************************
+This function generates  1-dimensional Chebyshev-2 interpolation task with
+moderate Lipshitz constant (close to 1.0)
+
+If N=1 then suborutine generates only one point at the middle of [A,B]
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void taskgenint1dcheb2(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+    ae_vector_clear(x);
+    ae_vector_clear(y);
+
+    ae_assert(n>=1, "TaskGenInterpolation1DCheb2: N<1!", _state);
+    ae_vector_set_length(x, n, _state);
+    ae_vector_set_length(y, n, _state);
+    if( n>1 )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*i/(n-1), _state);
+            if( i==0 )
+            {
+                y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
+            }
+            else
+            {
+                y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
+            }
+        }
+    }
+    else
+    {
+        x->ptr.p_double[0] = 0.5*(a+b);
+        y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
+    }
+}
+
+
+/*************************************************************************
+This function checks that all values from X[] are distinct. It does more
+than just usual floating point comparison:
+* first, it calculates max(X) and min(X)
+* second, it maps X[] from [min,max] to [1,2]
+* only at this stage actual comparison is done
+
+The meaning of such check is to ensure that all values are "distinct enough"
+and will not cause interpolation subroutine to fail.
+
+NOTE:
+    X[] must be sorted by ascending (subroutine ASSERT's it)
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool aredistinct(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state)
+{
+    double a;
+    double b;
+    ae_int_t i;
+    ae_bool nonsorted;
+    ae_bool result;
+
+
+    ae_assert(n>=1, "APSERVAreDistinct: internal error (N<1)", _state);
+    if( n==1 )
+    {
+        
+        /*
+         * everything is alright, it is up to caller to decide whether it
+         * can interpolate something with just one point
+         */
+        result = ae_true;
+        return result;
+    }
+    a = x->ptr.p_double[0];
+    b = x->ptr.p_double[0];
+    nonsorted = ae_false;
+    for(i=1; i<=n-1; i++)
+    {
+        a = ae_minreal(a, x->ptr.p_double[i], _state);
+        b = ae_maxreal(b, x->ptr.p_double[i], _state);
+        nonsorted = nonsorted||ae_fp_greater_eq(x->ptr.p_double[i-1],x->ptr.p_double[i]);
+    }
+    ae_assert(!nonsorted, "APSERVAreDistinct: internal error (not sorted)", _state);
+    for(i=1; i<=n-1; i++)
+    {
+        if( ae_fp_eq((x->ptr.p_double[i]-a)/(b-a)+1,(x->ptr.p_double[i-1]-a)/(b-a)+1) )
+        {
+            result = ae_false;
+            return result;
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+If Length(X)<N, resizes X
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void bvectorsetlengthatleast(/* Boolean */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state)
+{
+
+
+    if( x->cnt<n )
+    {
+        ae_vector_set_length(x, n, _state);
+    }
+}
+
+
+/*************************************************************************
+If Length(X)<N, resizes X
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void rvectorsetlengthatleast(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state)
+{
+
+
+    if( x->cnt<n )
+    {
+        ae_vector_set_length(x, n, _state);
+    }
+}
+
+
+/*************************************************************************
+If Cols(X)<N or Rows(X)<M, resizes X
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixsetlengthatleast(/* Real    */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+
+
+    if( x->rows<m||x->cols<n )
+    {
+        ae_matrix_set_length(x, m, n, _state);
+    }
+}
+
+
+/*************************************************************************
+This function checks that all values from X[] are finite
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool isfinitevector(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteVector: internal error (N<0)", _state);
+    for(i=0; i<=n-1; i++)
+    {
+        if( !ae_isfinite(x->ptr.p_double[i], _state) )
+        {
+            result = ae_false;
+            return result;
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+This function checks that all values from X[] are finite
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool isfinitecvector(/* Complex */ ae_vector* z,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteCVector: internal error (N<0)", _state);
+    for(i=0; i<=n-1; i++)
+    {
+        if( !ae_isfinite(z->ptr.p_complex[i].x, _state)||!ae_isfinite(z->ptr.p_complex[i].y, _state) )
+        {
+            result = ae_false;
+            return result;
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+This function checks that all values from X[0..M-1,0..N-1] are finite
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool apservisfinitematrix(/* Real    */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteMatrix: internal error (N<0)", _state);
+    ae_assert(m>=0, "APSERVIsFiniteMatrix: internal error (M<0)", _state);
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
+            {
+                result = ae_false;
+                return result;
+            }
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+This function checks that all values from X[0..M-1,0..N-1] are finite
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteCMatrix: internal error (N<0)", _state);
+    ae_assert(m>=0, "APSERVIsFiniteCMatrix: internal error (M<0)", _state);
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
+            {
+                result = ae_false;
+                return result;
+            }
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+This function checks that all values from upper/lower triangle of
+X[0..N-1,0..N-1] are finite
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool isfinitertrmatrix(/* Real    */ ae_matrix* x,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j1;
+    ae_int_t j2;
+    ae_int_t j;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteRTRMatrix: internal error (N<0)", _state);
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
+            {
+                result = ae_false;
+                return result;
+            }
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+This function checks that all values from upper/lower triangle of
+X[0..N-1,0..N-1] are finite
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j1;
+    ae_int_t j2;
+    ae_int_t j;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteCTRMatrix: internal error (N<0)", _state);
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
+            {
+                result = ae_false;
+                return result;
+            }
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+This function checks that all values from X[0..M-1,0..N-1] are  finite  or
+NaN's.
+
+  -- ALGLIB --
+     Copyright 18.06.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool apservisfiniteornanmatrix(/* Real    */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool result;
+
+
+    ae_assert(n>=0, "APSERVIsFiniteOrNaNMatrix: internal error (N<0)", _state);
+    ae_assert(m>=0, "APSERVIsFiniteOrNaNMatrix: internal error (M<0)", _state);
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( !(ae_isfinite(x->ptr.pp_double[i][j], _state)||ae_isnan(x->ptr.pp_double[i][j], _state)) )
+            {
+                result = ae_false;
+                return result;
+            }
+        }
+    }
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+Safe sqrt(x^2+y^2)
+
+  -- ALGLIB --
+     Copyright by Bochkanov Sergey
+*************************************************************************/
+double safepythag2(double x, double y, ae_state *_state)
+{
+    double w;
+    double xabs;
+    double yabs;
+    double z;
+    double result;
+
+
+    xabs = ae_fabs(x, _state);
+    yabs = ae_fabs(y, _state);
+    w = ae_maxreal(xabs, yabs, _state);
+    z = ae_minreal(xabs, yabs, _state);
+    if( ae_fp_eq(z,0) )
+    {
+        result = w;
+    }
+    else
+    {
+        result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Safe sqrt(x^2+y^2)
+
+  -- ALGLIB --
+     Copyright by Bochkanov Sergey
+*************************************************************************/
+double safepythag3(double x, double y, double z, ae_state *_state)
+{
+    double w;
+    double result;
+
+
+    w = ae_maxreal(ae_fabs(x, _state), ae_maxreal(ae_fabs(y, _state), ae_fabs(z, _state), _state), _state);
+    if( ae_fp_eq(w,0) )
+    {
+        result = 0;
+        return result;
+    }
+    x = x/w;
+    y = y/w;
+    z = z/w;
+    result = w*ae_sqrt(ae_sqr(x, _state)+ae_sqr(y, _state)+ae_sqr(z, _state), _state);
+    return result;
+}
+
+
+/*************************************************************************
+Safe division.
+
+This function attempts to calculate R=X/Y without overflow.
+
+It returns:
+* +1, if abs(X/Y)>=MaxRealNumber or undefined - overflow-like situation
+      (no overlfow is generated, R is either NAN, PosINF, NegINF)
+*  0, if MinRealNumber<abs(X/Y)<MaxRealNumber or X=0, Y<>0
+      (R contains result, may be zero)
+* -1, if 0<abs(X/Y)<MinRealNumber - underflow-like situation
+      (R contains zero; it corresponds to underflow)
+
+No overflow is generated in any case.
+
+  -- ALGLIB --
+     Copyright by Bochkanov Sergey
+*************************************************************************/
+ae_int_t saferdiv(double x, double y, double* r, ae_state *_state)
+{
+    ae_int_t result;
+
+    *r = 0;
+
+    
+    /*
+     * Two special cases:
+     * * Y=0
+     * * X=0 and Y<>0
+     */
+    if( ae_fp_eq(y,0) )
+    {
+        result = 1;
+        if( ae_fp_eq(x,0) )
+        {
+            *r = _state->v_nan;
+        }
+        if( ae_fp_greater(x,0) )
+        {
+            *r = _state->v_posinf;
+        }
+        if( ae_fp_less(x,0) )
+        {
+            *r = _state->v_neginf;
+        }
+        return result;
+    }
+    if( ae_fp_eq(x,0) )
+    {
+        *r = 0;
+        result = 0;
+        return result;
+    }
+    
+    /*
+     * make Y>0
+     */
+    if( ae_fp_less(y,0) )
+    {
+        x = -x;
+        y = -y;
+    }
+    
+    /*
+     *
+     */
+    if( ae_fp_greater_eq(y,1) )
+    {
+        *r = x/y;
+        if( ae_fp_less_eq(ae_fabs(*r, _state),ae_minrealnumber) )
+        {
+            result = -1;
+            *r = 0;
+        }
+        else
+        {
+            result = 0;
+        }
+    }
+    else
+    {
+        if( ae_fp_greater_eq(ae_fabs(x, _state),ae_maxrealnumber*y) )
+        {
+            if( ae_fp_greater(x,0) )
+            {
+                *r = _state->v_posinf;
+            }
+            else
+            {
+                *r = _state->v_neginf;
+            }
+            result = 1;
+        }
+        else
+        {
+            *r = x/y;
+            result = 0;
+        }
+    }
+    return result;
+}
+
+
+/*************************************************************************
+This function calculates "safe" min(X/Y,V) for positive finite X, Y, V.
+No overflow is generated in any case.
+
+  -- ALGLIB --
+     Copyright by Bochkanov Sergey
+*************************************************************************/
+double safeminposrv(double x, double y, double v, ae_state *_state)
+{
+    double r;
+    double result;
+
+
+    if( ae_fp_greater_eq(y,1) )
+    {
+        
+        /*
+         * Y>=1, we can safely divide by Y
+         */
+        r = x/y;
+        result = v;
+        if( ae_fp_greater(v,r) )
+        {
+            result = r;
+        }
+        else
+        {
+            result = v;
+        }
+    }
+    else
+    {
+        
+        /*
+         * Y<1, we can safely multiply by Y
+         */
+        if( ae_fp_less(x,v*y) )
+        {
+            result = x/y;
+        }
+        else
+        {
+            result = v;
+        }
+    }
+    return result;
+}
+
+
+/*************************************************************************
+This function makes periodic mapping of X to [A,B].
+
+It accepts X, A, B (A>B). It returns T which lies in  [A,B] and integer K,
+such that X = T + K*(B-A).
+
+NOTES:
+* K is represented as real value, although actually it is integer
+* T is guaranteed to be in [A,B]
+* T replaces X
+
+  -- ALGLIB --
+     Copyright by Bochkanov Sergey
+*************************************************************************/
+void apperiodicmap(double* x,
+     double a,
+     double b,
+     double* k,
+     ae_state *_state)
+{
+
+    *k = 0;
+
+    ae_assert(ae_fp_less(a,b), "APPeriodicMap: internal error!", _state);
+    *k = ae_ifloor((*x-a)/(b-a), _state);
+    *x = *x-*k*(b-a);
+    while(ae_fp_less(*x,a))
+    {
+        *x = *x+(b-a);
+        *k = *k-1;
+    }
+    while(ae_fp_greater(*x,b))
+    {
+        *x = *x-(b-a);
+        *k = *k+1;
+    }
+    *x = ae_maxreal(*x, a, _state);
+    *x = ae_minreal(*x, b, _state);
+}
+
+
+/*************************************************************************
+'bounds' value: maps X to [B1,B2]
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+double boundval(double x, double b1, double b2, ae_state *_state)
+{
+    double result;
+
+
+    if( ae_fp_less_eq(x,b1) )
+    {
+        result = b1;
+        return result;
+    }
+    if( ae_fp_greater_eq(x,b2) )
+    {
+        result = b2;
+        return result;
+    }
+    result = x;
+    return result;
+}
+
+
+ae_bool _apbuffers_init(apbuffers* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->ia1, 0, DT_INT, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ia2, 0, DT_INT, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ra1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ra2, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _apbuffers_init_copy(apbuffers* dst, apbuffers* src, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init_copy(&dst->ia1, &src->ia1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ia2, &src->ia2, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ra1, &src->ra1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ra2, &src->ra2, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _apbuffers_clear(apbuffers* p)
+{
+    ae_vector_clear(&p->ia1);
+    ae_vector_clear(&p->ia2);
+    ae_vector_clear(&p->ra1);
+    ae_vector_clear(&p->ra2);
+}
+
+
+
+
+/*************************************************************************
+Internal ranking subroutine
+*************************************************************************/
+void rankx(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     apbuffers* buf,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t t;
+    double tmp;
+    ae_int_t tmpi;
+
+
+    
+    /*
+     * Prepare
+     */
+    if( n<1 )
+    {
+        return;
+    }
+    if( n==1 )
+    {
+        x->ptr.p_double[0] = 1;
+        return;
+    }
+    if( buf->ra1.cnt<n )
+    {
+        ae_vector_set_length(&buf->ra1, n, _state);
+    }
+    if( buf->ia1.cnt<n )
+    {
+        ae_vector_set_length(&buf->ia1, n, _state);
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        buf->ra1.ptr.p_double[i] = x->ptr.p_double[i];
+        buf->ia1.ptr.p_int[i] = i;
+    }
+    
+    /*
+     * sort {R, C}
+     */
+    if( n!=1 )
+    {
+        i = 2;
+        do
+        {
+            t = i;
+            while(t!=1)
+            {
+                k = t/2;
+                if( ae_fp_greater_eq(buf->ra1.ptr.p_double[k-1],buf->ra1.ptr.p_double[t-1]) )
+                {
+                    t = 1;
+                }
+                else
+                {
+                    tmp = buf->ra1.ptr.p_double[k-1];
+                    buf->ra1.ptr.p_double[k-1] = buf->ra1.ptr.p_double[t-1];
+                    buf->ra1.ptr.p_double[t-1] = tmp;
+                    tmpi = buf->ia1.ptr.p_int[k-1];
+                    buf->ia1.ptr.p_int[k-1] = buf->ia1.ptr.p_int[t-1];
+                    buf->ia1.ptr.p_int[t-1] = tmpi;
+                    t = k;
+                }
+            }
+            i = i+1;
+        }
+        while(i<=n);
+        i = n-1;
+        do
+        {
+            tmp = buf->ra1.ptr.p_double[i];
+            buf->ra1.ptr.p_double[i] = buf->ra1.ptr.p_double[0];
+            buf->ra1.ptr.p_double[0] = tmp;
+            tmpi = buf->ia1.ptr.p_int[i];
+            buf->ia1.ptr.p_int[i] = buf->ia1.ptr.p_int[0];
+            buf->ia1.ptr.p_int[0] = tmpi;
+            t = 1;
+            while(t!=0)
+            {
+                k = 2*t;
+                if( k>i )
+                {
+                    t = 0;
+                }
+                else
+                {
+                    if( k<i )
+                    {
+                        if( ae_fp_greater(buf->ra1.ptr.p_double[k],buf->ra1.ptr.p_double[k-1]) )
+                        {
+                            k = k+1;
+                        }
+                    }
+                    if( ae_fp_greater_eq(buf->ra1.ptr.p_double[t-1],buf->ra1.ptr.p_double[k-1]) )
+                    {
+                        t = 0;
+                    }
+                    else
+                    {
+                        tmp = buf->ra1.ptr.p_double[k-1];
+                        buf->ra1.ptr.p_double[k-1] = buf->ra1.ptr.p_double[t-1];
+                        buf->ra1.ptr.p_double[t-1] = tmp;
+                        tmpi = buf->ia1.ptr.p_int[k-1];
+                        buf->ia1.ptr.p_int[k-1] = buf->ia1.ptr.p_int[t-1];
+                        buf->ia1.ptr.p_int[t-1] = tmpi;
+                        t = k;
+                    }
+                }
+            }
+            i = i-1;
+        }
+        while(i>=1);
+    }
+    
+    /*
+     * compute tied ranks
+     */
+    i = 0;
+    while(i<=n-1)
+    {
+        j = i+1;
+        while(j<=n-1)
+        {
+            if( ae_fp_neq(buf->ra1.ptr.p_double[j],buf->ra1.ptr.p_double[i]) )
+            {
+                break;
+            }
+            j = j+1;
+        }
+        for(k=i; k<=j-1; k++)
+        {
+            buf->ra1.ptr.p_double[k] = 1+(double)(i+j-1)/(double)2;
+        }
+        i = j;
+    }
+    
+    /*
+     * back to x
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        x->ptr.p_double[buf->ia1.ptr.p_int[i]] = buf->ra1.ptr.p_double[i];
+    }
+}
+
+
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_vector* u,
+     ae_int_t iu,
+     /* Complex */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_vector* u,
+     ae_int_t iu,
+     /* Real    */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixmvf(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Complex */ ae_vector* x,
+     ae_int_t ix,
+     /* Complex */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state)
+{
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixmvf(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Real    */ ae_vector* x,
+     ae_int_t ix,
+     /* Real    */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state)
+{
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_cmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
+#endif
+}
+
+
+/*************************************************************************
+Fast kernel
+
+  -- ALGLIB routine --
+     19.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state)
+{
+#ifndef ALGLIB_INTERCEPTS_ABLAS
+    ae_bool result;
+
+
+    result = ae_false;
+    return result;
+#else
+    return _ialglib_i_cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
+#endif
+}
+
+
+
+
+double vectornorm2(/* Real    */ ae_vector* x,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t ix;
+    double absxi;
+    double scl;
+    double ssq;
+    double result;
+
+
+    n = i2-i1+1;
+    if( n<1 )
+    {
+        result = 0;
+        return result;
+    }
+    if( n==1 )
+    {
+        result = ae_fabs(x->ptr.p_double[i1], _state);
+        return result;
+    }
+    scl = 0;
+    ssq = 1;
+    for(ix=i1; ix<=i2; ix++)
+    {
+        if( ae_fp_neq(x->ptr.p_double[ix],0) )
+        {
+            absxi = ae_fabs(x->ptr.p_double[ix], _state);
+            if( ae_fp_less(scl,absxi) )
+            {
+                ssq = 1+ssq*ae_sqr(scl/absxi, _state);
+                scl = absxi;
+            }
+            else
+            {
+                ssq = ssq+ae_sqr(absxi/scl, _state);
+            }
+        }
+    }
+    result = scl*ae_sqrt(ssq, _state);
+    return result;
+}
+
+
+ae_int_t vectoridxabsmax(/* Real    */ ae_vector* x,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double a;
+    ae_int_t result;
+
+
+    result = i1;
+    a = ae_fabs(x->ptr.p_double[result], _state);
+    for(i=i1+1; i<=i2; i++)
+    {
+        if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[result], _state)) )
+        {
+            result = i;
+        }
+    }
+    return result;
+}
+
+
+ae_int_t columnidxabsmax(/* Real    */ ae_matrix* x,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double a;
+    ae_int_t result;
+
+
+    result = i1;
+    a = ae_fabs(x->ptr.pp_double[result][j], _state);
+    for(i=i1+1; i<=i2; i++)
+    {
+        if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[result][j], _state)) )
+        {
+            result = i;
+        }
+    }
+    return result;
+}
+
+
+ae_int_t rowidxabsmax(/* Real    */ ae_matrix* x,
+     ae_int_t j1,
+     ae_int_t j2,
+     ae_int_t i,
+     ae_state *_state)
+{
+    ae_int_t j;
+    double a;
+    ae_int_t result;
+
+
+    result = j1;
+    a = ae_fabs(x->ptr.pp_double[i][result], _state);
+    for(j=j1+1; j<=j2; j++)
+    {
+        if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[i][result], _state)) )
+        {
+            result = j;
+        }
+    }
+    return result;
+}
+
+
+double upperhessenberg1norm(/* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j1,
+     ae_int_t j2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double result;
+
+
+    ae_assert(i2-i1==j2-j1, "UpperHessenberg1Norm: I2-I1<>J2-J1!", _state);
+    for(j=j1; j<=j2; j++)
+    {
+        work->ptr.p_double[j] = 0;
+    }
+    for(i=i1; i<=i2; i++)
+    {
+        for(j=ae_maxint(j1, j1+i-i1-1, _state); j<=j2; j++)
+        {
+            work->ptr.p_double[j] = work->ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
+        }
+    }
+    result = 0;
+    for(j=j1; j<=j2; j++)
+    {
+        result = ae_maxreal(result, work->ptr.p_double[j], _state);
+    }
+    return result;
+}
+
+
+void copymatrix(/* Real    */ ae_matrix* a,
+     ae_int_t is1,
+     ae_int_t is2,
+     ae_int_t js1,
+     ae_int_t js2,
+     /* Real    */ ae_matrix* b,
+     ae_int_t id1,
+     ae_int_t id2,
+     ae_int_t jd1,
+     ae_int_t jd2,
+     ae_state *_state)
+{
+    ae_int_t isrc;
+    ae_int_t idst;
+
+
+    if( is1>is2||js1>js2 )
+    {
+        return;
+    }
+    ae_assert(is2-is1==id2-id1, "CopyMatrix: different sizes!", _state);
+    ae_assert(js2-js1==jd2-jd1, "CopyMatrix: different sizes!", _state);
+    for(isrc=is1; isrc<=is2; isrc++)
+    {
+        idst = isrc-is1+id1;
+        ae_v_move(&b->ptr.pp_double[idst][jd1], 1, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(jd1,jd2));
+    }
+}
+
+
+void inplacetranspose(/* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j1,
+     ae_int_t j2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t ips;
+    ae_int_t jps;
+    ae_int_t l;
+
+
+    if( i1>i2||j1>j2 )
+    {
+        return;
+    }
+    ae_assert(i1-i2==j1-j2, "InplaceTranspose error: incorrect array size!", _state);
+    for(i=i1; i<=i2-1; i++)
+    {
+        j = j1+i-i1;
+        ips = i+1;
+        jps = j1+ips-i1;
+        l = i2-i;
+        ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ips][j], a->stride, ae_v_len(1,l));
+        ae_v_move(&a->ptr.pp_double[ips][j], a->stride, &a->ptr.pp_double[i][jps], 1, ae_v_len(ips,i2));
+        ae_v_move(&a->ptr.pp_double[i][jps], 1, &work->ptr.p_double[1], 1, ae_v_len(jps,j2));
+    }
+}
+
+
+void copyandtranspose(/* Real    */ ae_matrix* a,
+     ae_int_t is1,
+     ae_int_t is2,
+     ae_int_t js1,
+     ae_int_t js2,
+     /* Real    */ ae_matrix* b,
+     ae_int_t id1,
+     ae_int_t id2,
+     ae_int_t jd1,
+     ae_int_t jd2,
+     ae_state *_state)
+{
+    ae_int_t isrc;
+    ae_int_t jdst;
+
+
+    if( is1>is2||js1>js2 )
+    {
+        return;
+    }
+    ae_assert(is2-is1==jd2-jd1, "CopyAndTranspose: different sizes!", _state);
+    ae_assert(js2-js1==id2-id1, "CopyAndTranspose: different sizes!", _state);
+    for(isrc=is1; isrc<=is2; isrc++)
+    {
+        jdst = isrc-is1+jd1;
+        ae_v_move(&b->ptr.pp_double[id1][jdst], b->stride, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(id1,id2));
+    }
+}
+
+
+void matrixvectormultiply(/* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j1,
+     ae_int_t j2,
+     ae_bool trans,
+     /* Real    */ ae_vector* x,
+     ae_int_t ix1,
+     ae_int_t ix2,
+     double alpha,
+     /* Real    */ ae_vector* y,
+     ae_int_t iy1,
+     ae_int_t iy2,
+     double beta,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double v;
+
+
+    if( !trans )
+    {
+        
+        /*
+         * y := alpha*A*x + beta*y;
+         */
+        if( i1>i2||j1>j2 )
+        {
+            return;
+        }
+        ae_assert(j2-j1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state);
+        ae_assert(i2-i1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state);
+        
+        /*
+         * beta*y
+         */
+        if( ae_fp_eq(beta,0) )
+        {
+            for(i=iy1; i<=iy2; i++)
+            {
+                y->ptr.p_double[i] = 0;
+            }
+        }
+        else
+        {
+            ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
+        }
+        
+        /*
+         * alpha*A*x
+         */
+        for(i=i1; i<=i2; i++)
+        {
+            v = ae_v_dotproduct(&a->ptr.pp_double[i][j1], 1, &x->ptr.p_double[ix1], 1, ae_v_len(j1,j2));
+            y->ptr.p_double[iy1+i-i1] = y->ptr.p_double[iy1+i-i1]+alpha*v;
+        }
+    }
+    else
+    {
+        
+        /*
+         * y := alpha*A'*x + beta*y;
+         */
+        if( i1>i2||j1>j2 )
+        {
+            return;
+        }
+        ae_assert(i2-i1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state);
+        ae_assert(j2-j1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state);
+        
+        /*
+         * beta*y
+         */
+        if( ae_fp_eq(beta,0) )
+        {
+            for(i=iy1; i<=iy2; i++)
+            {
+                y->ptr.p_double[i] = 0;
+            }
+        }
+        else
+        {
+            ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
+        }
+        
+        /*
+         * alpha*A'*x
+         */
+        for(i=i1; i<=i2; i++)
+        {
+            v = alpha*x->ptr.p_double[ix1+i-i1];
+            ae_v_addd(&y->ptr.p_double[iy1], 1, &a->ptr.pp_double[i][j1], 1, ae_v_len(iy1,iy2), v);
+        }
+    }
+}
+
+
+double pythag2(double x, double y, ae_state *_state)
+{
+    double w;
+    double xabs;
+    double yabs;
+    double z;
+    double result;
+
+
+    xabs = ae_fabs(x, _state);
+    yabs = ae_fabs(y, _state);
+    w = ae_maxreal(xabs, yabs, _state);
+    z = ae_minreal(xabs, yabs, _state);
+    if( ae_fp_eq(z,0) )
+    {
+        result = w;
+    }
+    else
+    {
+        result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
+    }
+    return result;
+}
+
+
+void matrixmatrixmultiply(/* Real    */ ae_matrix* a,
+     ae_int_t ai1,
+     ae_int_t ai2,
+     ae_int_t aj1,
+     ae_int_t aj2,
+     ae_bool transa,
+     /* Real    */ ae_matrix* b,
+     ae_int_t bi1,
+     ae_int_t bi2,
+     ae_int_t bj1,
+     ae_int_t bj2,
+     ae_bool transb,
+     double alpha,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ci1,
+     ae_int_t ci2,
+     ae_int_t cj1,
+     ae_int_t cj2,
+     double beta,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t arows;
+    ae_int_t acols;
+    ae_int_t brows;
+    ae_int_t bcols;
+    ae_int_t crows;
+    ae_int_t ccols;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t l;
+    ae_int_t r;
+    double v;
+
+
+    
+    /*
+     * Setup
+     */
+    if( !transa )
+    {
+        arows = ai2-ai1+1;
+        acols = aj2-aj1+1;
+    }
+    else
+    {
+        arows = aj2-aj1+1;
+        acols = ai2-ai1+1;
+    }
+    if( !transb )
+    {
+        brows = bi2-bi1+1;
+        bcols = bj2-bj1+1;
+    }
+    else
+    {
+        brows = bj2-bj1+1;
+        bcols = bi2-bi1+1;
+    }
+    ae_assert(acols==brows, "MatrixMatrixMultiply: incorrect matrix sizes!", _state);
+    if( ((arows<=0||acols<=0)||brows<=0)||bcols<=0 )
+    {
+        return;
+    }
+    crows = arows;
+    ccols = bcols;
+    
+    /*
+     * Test WORK
+     */
+    i = ae_maxint(arows, acols, _state);
+    i = ae_maxint(brows, i, _state);
+    i = ae_maxint(i, bcols, _state);
+    work->ptr.p_double[1] = 0;
+    work->ptr.p_double[i] = 0;
+    
+    /*
+     * Prepare C
+     */
+    if( ae_fp_eq(beta,0) )
+    {
+        for(i=ci1; i<=ci2; i++)
+        {
+            for(j=cj1; j<=cj2; j++)
+            {
+                c->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    else
+    {
+        for(i=ci1; i<=ci2; i++)
+        {
+            ae_v_muld(&c->ptr.pp_double[i][cj1], 1, ae_v_len(cj1,cj2), beta);
+        }
+    }
+    
+    /*
+     * A*B
+     */
+    if( !transa&&!transb )
+    {
+        for(l=ai1; l<=ai2; l++)
+        {
+            for(r=bi1; r<=bi2; r++)
+            {
+                v = alpha*a->ptr.pp_double[l][aj1+r-bi1];
+                k = ci1+l-ai1;
+                ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
+            }
+        }
+        return;
+    }
+    
+    /*
+     * A*B'
+     */
+    if( !transa&&transb )
+    {
+        if( arows*acols<brows*bcols )
+        {
+            for(r=bi1; r<=bi2; r++)
+            {
+                for(l=ai1; l<=ai2; l++)
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
+                    c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
+                }
+            }
+            return;
+        }
+        else
+        {
+            for(l=ai1; l<=ai2; l++)
+            {
+                for(r=bi1; r<=bi2; r++)
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
+                    c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
+                }
+            }
+            return;
+        }
+    }
+    
+    /*
+     * A'*B
+     */
+    if( transa&&!transb )
+    {
+        for(l=aj1; l<=aj2; l++)
+        {
+            for(r=bi1; r<=bi2; r++)
+            {
+                v = alpha*a->ptr.pp_double[ai1+r-bi1][l];
+                k = ci1+l-aj1;
+                ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
+            }
+        }
+        return;
+    }
+    
+    /*
+     * A'*B'
+     */
+    if( transa&&transb )
+    {
+        if( arows*acols<brows*bcols )
+        {
+            for(r=bi1; r<=bi2; r++)
+            {
+                k = cj1+r-bi1;
+                for(i=1; i<=crows; i++)
+                {
+                    work->ptr.p_double[i] = 0.0;
+                }
+                for(l=ai1; l<=ai2; l++)
+                {
+                    v = alpha*b->ptr.pp_double[r][bj1+l-ai1];
+                    ae_v_addd(&work->ptr.p_double[1], 1, &a->ptr.pp_double[l][aj1], 1, ae_v_len(1,crows), v);
+                }
+                ae_v_add(&c->ptr.pp_double[ci1][k], c->stride, &work->ptr.p_double[1], 1, ae_v_len(ci1,ci2));
+            }
+            return;
+        }
+        else
+        {
+            for(l=aj1; l<=aj2; l++)
+            {
+                k = ai2-ai1+1;
+                ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ai1][l], a->stride, ae_v_len(1,k));
+                for(r=bi1; r<=bi2; r++)
+                {
+                    v = ae_v_dotproduct(&work->ptr.p_double[1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(1,k));
+                    c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1]+alpha*v;
+                }
+            }
+            return;
+        }
+    }
+}
+
+
+
+
+void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Complex */ ae_vector* x,
+     ae_complex alpha,
+     /* Complex */ ae_vector* y,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t ba1;
+    ae_int_t ba2;
+    ae_int_t by1;
+    ae_int_t by2;
+    ae_int_t bx1;
+    ae_int_t bx2;
+    ae_int_t n;
+    ae_complex v;
+
+
+    n = i2-i1+1;
+    if( n<=0 )
+    {
+        return;
+    }
+    
+    /*
+     * Let A = L + D + U, where
+     *  L is strictly lower triangular (main diagonal is zero)
+     *  D is diagonal
+     *  U is strictly upper triangular (main diagonal is zero)
+     *
+     * A*x = L*x + D*x + U*x
+     *
+     * Calculate D*x first
+     */
+    for(i=i1; i<=i2; i++)
+    {
+        y->ptr.p_complex[i-i1+1] = ae_c_mul(a->ptr.pp_complex[i][i],x->ptr.p_complex[i-i1+1]);
+    }
+    
+    /*
+     * Add L*x + U*x
+     */
+    if( isupper )
+    {
+        for(i=i1; i<=i2-1; i++)
+        {
+            
+            /*
+             * Add L*x to the result
+             */
+            v = x->ptr.p_complex[i-i1+1];
+            by1 = i-i1+2;
+            by2 = n;
+            ba1 = i+1;
+            ba2 = i2;
+            ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
+            
+            /*
+             * Add U*x to the result
+             */
+            bx1 = i-i1+2;
+            bx2 = n;
+            ba1 = i+1;
+            ba2 = i2;
+            v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
+            y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
+        }
+    }
+    else
+    {
+        for(i=i1+1; i<=i2; i++)
+        {
+            
+            /*
+             * Add L*x to the result
+             */
+            bx1 = 1;
+            bx2 = i-i1;
+            ba1 = i1;
+            ba2 = i-1;
+            v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
+            y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
+            
+            /*
+             * Add U*x to the result
+             */
+            v = x->ptr.p_complex[i-i1+1];
+            by1 = 1;
+            by2 = i-i1;
+            ba1 = i1;
+            ba2 = i-1;
+            ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
+        }
+    }
+    ae_v_cmulc(&y->ptr.p_complex[1], 1, ae_v_len(1,n), alpha);
+}
+
+
+void hermitianrank2update(/* Complex */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Complex */ ae_vector* x,
+     /* Complex */ ae_vector* y,
+     /* Complex */ ae_vector* t,
+     ae_complex alpha,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t tp1;
+    ae_int_t tp2;
+    ae_complex v;
+
+
+    if( isupper )
+    {
+        for(i=i1; i<=i2; i++)
+        {
+            tp1 = i+1-i1;
+            tp2 = i2-i1+1;
+            v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
+            ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
+            v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
+            ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
+            ae_v_cadd(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i,i2));
+        }
+    }
+    else
+    {
+        for(i=i1; i<=i2; i++)
+        {
+            tp1 = 1;
+            tp2 = i+1-i1;
+            v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
+            ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
+            v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
+            ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
+            ae_v_cadd(&a->ptr.pp_complex[i][i1], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i1,i));
+        }
+    }
+}
+
+
+
+
+/*************************************************************************
+Generation of an elementary reflection transformation
+
+The subroutine generates elementary reflection H of order N, so that, for
+a given X, the following equality holds true:
+
+    ( X(1) )   ( Beta )
+H * (  ..  ) = (  0   )
+    ( X(n) )   (  0   )
+
+where
+              ( V(1) )
+H = 1 - Tau * (  ..  ) * ( V(1), ..., V(n) )
+              ( V(n) )
+
+where the first component of vector V equals 1.
+
+Input parameters:
+    X   -   vector. Array whose index ranges within [1..N].
+    N   -   reflection order.
+
+Output parameters:
+    X   -   components from 2 to N are replaced with vector V.
+            The first component is replaced with parameter Beta.
+    Tau -   scalar value Tau. If X is a null vector, Tau equals 0,
+            otherwise 1 <= Tau <= 2.
+
+This subroutine is the modification of the DLARFG subroutines from
+the LAPACK library.
+
+MODIFICATIONS:
+    24.12.2005 sign(Alpha) was replaced with an analogous to the Fortran SIGN code.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void generatereflection(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     double* tau,
+     ae_state *_state)
+{
+    ae_int_t j;
+    double alpha;
+    double xnorm;
+    double v;
+    double beta;
+    double mx;
+    double s;
+
+    *tau = 0;
+
+    if( n<=1 )
+    {
+        *tau = 0;
+        return;
+    }
+    
+    /*
+     * Scale if needed (to avoid overflow/underflow during intermediate
+     * calculations).
+     */
+    mx = 0;
+    for(j=1; j<=n; j++)
+    {
+        mx = ae_maxreal(ae_fabs(x->ptr.p_double[j], _state), mx, _state);
+    }
+    s = 1;
+    if( ae_fp_neq(mx,0) )
+    {
+        if( ae_fp_less_eq(mx,ae_minrealnumber/ae_machineepsilon) )
+        {
+            s = ae_minrealnumber/ae_machineepsilon;
+            v = 1/s;
+            ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
+            mx = mx*v;
+        }
+        else
+        {
+            if( ae_fp_greater_eq(mx,ae_maxrealnumber*ae_machineepsilon) )
+            {
+                s = ae_maxrealnumber*ae_machineepsilon;
+                v = 1/s;
+                ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
+                mx = mx*v;
+            }
+        }
+    }
+    
+    /*
+     * XNORM = DNRM2( N-1, X, INCX )
+     */
+    alpha = x->ptr.p_double[1];
+    xnorm = 0;
+    if( ae_fp_neq(mx,0) )
+    {
+        for(j=2; j<=n; j++)
+        {
+            xnorm = xnorm+ae_sqr(x->ptr.p_double[j]/mx, _state);
+        }
+        xnorm = ae_sqrt(xnorm, _state)*mx;
+    }
+    if( ae_fp_eq(xnorm,0) )
+    {
+        
+        /*
+         * H  =  I
+         */
+        *tau = 0;
+        x->ptr.p_double[1] = x->ptr.p_double[1]*s;
+        return;
+    }
+    
+    /*
+     * general case
+     */
+    mx = ae_maxreal(ae_fabs(alpha, _state), ae_fabs(xnorm, _state), _state);
+    beta = -mx*ae_sqrt(ae_sqr(alpha/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
+    if( ae_fp_less(alpha,0) )
+    {
+        beta = -beta;
+    }
+    *tau = (beta-alpha)/beta;
+    v = 1/(alpha-beta);
+    ae_v_muld(&x->ptr.p_double[2], 1, ae_v_len(2,n), v);
+    x->ptr.p_double[1] = beta;
+    
+    /*
+     * Scale back outputs
+     */
+    x->ptr.p_double[1] = x->ptr.p_double[1]*s;
+}
+
+
+/*************************************************************************
+Application of an elementary reflection to a rectangular matrix of size MxN
+
+The algorithm pre-multiplies the matrix by an elementary reflection transformation
+which is given by column V and scalar Tau (see the description of the
+GenerateReflection procedure). Not the whole matrix but only a part of it
+is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements
+of this submatrix are changed.
+
+Input parameters:
+    C       -   matrix to be transformed.
+    Tau     -   scalar defining the transformation.
+    V       -   column defining the transformation.
+                Array whose index ranges within [1..M2-M1+1].
+    M1, M2  -   range of rows to be transformed.
+    N1, N2  -   range of columns to be transformed.
+    WORK    -   working array whose indexes goes from N1 to N2.
+
+Output parameters:
+    C       -   the result of multiplying the input matrix C by the
+                transformation matrix which is given by Tau and V.
+                If N1>N2 or M1>M2, C is not modified.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void applyreflectionfromtheleft(/* Real    */ ae_matrix* c,
+     double tau,
+     /* Real    */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    double t;
+    ae_int_t i;
+    ae_int_t vm;
+
+
+    if( (ae_fp_eq(tau,0)||n1>n2)||m1>m2 )
+    {
+        return;
+    }
+    
+    /*
+     * w := C' * v
+     */
+    vm = m2-m1+1;
+    for(i=n1; i<=n2; i++)
+    {
+        work->ptr.p_double[i] = 0;
+    }
+    for(i=m1; i<=m2; i++)
+    {
+        t = v->ptr.p_double[i+1-m1];
+        ae_v_addd(&work->ptr.p_double[n1], 1, &c->ptr.pp_double[i][n1], 1, ae_v_len(n1,n2), t);
+    }
+    
+    /*
+     * C := C - tau * v * w'
+     */
+    for(i=m1; i<=m2; i++)
+    {
+        t = v->ptr.p_double[i-m1+1]*tau;
+        ae_v_subd(&c->ptr.pp_double[i][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2), t);
+    }
+}
+
+
+/*************************************************************************
+Application of an elementary reflection to a rectangular matrix of size MxN
+
+The algorithm post-multiplies the matrix by an elementary reflection transformation
+which is given by column V and scalar Tau (see the description of the
+GenerateReflection procedure). Not the whole matrix but only a part of it
+is transformed (rows from M1 to M2, columns from N1 to N2). Only the
+elements of this submatrix are changed.
+
+Input parameters:
+    C       -   matrix to be transformed.
+    Tau     -   scalar defining the transformation.
+    V       -   column defining the transformation.
+                Array whose index ranges within [1..N2-N1+1].
+    M1, M2  -   range of rows to be transformed.
+    N1, N2  -   range of columns to be transformed.
+    WORK    -   working array whose indexes goes from M1 to M2.
+
+Output parameters:
+    C       -   the result of multiplying the input matrix C by the
+                transformation matrix which is given by Tau and V.
+                If N1>N2 or M1>M2, C is not modified.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void applyreflectionfromtheright(/* Real    */ ae_matrix* c,
+     double tau,
+     /* Real    */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    double t;
+    ae_int_t i;
+    ae_int_t vm;
+
+
+    if( (ae_fp_eq(tau,0)||n1>n2)||m1>m2 )
+    {
+        return;
+    }
+    vm = n2-n1+1;
+    for(i=m1; i<=m2; i++)
+    {
+        t = ae_v_dotproduct(&c->ptr.pp_double[i][n1], 1, &v->ptr.p_double[1], 1, ae_v_len(n1,n2));
+        t = t*tau;
+        ae_v_subd(&c->ptr.pp_double[i][n1], 1, &v->ptr.p_double[1], 1, ae_v_len(n1,n2), t);
+    }
+}
+
+
+
+
+/*************************************************************************
+Generation of an elementary complex reflection transformation
+
+The subroutine generates elementary complex reflection H of  order  N,  so
+that, for a given X, the following equality holds true:
+
+     ( X(1) )   ( Beta )
+H' * (  ..  ) = (  0   ),   H'*H = I,   Beta is a real number
+     ( X(n) )   (  0   )
+
+where
+
+              ( V(1) )
+H = 1 - Tau * (  ..  ) * ( conj(V(1)), ..., conj(V(n)) )
+              ( V(n) )
+
+where the first component of vector V equals 1.
+
+Input parameters:
+    X   -   vector. Array with elements [1..N].
+    N   -   reflection order.
+
+Output parameters:
+    X   -   components from 2 to N are replaced by vector V.
+            The first component is replaced with parameter Beta.
+    Tau -   scalar value Tau.
+
+This subroutine is the modification of CLARFG subroutines  from the LAPACK
+library. It has similar functionality except for the fact that it  doesn’t
+handle errors when intermediate results cause an overflow.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void complexgeneratereflection(/* Complex */ ae_vector* x,
+     ae_int_t n,
+     ae_complex* tau,
+     ae_state *_state)
+{
+    ae_int_t j;
+    ae_complex alpha;
+    double alphi;
+    double alphr;
+    double beta;
+    double xnorm;
+    double mx;
+    ae_complex t;
+    double s;
+    ae_complex v;
+
+    tau->x = 0;
+    tau->y = 0;
+
+    if( n<=0 )
+    {
+        *tau = ae_complex_from_d(0);
+        return;
+    }
+    
+    /*
+     * Scale if needed (to avoid overflow/underflow during intermediate
+     * calculations).
+     */
+    mx = 0;
+    for(j=1; j<=n; j++)
+    {
+        mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
+    }
+    s = 1;
+    if( ae_fp_neq(mx,0) )
+    {
+        if( ae_fp_less(mx,1) )
+        {
+            s = ae_sqrt(ae_minrealnumber, _state);
+            v = ae_complex_from_d(1/s);
+            ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
+        }
+        else
+        {
+            s = ae_sqrt(ae_maxrealnumber, _state);
+            v = ae_complex_from_d(1/s);
+            ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
+        }
+    }
+    
+    /*
+     * calculate
+     */
+    alpha = x->ptr.p_complex[1];
+    mx = 0;
+    for(j=2; j<=n; j++)
+    {
+        mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
+    }
+    xnorm = 0;
+    if( ae_fp_neq(mx,0) )
+    {
+        for(j=2; j<=n; j++)
+        {
+            t = ae_c_div_d(x->ptr.p_complex[j],mx);
+            xnorm = xnorm+ae_c_mul(t,ae_c_conj(t, _state)).x;
+        }
+        xnorm = ae_sqrt(xnorm, _state)*mx;
+    }
+    alphr = alpha.x;
+    alphi = alpha.y;
+    if( ae_fp_eq(xnorm,0)&&ae_fp_eq(alphi,0) )
+    {
+        *tau = ae_complex_from_d(0);
+        x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
+        return;
+    }
+    mx = ae_maxreal(ae_fabs(alphr, _state), ae_fabs(alphi, _state), _state);
+    mx = ae_maxreal(mx, ae_fabs(xnorm, _state), _state);
+    beta = -mx*ae_sqrt(ae_sqr(alphr/mx, _state)+ae_sqr(alphi/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
+    if( ae_fp_less(alphr,0) )
+    {
+        beta = -beta;
+    }
+    tau->x = (beta-alphr)/beta;
+    tau->y = -alphi/beta;
+    alpha = ae_c_d_div(1,ae_c_sub_d(alpha,beta));
+    if( n>1 )
+    {
+        ae_v_cmulc(&x->ptr.p_complex[2], 1, ae_v_len(2,n), alpha);
+    }
+    alpha = ae_complex_from_d(beta);
+    x->ptr.p_complex[1] = alpha;
+    
+    /*
+     * Scale back
+     */
+    x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
+}
+
+
+/*************************************************************************
+Application of an elementary reflection to a rectangular matrix of size MxN
+
+The  algorithm  pre-multiplies  the  matrix  by  an  elementary reflection
+transformation  which  is  given  by  column  V  and  scalar  Tau (see the
+description of the GenerateReflection). Not the whole matrix  but  only  a
+part of it is transformed (rows from M1 to M2, columns from N1 to N2). Only
+the elements of this submatrix are changed.
+
+Note: the matrix is multiplied by H, not by H'.   If  it  is  required  to
+multiply the matrix by H', it is necessary to pass Conj(Tau) instead of Tau.
+
+Input parameters:
+    C       -   matrix to be transformed.
+    Tau     -   scalar defining transformation.
+    V       -   column defining transformation.
+                Array whose index ranges within [1..M2-M1+1]
+    M1, M2  -   range of rows to be transformed.
+    N1, N2  -   range of columns to be transformed.
+    WORK    -   working array whose index goes from N1 to N2.
+
+Output parameters:
+    C       -   the result of multiplying the input matrix C by the
+                transformation matrix which is given by Tau and V.
+                If N1>N2 or M1>M2, C is not modified.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c,
+     ae_complex tau,
+     /* Complex */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Complex */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_complex t;
+    ae_int_t i;
+    ae_int_t vm;
+
+
+    if( (ae_c_eq_d(tau,0)||n1>n2)||m1>m2 )
+    {
+        return;
+    }
+    
+    /*
+     * w := C^T * conj(v)
+     */
+    vm = m2-m1+1;
+    for(i=n1; i<=n2; i++)
+    {
+        work->ptr.p_complex[i] = ae_complex_from_d(0);
+    }
+    for(i=m1; i<=m2; i++)
+    {
+        t = ae_c_conj(v->ptr.p_complex[i+1-m1], _state);
+        ae_v_caddc(&work->ptr.p_complex[n1], 1, &c->ptr.pp_complex[i][n1], 1, "N", ae_v_len(n1,n2), t);
+    }
+    
+    /*
+     * C := C - tau * v * w^T
+     */
+    for(i=m1; i<=m2; i++)
+    {
+        t = ae_c_mul(v->ptr.p_complex[i-m1+1],tau);
+        ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &work->ptr.p_complex[n1], 1, "N", ae_v_len(n1,n2), t);
+    }
+}
+
+
+/*************************************************************************
+Application of an elementary reflection to a rectangular matrix of size MxN
+
+The  algorithm  post-multiplies  the  matrix  by  an elementary reflection
+transformation  which  is  given  by  column  V  and  scalar  Tau (see the
+description  of  the  GenerateReflection). Not the whole matrix but only a
+part  of  it  is  transformed (rows from M1 to M2, columns from N1 to N2).
+Only the elements of this submatrix are changed.
+
+Input parameters:
+    C       -   matrix to be transformed.
+    Tau     -   scalar defining transformation.
+    V       -   column defining transformation.
+                Array whose index ranges within [1..N2-N1+1]
+    M1, M2  -   range of rows to be transformed.
+    N1, N2  -   range of columns to be transformed.
+    WORK    -   working array whose index goes from M1 to M2.
+
+Output parameters:
+    C       -   the result of multiplying the input matrix C by the
+                transformation matrix which is given by Tau and V.
+                If N1>N2 or M1>M2, C is not modified.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c,
+     ae_complex tau,
+     /* Complex */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Complex */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_complex t;
+    ae_int_t i;
+    ae_int_t vm;
+
+
+    if( (ae_c_eq_d(tau,0)||n1>n2)||m1>m2 )
+    {
+        return;
+    }
+    
+    /*
+     * w := C * v
+     */
+    vm = n2-n1+1;
+    for(i=m1; i<=m2; i++)
+    {
+        t = ae_v_cdotproduct(&c->ptr.pp_complex[i][n1], 1, "N", &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2));
+        work->ptr.p_complex[i] = t;
+    }
+    
+    /*
+     * C := C - w * conj(v^T)
+     */
+    ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
+    for(i=m1; i<=m2; i++)
+    {
+        t = ae_c_mul(work->ptr.p_complex[i],tau);
+        ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2), t);
+    }
+    ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
+}
+
+
+
+
+void symmetricmatrixvectormultiply(/* Real    */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* x,
+     double alpha,
+     /* Real    */ ae_vector* y,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t ba1;
+    ae_int_t ba2;
+    ae_int_t by1;
+    ae_int_t by2;
+    ae_int_t bx1;
+    ae_int_t bx2;
+    ae_int_t n;
+    double v;
+
+
+    n = i2-i1+1;
+    if( n<=0 )
+    {
+        return;
+    }
+    
+    /*
+     * Let A = L + D + U, where
+     *  L is strictly lower triangular (main diagonal is zero)
+     *  D is diagonal
+     *  U is strictly upper triangular (main diagonal is zero)
+     *
+     * A*x = L*x + D*x + U*x
+     *
+     * Calculate D*x first
+     */
+    for(i=i1; i<=i2; i++)
+    {
+        y->ptr.p_double[i-i1+1] = a->ptr.pp_double[i][i]*x->ptr.p_double[i-i1+1];
+    }
+    
+    /*
+     * Add L*x + U*x
+     */
+    if( isupper )
+    {
+        for(i=i1; i<=i2-1; i++)
+        {
+            
+            /*
+             * Add L*x to the result
+             */
+            v = x->ptr.p_double[i-i1+1];
+            by1 = i-i1+2;
+            by2 = n;
+            ba1 = i+1;
+            ba2 = i2;
+            ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
+            
+            /*
+             * Add U*x to the result
+             */
+            bx1 = i-i1+2;
+            bx2 = n;
+            ba1 = i+1;
+            ba2 = i2;
+            v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
+            y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
+        }
+    }
+    else
+    {
+        for(i=i1+1; i<=i2; i++)
+        {
+            
+            /*
+             * Add L*x to the result
+             */
+            bx1 = 1;
+            bx2 = i-i1;
+            ba1 = i1;
+            ba2 = i-1;
+            v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
+            y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
+            
+            /*
+             * Add U*x to the result
+             */
+            v = x->ptr.p_double[i-i1+1];
+            by1 = 1;
+            by2 = i-i1;
+            ba1 = i1;
+            ba2 = i-1;
+            ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
+        }
+    }
+    ae_v_muld(&y->ptr.p_double[1], 1, ae_v_len(1,n), alpha);
+}
+
+
+void symmetricrank2update(/* Real    */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     /* Real    */ ae_vector* t,
+     double alpha,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t tp1;
+    ae_int_t tp2;
+    double v;
+
+
+    if( isupper )
+    {
+        for(i=i1; i<=i2; i++)
+        {
+            tp1 = i+1-i1;
+            tp2 = i2-i1+1;
+            v = x->ptr.p_double[i+1-i1];
+            ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
+            v = y->ptr.p_double[i+1-i1];
+            ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
+            ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
+            ae_v_add(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i,i2));
+        }
+    }
+    else
+    {
+        for(i=i1; i<=i2; i++)
+        {
+            tp1 = 1;
+            tp2 = i+1-i1;
+            v = x->ptr.p_double[i+1-i1];
+            ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
+            v = y->ptr.p_double[i+1-i1];
+            ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
+            ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
+            ae_v_add(&a->ptr.pp_double[i][i1], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i1,i));
+        }
+    }
+}
+
+
+
+
+/*************************************************************************
+Application of a sequence of  elementary rotations to a matrix
+
+The algorithm pre-multiplies the matrix by a sequence of rotation
+transformations which is given by arrays C and S. Depending on the value
+of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
+rows are rotated, or the rows N and N-1, N-2 and N-3 and so on, are rotated.
+
+Not the whole matrix but only a part of it is transformed (rows from M1 to
+M2, columns from N1 to N2). Only the elements of this submatrix are changed.
+
+Input parameters:
+    IsForward   -   the sequence of the rotation application.
+    M1,M2       -   the range of rows to be transformed.
+    N1, N2      -   the range of columns to be transformed.
+    C,S         -   transformation coefficients.
+                    Array whose index ranges within [1..M2-M1].
+    A           -   processed matrix.
+    WORK        -   working array whose index ranges within [N1..N2].
+
+Output parameters:
+    A           -   transformed matrix.
+
+Utility subroutine.
+*************************************************************************/
+void applyrotationsfromtheleft(ae_bool isforward,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* c,
+     /* Real    */ ae_vector* s,
+     /* Real    */ ae_matrix* a,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t j;
+    ae_int_t jp1;
+    double ctemp;
+    double stemp;
+    double temp;
+
+
+    if( m1>m2||n1>n2 )
+    {
+        return;
+    }
+    
+    /*
+     * Form  P * A
+     */
+    if( isforward )
+    {
+        if( n1!=n2 )
+        {
+            
+            /*
+             * Common case: N1<>N2
+             */
+            for(j=m1; j<=m2-1; j++)
+            {
+                ctemp = c->ptr.p_double[j-m1+1];
+                stemp = s->ptr.p_double[j-m1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    jp1 = j+1;
+                    ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
+                    ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
+                    ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
+                    ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
+                    ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Special case: N1=N2
+             */
+            for(j=m1; j<=m2-1; j++)
+            {
+                ctemp = c->ptr.p_double[j-m1+1];
+                stemp = s->ptr.p_double[j-m1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    temp = a->ptr.pp_double[j+1][n1];
+                    a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
+                    a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
+                }
+            }
+        }
+    }
+    else
+    {
+        if( n1!=n2 )
+        {
+            
+            /*
+             * Common case: N1<>N2
+             */
+            for(j=m2-1; j>=m1; j--)
+            {
+                ctemp = c->ptr.p_double[j-m1+1];
+                stemp = s->ptr.p_double[j-m1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    jp1 = j+1;
+                    ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
+                    ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
+                    ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
+                    ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
+                    ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Special case: N1=N2
+             */
+            for(j=m2-1; j>=m1; j--)
+            {
+                ctemp = c->ptr.p_double[j-m1+1];
+                stemp = s->ptr.p_double[j-m1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    temp = a->ptr.pp_double[j+1][n1];
+                    a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
+                    a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
+                }
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Application of a sequence of  elementary rotations to a matrix
+
+The algorithm post-multiplies the matrix by a sequence of rotation
+transformations which is given by arrays C and S. Depending on the value
+of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
+rows are rotated, or the rows N and N-1, N-2 and N-3 and so on are rotated.
+
+Not the whole matrix but only a part of it is transformed (rows from M1
+to M2, columns from N1 to N2). Only the elements of this submatrix are changed.
+
+Input parameters:
+    IsForward   -   the sequence of the rotation application.
+    M1,M2       -   the range of rows to be transformed.
+    N1, N2      -   the range of columns to be transformed.
+    C,S         -   transformation coefficients.
+                    Array whose index ranges within [1..N2-N1].
+    A           -   processed matrix.
+    WORK        -   working array whose index ranges within [M1..M2].
+
+Output parameters:
+    A           -   transformed matrix.
+
+Utility subroutine.
+*************************************************************************/
+void applyrotationsfromtheright(ae_bool isforward,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* c,
+     /* Real    */ ae_vector* s,
+     /* Real    */ ae_matrix* a,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t j;
+    ae_int_t jp1;
+    double ctemp;
+    double stemp;
+    double temp;
+
+
+    
+    /*
+     * Form A * P'
+     */
+    if( isforward )
+    {
+        if( m1!=m2 )
+        {
+            
+            /*
+             * Common case: M1<>M2
+             */
+            for(j=n1; j<=n2-1; j++)
+            {
+                ctemp = c->ptr.p_double[j-n1+1];
+                stemp = s->ptr.p_double[j-n1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    jp1 = j+1;
+                    ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
+                    ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
+                    ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
+                    ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
+                    ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Special case: M1=M2
+             */
+            for(j=n1; j<=n2-1; j++)
+            {
+                ctemp = c->ptr.p_double[j-n1+1];
+                stemp = s->ptr.p_double[j-n1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    temp = a->ptr.pp_double[m1][j+1];
+                    a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
+                    a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
+                }
+            }
+        }
+    }
+    else
+    {
+        if( m1!=m2 )
+        {
+            
+            /*
+             * Common case: M1<>M2
+             */
+            for(j=n2-1; j>=n1; j--)
+            {
+                ctemp = c->ptr.p_double[j-n1+1];
+                stemp = s->ptr.p_double[j-n1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    jp1 = j+1;
+                    ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
+                    ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
+                    ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
+                    ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
+                    ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Special case: M1=M2
+             */
+            for(j=n2-1; j>=n1; j--)
+            {
+                ctemp = c->ptr.p_double[j-n1+1];
+                stemp = s->ptr.p_double[j-n1+1];
+                if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
+                {
+                    temp = a->ptr.pp_double[m1][j+1];
+                    a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
+                    a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
+                }
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+The subroutine generates the elementary rotation, so that:
+
+[  CS  SN  ]  .  [ F ]  =  [ R ]
+[ -SN  CS  ]     [ G ]     [ 0 ]
+
+CS**2 + SN**2 = 1
+*************************************************************************/
+void generaterotation(double f,
+     double g,
+     double* cs,
+     double* sn,
+     double* r,
+     ae_state *_state)
+{
+    double f1;
+    double g1;
+
+    *cs = 0;
+    *sn = 0;
+    *r = 0;
+
+    if( ae_fp_eq(g,0) )
+    {
+        *cs = 1;
+        *sn = 0;
+        *r = f;
+    }
+    else
+    {
+        if( ae_fp_eq(f,0) )
+        {
+            *cs = 0;
+            *sn = 1;
+            *r = g;
+        }
+        else
+        {
+            f1 = f;
+            g1 = g;
+            if( ae_fp_greater(ae_fabs(f1, _state),ae_fabs(g1, _state)) )
+            {
+                *r = ae_fabs(f1, _state)*ae_sqrt(1+ae_sqr(g1/f1, _state), _state);
+            }
+            else
+            {
+                *r = ae_fabs(g1, _state)*ae_sqrt(1+ae_sqr(f1/g1, _state), _state);
+            }
+            *cs = f1/(*r);
+            *sn = g1/(*r);
+            if( ae_fp_greater(ae_fabs(f, _state),ae_fabs(g, _state))&&ae_fp_less(*cs,0) )
+            {
+                *cs = -*cs;
+                *sn = -*sn;
+                *r = -*r;
+            }
+        }
+    }
+}
+
+
+
+
+/*************************************************************************
+Subroutine performing  the  Schur  decomposition  of  a  matrix  in  upper
+Hessenberg form using the QR algorithm with multiple shifts.
+
+The  source matrix  H  is  represented as  S'*H*S = T, where H - matrix in
+upper Hessenberg form,  S - orthogonal matrix (Schur vectors),   T - upper
+quasi-triangular matrix (with blocks of sizes  1x1  and  2x2  on  the main
+diagonal).
+
+Input parameters:
+    H   -   matrix to be decomposed.
+            Array whose indexes range within [1..N, 1..N].
+    N   -   size of H, N>=0.
+
+
+Output parameters:
+    H   –   contains the matrix T.
+            Array whose indexes range within [1..N, 1..N].
+            All elements below the blocks on the main diagonal are equal
+            to 0.
+    S   -   contains Schur vectors.
+            Array whose indexes range within [1..N, 1..N].
+
+Note 1:
+    The block structure of matrix T could be easily recognized: since  all
+    the elements  below  the blocks are zeros, the elements a[i+1,i] which
+    are equal to 0 show the block border.
+
+Note 2:
+    the algorithm  performance  depends  on  the  value  of  the  internal
+    parameter NS of InternalSchurDecomposition  subroutine  which  defines
+    the number of shifts in the QR algorithm (analog of  the  block  width
+    in block matrix algorithms in linear algebra). If you require  maximum
+    performance  on  your  machine,  it  is  recommended  to  adjust  this
+    parameter manually.
+
+Result:
+    True, if the algorithm has converged and the parameters H and S contain
+        the result.
+    False, if the algorithm has not converged.
+
+Algorithm implemented on the basis of subroutine DHSEQR (LAPACK 3.0 library).
+*************************************************************************/
+ae_bool upperhessenbergschurdecomposition(/* Real    */ ae_matrix* h,
+     ae_int_t n,
+     /* Real    */ ae_matrix* s,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector wi;
+    ae_vector wr;
+    ae_int_t info;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(s);
+    ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
+
+    internalschurdecomposition(h, n, 1, 2, &wr, &wi, s, &info, _state);
+    result = info==0;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+void internalschurdecomposition(/* Real    */ ae_matrix* h,
+     ae_int_t n,
+     ae_int_t tneeded,
+     ae_int_t zneeded,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     /* Real    */ ae_matrix* z,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_int_t i;
+    ae_int_t i1;
+    ae_int_t i2;
+    ae_int_t ierr;
+    ae_int_t ii;
+    ae_int_t itemp;
+    ae_int_t itn;
+    ae_int_t its;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t l;
+    ae_int_t maxb;
+    ae_int_t nr;
+    ae_int_t ns;
+    ae_int_t nv;
+    double absw;
+    double ovfl;
+    double smlnum;
+    double tau;
+    double temp;
+    double tst1;
+    double ulp;
+    double unfl;
+    ae_matrix s;
+    ae_vector v;
+    ae_vector vv;
+    ae_vector workc1;
+    ae_vector works1;
+    ae_vector workv3;
+    ae_vector tmpwr;
+    ae_vector tmpwi;
+    ae_bool initz;
+    ae_bool wantt;
+    ae_bool wantz;
+    double cnst;
+    ae_bool failflag;
+    ae_int_t p1;
+    ae_int_t p2;
+    double vt;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(wr);
+    ae_vector_clear(wi);
+    *info = 0;
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&vv, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&workc1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&works1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&workv3, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tmpwr, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tmpwi, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Set the order of the multi-shift QR algorithm to be used.
+     * If you want to tune algorithm, change this values
+     */
+    ns = 12;
+    maxb = 50;
+    
+    /*
+     * Now 2 < NS <= MAXB < NH.
+     */
+    maxb = ae_maxint(3, maxb, _state);
+    ns = ae_minint(maxb, ns, _state);
+    
+    /*
+     * Initialize
+     */
+    cnst = 1.5;
+    ae_vector_set_length(&work, ae_maxint(n, 1, _state)+1, _state);
+    ae_matrix_set_length(&s, ns+1, ns+1, _state);
+    ae_vector_set_length(&v, ns+1+1, _state);
+    ae_vector_set_length(&vv, ns+1+1, _state);
+    ae_vector_set_length(wr, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(wi, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(&workc1, 1+1, _state);
+    ae_vector_set_length(&works1, 1+1, _state);
+    ae_vector_set_length(&workv3, 3+1, _state);
+    ae_vector_set_length(&tmpwr, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(&tmpwi, ae_maxint(n, 1, _state)+1, _state);
+    ae_assert(n>=0, "InternalSchurDecomposition: incorrect N!", _state);
+    ae_assert(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!", _state);
+    ae_assert((zneeded==0||zneeded==1)||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!", _state);
+    wantt = tneeded==1;
+    initz = zneeded==2;
+    wantz = zneeded!=0;
+    *info = 0;
+    
+    /*
+     * Initialize Z, if necessary
+     */
+    if( initz )
+    {
+        ae_matrix_set_length(z, n+1, n+1, _state);
+        for(i=1; i<=n; i++)
+        {
+            for(j=1; j<=n; j++)
+            {
+                if( i==j )
+                {
+                    z->ptr.pp_double[i][j] = 1;
+                }
+                else
+                {
+                    z->ptr.pp_double[i][j] = 0;
+                }
+            }
+        }
+    }
+    
+    /*
+     * Quick return if possible
+     */
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        wr->ptr.p_double[1] = h->ptr.pp_double[1][1];
+        wi->ptr.p_double[1] = 0;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Set rows and columns 1 to N to zero below the first
+     * subdiagonal.
+     */
+    for(j=1; j<=n-2; j++)
+    {
+        for(i=j+2; i<=n; i++)
+        {
+            h->ptr.pp_double[i][j] = 0;
+        }
+    }
+    
+    /*
+     * Test if N is sufficiently small
+     */
+    if( (ns<=2||ns>n)||maxb>=n )
+    {
+        
+        /*
+         * Use the standard double-shift algorithm
+         */
+        hsschur_internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
+        
+        /*
+         * fill entries under diagonal blocks of T with zeros
+         */
+        if( wantt )
+        {
+            j = 1;
+            while(j<=n)
+            {
+                if( ae_fp_eq(wi->ptr.p_double[j],0) )
+                {
+                    for(i=j+1; i<=n; i++)
+                    {
+                        h->ptr.pp_double[i][j] = 0;
+                    }
+                    j = j+1;
+                }
+                else
+                {
+                    for(i=j+2; i<=n; i++)
+                    {
+                        h->ptr.pp_double[i][j] = 0;
+                        h->ptr.pp_double[i][j+1] = 0;
+                    }
+                    j = j+2;
+                }
+            }
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    unfl = ae_minrealnumber;
+    ovfl = 1/unfl;
+    ulp = 2*ae_machineepsilon;
+    smlnum = unfl*(n/ulp);
+    
+    /*
+     * I1 and I2 are the indices of the first row and last column of H
+     * to which transformations must be applied. If eigenvalues only are
+     * being computed, I1 and I2 are set inside the main loop.
+     */
+    i1 = 1;
+    i2 = n;
+    
+    /*
+     * ITN is the total number of multiple-shift QR iterations allowed.
+     */
+    itn = 30*n;
+    
+    /*
+     * The main loop begins here. I is the loop index and decreases from
+     * IHI to ILO in steps of at most MAXB. Each iteration of the loop
+     * works with the active submatrix in rows and columns L to I.
+     * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+     * H(L,L-1) is negligible so that the matrix splits.
+     */
+    i = n;
+    for(;;)
+    {
+        l = 1;
+        if( i<1 )
+        {
+            
+            /*
+             * fill entries under diagonal blocks of T with zeros
+             */
+            if( wantt )
+            {
+                j = 1;
+                while(j<=n)
+                {
+                    if( ae_fp_eq(wi->ptr.p_double[j],0) )
+                    {
+                        for(i=j+1; i<=n; i++)
+                        {
+                            h->ptr.pp_double[i][j] = 0;
+                        }
+                        j = j+1;
+                    }
+                    else
+                    {
+                        for(i=j+2; i<=n; i++)
+                        {
+                            h->ptr.pp_double[i][j] = 0;
+                            h->ptr.pp_double[i][j+1] = 0;
+                        }
+                        j = j+2;
+                    }
+                }
+            }
+            
+            /*
+             * Exit
+             */
+            ae_frame_leave(_state);
+            return;
+        }
+        
+        /*
+         * Perform multiple-shift QR iterations on rows and columns ILO to I
+         * until a submatrix of order at most MAXB splits off at the bottom
+         * because a subdiagonal element has become negligible.
+         */
+        failflag = ae_true;
+        for(its=0; its<=itn; its++)
+        {
+            
+            /*
+             * Look for a single small subdiagonal element.
+             */
+            for(k=i; k>=l+1; k--)
+            {
+                tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
+                if( ae_fp_eq(tst1,0) )
+                {
+                    tst1 = upperhessenberg1norm(h, l, i, l, i, &work, _state);
+                }
+                if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) )
+                {
+                    break;
+                }
+            }
+            l = k;
+            if( l>1 )
+            {
+                
+                /*
+                 * H(L,L-1) is negligible.
+                 */
+                h->ptr.pp_double[l][l-1] = 0;
+            }
+            
+            /*
+             * Exit from loop if a submatrix of order <= MAXB has split off.
+             */
+            if( l>=i-maxb+1 )
+            {
+                failflag = ae_false;
+                break;
+            }
+            
+            /*
+             * Now the active submatrix is in rows and columns L to I. If
+             * eigenvalues only are being computed, only the active submatrix
+             * need be transformed.
+             */
+            if( its==20||its==30 )
+            {
+                
+                /*
+                 * Exceptional shifts.
+                 */
+                for(ii=i-ns+1; ii<=i; ii++)
+                {
+                    wr->ptr.p_double[ii] = cnst*(ae_fabs(h->ptr.pp_double[ii][ii-1], _state)+ae_fabs(h->ptr.pp_double[ii][ii], _state));
+                    wi->ptr.p_double[ii] = 0;
+                }
+            }
+            else
+            {
+                
+                /*
+                 * Use eigenvalues of trailing submatrix of order NS as shifts.
+                 */
+                copymatrix(h, i-ns+1, i, i-ns+1, i, &s, 1, ns, 1, ns, _state);
+                hsschur_internalauxschur(ae_false, ae_false, ns, 1, ns, &s, &tmpwr, &tmpwi, 1, ns, z, &work, &workv3, &workc1, &works1, &ierr, _state);
+                for(p1=1; p1<=ns; p1++)
+                {
+                    wr->ptr.p_double[i-ns+p1] = tmpwr.ptr.p_double[p1];
+                    wi->ptr.p_double[i-ns+p1] = tmpwi.ptr.p_double[p1];
+                }
+                if( ierr>0 )
+                {
+                    
+                    /*
+                     * If DLAHQR failed to compute all NS eigenvalues, use the
+                     * unconverged diagonal elements as the remaining shifts.
+                     */
+                    for(ii=1; ii<=ierr; ii++)
+                    {
+                        wr->ptr.p_double[i-ns+ii] = s.ptr.pp_double[ii][ii];
+                        wi->ptr.p_double[i-ns+ii] = 0;
+                    }
+                }
+            }
+            
+            /*
+             * Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+             * where G is the Hessenberg submatrix H(L:I,L:I) and w is
+             * the vector of shifts (stored in WR and WI). The result is
+             * stored in the local array V.
+             */
+            v.ptr.p_double[1] = 1;
+            for(ii=2; ii<=ns+1; ii++)
+            {
+                v.ptr.p_double[ii] = 0;
+            }
+            nv = 1;
+            for(j=i-ns+1; j<=i; j++)
+            {
+                if( ae_fp_greater_eq(wi->ptr.p_double[j],0) )
+                {
+                    if( ae_fp_eq(wi->ptr.p_double[j],0) )
+                    {
+                        
+                        /*
+                         * real shift
+                         */
+                        p1 = nv+1;
+                        ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
+                        matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &vv, 1, nv, 1.0, &v, 1, nv+1, -wr->ptr.p_double[j], _state);
+                        nv = nv+1;
+                    }
+                    else
+                    {
+                        if( ae_fp_greater(wi->ptr.p_double[j],0) )
+                        {
+                            
+                            /*
+                             * complex conjugate pair of shifts
+                             */
+                            p1 = nv+1;
+                            ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
+                            matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &v, 1, nv, 1.0, &vv, 1, nv+1, -2*wr->ptr.p_double[j], _state);
+                            itemp = vectoridxabsmax(&vv, 1, nv+1, _state);
+                            temp = 1/ae_maxreal(ae_fabs(vv.ptr.p_double[itemp], _state), smlnum, _state);
+                            p1 = nv+1;
+                            ae_v_muld(&vv.ptr.p_double[1], 1, ae_v_len(1,p1), temp);
+                            absw = pythag2(wr->ptr.p_double[j], wi->ptr.p_double[j], _state);
+                            temp = temp*absw*absw;
+                            matrixvectormultiply(h, l, l+nv+1, l, l+nv, ae_false, &vv, 1, nv+1, 1.0, &v, 1, nv+2, temp, _state);
+                            nv = nv+2;
+                        }
+                    }
+                    
+                    /*
+                     * Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+                     * reset it to the unit vector.
+                     */
+                    itemp = vectoridxabsmax(&v, 1, nv, _state);
+                    temp = ae_fabs(v.ptr.p_double[itemp], _state);
+                    if( ae_fp_eq(temp,0) )
+                    {
+                        v.ptr.p_double[1] = 1;
+                        for(ii=2; ii<=nv; ii++)
+                        {
+                            v.ptr.p_double[ii] = 0;
+                        }
+                    }
+                    else
+                    {
+                        temp = ae_maxreal(temp, smlnum, _state);
+                        vt = 1/temp;
+                        ae_v_muld(&v.ptr.p_double[1], 1, ae_v_len(1,nv), vt);
+                    }
+                }
+            }
+            
+            /*
+             * Multiple-shift QR step
+             */
+            for(k=l; k<=i-1; k++)
+            {
+                
+                /*
+                 * The first iteration of this loop determines a reflection G
+                 * from the vector V and applies it from left and right to H,
+                 * thus creating a nonzero bulge below the subdiagonal.
+                 *
+                 * Each subsequent iteration determines a reflection G to
+                 * restore the Hessenberg form in the (K-1)th column, and thus
+                 * chases the bulge one step toward the bottom of the active
+                 * submatrix. NR is the order of G.
+                 */
+                nr = ae_minint(ns+1, i-k+1, _state);
+                if( k>l )
+                {
+                    p1 = k-1;
+                    p2 = k+nr-1;
+                    ae_v_move(&v.ptr.p_double[1], 1, &h->ptr.pp_double[k][p1], h->stride, ae_v_len(1,nr));
+                }
+                generatereflection(&v, nr, &tau, _state);
+                if( k>l )
+                {
+                    h->ptr.pp_double[k][k-1] = v.ptr.p_double[1];
+                    for(ii=k+1; ii<=i; ii++)
+                    {
+                        h->ptr.pp_double[ii][k-1] = 0;
+                    }
+                }
+                v.ptr.p_double[1] = 1;
+                
+                /*
+                 * Apply G from the left to transform the rows of the matrix in
+                 * columns K to I2.
+                 */
+                applyreflectionfromtheleft(h, tau, &v, k, k+nr-1, k, i2, &work, _state);
+                
+                /*
+                 * Apply G from the right to transform the columns of the
+                 * matrix in rows I1 to min(K+NR,I).
+                 */
+                applyreflectionfromtheright(h, tau, &v, i1, ae_minint(k+nr, i, _state), k, k+nr-1, &work, _state);
+                if( wantz )
+                {
+                    
+                    /*
+                     * Accumulate transformations in the matrix Z
+                     */
+                    applyreflectionfromtheright(z, tau, &v, 1, n, k, k+nr-1, &work, _state);
+                }
+            }
+        }
+        
+        /*
+         * Failure to converge in remaining number of iterations
+         */
+        if( failflag )
+        {
+            *info = i;
+            ae_frame_leave(_state);
+            return;
+        }
+        
+        /*
+         * A submatrix of order <= MAXB in rows and columns L to I has split
+         * off. Use the double-shift QR algorithm to handle it.
+         */
+        hsschur_internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
+        if( *info>0 )
+        {
+            ae_frame_leave(_state);
+            return;
+        }
+        
+        /*
+         * Decrement number of remaining iterations, and return to start of
+         * the main loop with a new value of I.
+         */
+        itn = itn-its;
+        i = l-1;
+    }
+    ae_frame_leave(_state);
+}
+
+
+static void hsschur_internalauxschur(ae_bool wantt,
+     ae_bool wantz,
+     ae_int_t n,
+     ae_int_t ilo,
+     ae_int_t ihi,
+     /* Real    */ ae_matrix* h,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     ae_int_t iloz,
+     ae_int_t ihiz,
+     /* Real    */ ae_matrix* z,
+     /* Real    */ ae_vector* work,
+     /* Real    */ ae_vector* workv3,
+     /* Real    */ ae_vector* workc1,
+     /* Real    */ ae_vector* works1,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t i1;
+    ae_int_t i2;
+    ae_int_t itn;
+    ae_int_t its;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t l;
+    ae_int_t m;
+    ae_int_t nh;
+    ae_int_t nr;
+    ae_int_t nz;
+    double ave;
+    double cs;
+    double disc;
+    double h00;
+    double h10;
+    double h11;
+    double h12;
+    double h21;
+    double h22;
+    double h33;
+    double h33s;
+    double h43h34;
+    double h44;
+    double h44s;
+    double ovfl;
+    double s;
+    double smlnum;
+    double sn;
+    double sum;
+    double t1;
+    double t2;
+    double t3;
+    double tst1;
+    double unfl;
+    double v1;
+    double v2;
+    double v3;
+    ae_bool failflag;
+    double dat1;
+    double dat2;
+    ae_int_t p1;
+    double him1im1;
+    double him1i;
+    double hiim1;
+    double hii;
+    double wrim1;
+    double wri;
+    double wiim1;
+    double wii;
+    double ulp;
+
+    *info = 0;
+
+    *info = 0;
+    dat1 = 0.75;
+    dat2 = -0.4375;
+    ulp = ae_machineepsilon;
+    
+    /*
+     * Quick return if possible
+     */
+    if( n==0 )
+    {
+        return;
+    }
+    if( ilo==ihi )
+    {
+        wr->ptr.p_double[ilo] = h->ptr.pp_double[ilo][ilo];
+        wi->ptr.p_double[ilo] = 0;
+        return;
+    }
+    nh = ihi-ilo+1;
+    nz = ihiz-iloz+1;
+    
+    /*
+     * Set machine-dependent constants for the stopping criterion.
+     * If norm(H) <= sqrt(OVFL), overflow should not occur.
+     */
+    unfl = ae_minrealnumber;
+    ovfl = 1/unfl;
+    smlnum = unfl*(nh/ulp);
+    
+    /*
+     * I1 and I2 are the indices of the first row and last column of H
+     * to which transformations must be applied. If eigenvalues only are
+     * being computed, I1 and I2 are set inside the main loop.
+     */
+    i1 = 1;
+    i2 = n;
+    
+    /*
+     * ITN is the total number of QR iterations allowed.
+     */
+    itn = 30*nh;
+    
+    /*
+     * The main loop begins here. I is the loop index and decreases from
+     * IHI to ILO in steps of 1 or 2. Each iteration of the loop works
+     * with the active submatrix in rows and columns L to I.
+     * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+     * H(L,L-1) is negligible so that the matrix splits.
+     */
+    i = ihi;
+    for(;;)
+    {
+        l = ilo;
+        if( i<ilo )
+        {
+            return;
+        }
+        
+        /*
+         * Perform QR iterations on rows and columns ILO to I until a
+         * submatrix of order 1 or 2 splits off at the bottom because a
+         * subdiagonal element has become negligible.
+         */
+        failflag = ae_true;
+        for(its=0; its<=itn; its++)
+        {
+            
+            /*
+             * Look for a single small subdiagonal element.
+             */
+            for(k=i; k>=l+1; k--)
+            {
+                tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
+                if( ae_fp_eq(tst1,0) )
+                {
+                    tst1 = upperhessenberg1norm(h, l, i, l, i, work, _state);
+                }
+                if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) )
+                {
+                    break;
+                }
+            }
+            l = k;
+            if( l>ilo )
+            {
+                
+                /*
+                 * H(L,L-1) is negligible
+                 */
+                h->ptr.pp_double[l][l-1] = 0;
+            }
+            
+            /*
+             * Exit from loop if a submatrix of order 1 or 2 has split off.
+             */
+            if( l>=i-1 )
+            {
+                failflag = ae_false;
+                break;
+            }
+            
+            /*
+             * Now the active submatrix is in rows and columns L to I. If
+             * eigenvalues only are being computed, only the active submatrix
+             * need be transformed.
+             */
+            if( its==10||its==20 )
+            {
+                
+                /*
+                 * Exceptional shift.
+                 */
+                s = ae_fabs(h->ptr.pp_double[i][i-1], _state)+ae_fabs(h->ptr.pp_double[i-1][i-2], _state);
+                h44 = dat1*s+h->ptr.pp_double[i][i];
+                h33 = h44;
+                h43h34 = dat2*s*s;
+            }
+            else
+            {
+                
+                /*
+                 * Prepare to use Francis' double shift
+                 * (i.e. 2nd degree generalized Rayleigh quotient)
+                 */
+                h44 = h->ptr.pp_double[i][i];
+                h33 = h->ptr.pp_double[i-1][i-1];
+                h43h34 = h->ptr.pp_double[i][i-1]*h->ptr.pp_double[i-1][i];
+                s = h->ptr.pp_double[i-1][i-2]*h->ptr.pp_double[i-1][i-2];
+                disc = (h33-h44)*0.5;
+                disc = disc*disc+h43h34;
+                if( ae_fp_greater(disc,0) )
+                {
+                    
+                    /*
+                     * Real roots: use Wilkinson's shift twice
+                     */
+                    disc = ae_sqrt(disc, _state);
+                    ave = 0.5*(h33+h44);
+                    if( ae_fp_greater(ae_fabs(h33, _state)-ae_fabs(h44, _state),0) )
+                    {
+                        h33 = h33*h44-h43h34;
+                        h44 = h33/(hsschur_extschursign(disc, ave, _state)+ave);
+                    }
+                    else
+                    {
+                        h44 = hsschur_extschursign(disc, ave, _state)+ave;
+                    }
+                    h33 = h44;
+                    h43h34 = 0;
+                }
+            }
+            
+            /*
+             * Look for two consecutive small subdiagonal elements.
+             */
+            for(m=i-2; m>=l; m--)
+            {
+                
+                /*
+                 * Determine the effect of starting the double-shift QR
+                 * iteration at row M, and see if this would make H(M,M-1)
+                 * negligible.
+                 */
+                h11 = h->ptr.pp_double[m][m];
+                h22 = h->ptr.pp_double[m+1][m+1];
+                h21 = h->ptr.pp_double[m+1][m];
+                h12 = h->ptr.pp_double[m][m+1];
+                h44s = h44-h11;
+                h33s = h33-h11;
+                v1 = (h33s*h44s-h43h34)/h21+h12;
+                v2 = h22-h11-h33s-h44s;
+                v3 = h->ptr.pp_double[m+2][m+1];
+                s = ae_fabs(v1, _state)+ae_fabs(v2, _state)+ae_fabs(v3, _state);
+                v1 = v1/s;
+                v2 = v2/s;
+                v3 = v3/s;
+                workv3->ptr.p_double[1] = v1;
+                workv3->ptr.p_double[2] = v2;
+                workv3->ptr.p_double[3] = v3;
+                if( m==l )
+                {
+                    break;
+                }
+                h00 = h->ptr.pp_double[m-1][m-1];
+                h10 = h->ptr.pp_double[m][m-1];
+                tst1 = ae_fabs(v1, _state)*(ae_fabs(h00, _state)+ae_fabs(h11, _state)+ae_fabs(h22, _state));
+                if( ae_fp_less_eq(ae_fabs(h10, _state)*(ae_fabs(v2, _state)+ae_fabs(v3, _state)),ulp*tst1) )
+                {
+                    break;
+                }
+            }
+            
+            /*
+             * Double-shift QR step
+             */
+            for(k=m; k<=i-1; k++)
+            {
+                
+                /*
+                 * The first iteration of this loop determines a reflection G
+                 * from the vector V and applies it from left and right to H,
+                 * thus creating a nonzero bulge below the subdiagonal.
+                 *
+                 * Each subsequent iteration determines a reflection G to
+                 * restore the Hessenberg form in the (K-1)th column, and thus
+                 * chases the bulge one step toward the bottom of the active
+                 * submatrix. NR is the order of G.
+                 */
+                nr = ae_minint(3, i-k+1, _state);
+                if( k>m )
+                {
+                    for(p1=1; p1<=nr; p1++)
+                    {
+                        workv3->ptr.p_double[p1] = h->ptr.pp_double[k+p1-1][k-1];
+                    }
+                }
+                generatereflection(workv3, nr, &t1, _state);
+                if( k>m )
+                {
+                    h->ptr.pp_double[k][k-1] = workv3->ptr.p_double[1];
+                    h->ptr.pp_double[k+1][k-1] = 0;
+                    if( k<i-1 )
+                    {
+                        h->ptr.pp_double[k+2][k-1] = 0;
+                    }
+                }
+                else
+                {
+                    if( m>l )
+                    {
+                        h->ptr.pp_double[k][k-1] = -h->ptr.pp_double[k][k-1];
+                    }
+                }
+                v2 = workv3->ptr.p_double[2];
+                t2 = t1*v2;
+                if( nr==3 )
+                {
+                    v3 = workv3->ptr.p_double[3];
+                    t3 = t1*v3;
+                    
+                    /*
+                     * Apply G from the left to transform the rows of the matrix
+                     * in columns K to I2.
+                     */
+                    for(j=k; j<=i2; j++)
+                    {
+                        sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j]+v3*h->ptr.pp_double[k+2][j];
+                        h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
+                        h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
+                        h->ptr.pp_double[k+2][j] = h->ptr.pp_double[k+2][j]-sum*t3;
+                    }
+                    
+                    /*
+                     * Apply G from the right to transform the columns of the
+                     * matrix in rows I1 to min(K+3,I).
+                     */
+                    for(j=i1; j<=ae_minint(k+3, i, _state); j++)
+                    {
+                        sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1]+v3*h->ptr.pp_double[j][k+2];
+                        h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
+                        h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
+                        h->ptr.pp_double[j][k+2] = h->ptr.pp_double[j][k+2]-sum*t3;
+                    }
+                    if( wantz )
+                    {
+                        
+                        /*
+                         * Accumulate transformations in the matrix Z
+                         */
+                        for(j=iloz; j<=ihiz; j++)
+                        {
+                            sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1]+v3*z->ptr.pp_double[j][k+2];
+                            z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
+                            z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
+                            z->ptr.pp_double[j][k+2] = z->ptr.pp_double[j][k+2]-sum*t3;
+                        }
+                    }
+                }
+                else
+                {
+                    if( nr==2 )
+                    {
+                        
+                        /*
+                         * Apply G from the left to transform the rows of the matrix
+                         * in columns K to I2.
+                         */
+                        for(j=k; j<=i2; j++)
+                        {
+                            sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j];
+                            h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
+                            h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
+                        }
+                        
+                        /*
+                         * Apply G from the right to transform the columns of the
+                         * matrix in rows I1 to min(K+3,I).
+                         */
+                        for(j=i1; j<=i; j++)
+                        {
+                            sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1];
+                            h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
+                            h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
+                        }
+                        if( wantz )
+                        {
+                            
+                            /*
+                             * Accumulate transformations in the matrix Z
+                             */
+                            for(j=iloz; j<=ihiz; j++)
+                            {
+                                sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1];
+                                z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
+                                z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if( failflag )
+        {
+            
+            /*
+             * Failure to converge in remaining number of iterations
+             */
+            *info = i;
+            return;
+        }
+        if( l==i )
+        {
+            
+            /*
+             * H(I,I-1) is negligible: one eigenvalue has converged.
+             */
+            wr->ptr.p_double[i] = h->ptr.pp_double[i][i];
+            wi->ptr.p_double[i] = 0;
+        }
+        else
+        {
+            if( l==i-1 )
+            {
+                
+                /*
+                 * H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
+                 *
+                 *        Transform the 2-by-2 submatrix to standard Schur form,
+                 *        and compute and store the eigenvalues.
+                 */
+                him1im1 = h->ptr.pp_double[i-1][i-1];
+                him1i = h->ptr.pp_double[i-1][i];
+                hiim1 = h->ptr.pp_double[i][i-1];
+                hii = h->ptr.pp_double[i][i];
+                hsschur_aux2x2schur(&him1im1, &him1i, &hiim1, &hii, &wrim1, &wiim1, &wri, &wii, &cs, &sn, _state);
+                wr->ptr.p_double[i-1] = wrim1;
+                wi->ptr.p_double[i-1] = wiim1;
+                wr->ptr.p_double[i] = wri;
+                wi->ptr.p_double[i] = wii;
+                h->ptr.pp_double[i-1][i-1] = him1im1;
+                h->ptr.pp_double[i-1][i] = him1i;
+                h->ptr.pp_double[i][i-1] = hiim1;
+                h->ptr.pp_double[i][i] = hii;
+                if( wantt )
+                {
+                    
+                    /*
+                     * Apply the transformation to the rest of H.
+                     */
+                    if( i2>i )
+                    {
+                        workc1->ptr.p_double[1] = cs;
+                        works1->ptr.p_double[1] = sn;
+                        applyrotationsfromtheleft(ae_true, i-1, i, i+1, i2, workc1, works1, h, work, _state);
+                    }
+                    workc1->ptr.p_double[1] = cs;
+                    works1->ptr.p_double[1] = sn;
+                    applyrotationsfromtheright(ae_true, i1, i-2, i-1, i, workc1, works1, h, work, _state);
+                }
+                if( wantz )
+                {
+                    
+                    /*
+                     * Apply the transformation to Z.
+                     */
+                    workc1->ptr.p_double[1] = cs;
+                    works1->ptr.p_double[1] = sn;
+                    applyrotationsfromtheright(ae_true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work, _state);
+                }
+            }
+        }
+        
+        /*
+         * Decrement number of remaining iterations, and return to start of
+         * the main loop with new value of I.
+         */
+        itn = itn-its;
+        i = l-1;
+    }
+}
+
+
+static void hsschur_aux2x2schur(double* a,
+     double* b,
+     double* c,
+     double* d,
+     double* rt1r,
+     double* rt1i,
+     double* rt2r,
+     double* rt2i,
+     double* cs,
+     double* sn,
+     ae_state *_state)
+{
+    double multpl;
+    double aa;
+    double bb;
+    double bcmax;
+    double bcmis;
+    double cc;
+    double cs1;
+    double dd;
+    double eps;
+    double p;
+    double sab;
+    double sac;
+    double scl;
+    double sigma;
+    double sn1;
+    double tau;
+    double temp;
+    double z;
+
+    *rt1r = 0;
+    *rt1i = 0;
+    *rt2r = 0;
+    *rt2i = 0;
+    *cs = 0;
+    *sn = 0;
+
+    multpl = 4.0;
+    eps = ae_machineepsilon;
+    if( ae_fp_eq(*c,0) )
+    {
+        *cs = 1;
+        *sn = 0;
+    }
+    else
+    {
+        if( ae_fp_eq(*b,0) )
+        {
+            
+            /*
+             * Swap rows and columns
+             */
+            *cs = 0;
+            *sn = 1;
+            temp = *d;
+            *d = *a;
+            *a = temp;
+            *b = -*c;
+            *c = 0;
+        }
+        else
+        {
+            if( ae_fp_eq(*a-(*d),0)&&hsschur_extschursigntoone(*b, _state)!=hsschur_extschursigntoone(*c, _state) )
+            {
+                *cs = 1;
+                *sn = 0;
+            }
+            else
+            {
+                temp = *a-(*d);
+                p = 0.5*temp;
+                bcmax = ae_maxreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state);
+                bcmis = ae_minreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state)*hsschur_extschursigntoone(*b, _state)*hsschur_extschursigntoone(*c, _state);
+                scl = ae_maxreal(ae_fabs(p, _state), bcmax, _state);
+                z = p/scl*p+bcmax/scl*bcmis;
+                
+                /*
+                 * If Z is of the order of the machine accuracy, postpone the
+                 * decision on the nature of eigenvalues
+                 */
+                if( ae_fp_greater_eq(z,multpl*eps) )
+                {
+                    
+                    /*
+                     * Real eigenvalues. Compute A and D.
+                     */
+                    z = p+hsschur_extschursign(ae_sqrt(scl, _state)*ae_sqrt(z, _state), p, _state);
+                    *a = *d+z;
+                    *d = *d-bcmax/z*bcmis;
+                    
+                    /*
+                     * Compute B and the rotation matrix
+                     */
+                    tau = pythag2(*c, z, _state);
+                    *cs = z/tau;
+                    *sn = *c/tau;
+                    *b = *b-(*c);
+                    *c = 0;
+                }
+                else
+                {
+                    
+                    /*
+                     * Complex eigenvalues, or real (almost) equal eigenvalues.
+                     * Make diagonal elements equal.
+                     */
+                    sigma = *b+(*c);
+                    tau = pythag2(sigma, temp, _state);
+                    *cs = ae_sqrt(0.5*(1+ae_fabs(sigma, _state)/tau), _state);
+                    *sn = -p/(tau*(*cs))*hsschur_extschursign(1, sigma, _state);
+                    
+                    /*
+                     * Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
+                     *         [ CC  DD ]   [ C  D ] [ SN  CS ]
+                     */
+                    aa = *a*(*cs)+*b*(*sn);
+                    bb = -*a*(*sn)+*b*(*cs);
+                    cc = *c*(*cs)+*d*(*sn);
+                    dd = -*c*(*sn)+*d*(*cs);
+                    
+                    /*
+                     * Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
+                     *         [ C  D ]   [-SN  CS ] [ CC  DD ]
+                     */
+                    *a = aa*(*cs)+cc*(*sn);
+                    *b = bb*(*cs)+dd*(*sn);
+                    *c = -aa*(*sn)+cc*(*cs);
+                    *d = -bb*(*sn)+dd*(*cs);
+                    temp = 0.5*(*a+(*d));
+                    *a = temp;
+                    *d = temp;
+                    if( ae_fp_neq(*c,0) )
+                    {
+                        if( ae_fp_neq(*b,0) )
+                        {
+                            if( hsschur_extschursigntoone(*b, _state)==hsschur_extschursigntoone(*c, _state) )
+                            {
+                                
+                                /*
+                                 * Real eigenvalues: reduce to upper triangular form
+                                 */
+                                sab = ae_sqrt(ae_fabs(*b, _state), _state);
+                                sac = ae_sqrt(ae_fabs(*c, _state), _state);
+                                p = hsschur_extschursign(sab*sac, *c, _state);
+                                tau = 1/ae_sqrt(ae_fabs(*b+(*c), _state), _state);
+                                *a = temp+p;
+                                *d = temp-p;
+                                *b = *b-(*c);
+                                *c = 0;
+                                cs1 = sab*tau;
+                                sn1 = sac*tau;
+                                temp = *cs*cs1-*sn*sn1;
+                                *sn = *cs*sn1+*sn*cs1;
+                                *cs = temp;
+                            }
+                        }
+                        else
+                        {
+                            *b = -*c;
+                            *c = 0;
+                            temp = *cs;
+                            *cs = -*sn;
+                            *sn = temp;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+    /*
+     * Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
+     */
+    *rt1r = *a;
+    *rt2r = *d;
+    if( ae_fp_eq(*c,0) )
+    {
+        *rt1i = 0;
+        *rt2i = 0;
+    }
+    else
+    {
+        *rt1i = ae_sqrt(ae_fabs(*b, _state), _state)*ae_sqrt(ae_fabs(*c, _state), _state);
+        *rt2i = -*rt1i;
+    }
+}
+
+
+static double hsschur_extschursign(double a, double b, ae_state *_state)
+{
+    double result;
+
+
+    if( ae_fp_greater_eq(b,0) )
+    {
+        result = ae_fabs(a, _state);
+    }
+    else
+    {
+        result = -ae_fabs(a, _state);
+    }
+    return result;
+}
+
+
+static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state)
+{
+    ae_int_t result;
+
+
+    if( ae_fp_greater_eq(b,0) )
+    {
+        result = 1;
+    }
+    else
+    {
+        result = -1;
+    }
+    return result;
+}
+
+
+
+
+/*************************************************************************
+Utility subroutine performing the "safe" solution of system of linear
+equations with triangular coefficient matrices.
+
+The subroutine uses scaling and solves the scaled system A*x=s*b (where  s
+is  a  scalar  value)  instead  of  A*x=b,  choosing  s  so  that x can be
+represented by a floating-point number. The closer the system  gets  to  a
+singular, the less s is. If the system is singular, s=0 and x contains the
+non-trivial solution of equation A*x=0.
+
+The feature of an algorithm is that it could not cause an  overflow  or  a
+division by zero regardless of the matrix used as the input.
+
+The algorithm can solve systems of equations with  upper/lower  triangular
+matrices,  with/without unit diagonal, and systems of type A*x=b or A'*x=b
+(where A' is a transposed matrix A).
+
+Input parameters:
+    A       -   system matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    X       -   right-hand member of a system.
+                Array whose index ranges within [0..N-1].
+    IsUpper -   matrix type. If it is True, the system matrix is the upper
+                triangular and is located in  the  corresponding  part  of
+                matrix A.
+    Trans   -   problem type. If it is True, the problem to be  solved  is
+                A'*x=b, otherwise it is A*x=b.
+    Isunit  -   matrix type. If it is True, the system matrix has  a  unit
+                diagonal (the elements on the main diagonal are  not  used
+                in the calculation process), otherwise the matrix is considered
+                to be a general triangular matrix.
+
+Output parameters:
+    X       -   solution. Array whose index ranges within [0..N-1].
+    S       -   scaling factor.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     June 30, 1992
+*************************************************************************/
+void rmatrixtrsafesolve(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double* s,
+     ae_bool isupper,
+     ae_bool istrans,
+     ae_bool isunit,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_bool normin;
+    ae_vector cnorm;
+    ae_matrix a1;
+    ae_vector x1;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    *s = 0;
+    ae_vector_init(&cnorm, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&x1, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * From 0-based to 1-based
+     */
+    normin = ae_false;
+    ae_matrix_set_length(&a1, n+1, n+1, _state);
+    ae_vector_set_length(&x1, n+1, _state);
+    for(i=1; i<=n; i++)
+    {
+        ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
+    }
+    ae_v_move(&x1.ptr.p_double[1], 1, &x->ptr.p_double[0], 1, ae_v_len(1,n));
+    
+    /*
+     * Solve 1-based
+     */
+    safesolvetriangular(&a1, n, &x1, s, isupper, istrans, isunit, normin, &cnorm, _state);
+    
+    /*
+     * From 1-based to 0-based
+     */
+    ae_v_move(&x->ptr.p_double[0], 1, &x1.ptr.p_double[1], 1, ae_v_len(0,n-1));
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Obsolete 1-based subroutine.
+See RMatrixTRSafeSolve for 0-based replacement.
+*************************************************************************/
+void safesolvetriangular(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double* s,
+     ae_bool isupper,
+     ae_bool istrans,
+     ae_bool isunit,
+     ae_bool normin,
+     /* Real    */ ae_vector* cnorm,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t imax;
+    ae_int_t j;
+    ae_int_t jfirst;
+    ae_int_t jinc;
+    ae_int_t jlast;
+    ae_int_t jm1;
+    ae_int_t jp1;
+    ae_int_t ip1;
+    ae_int_t im1;
+    ae_int_t k;
+    ae_int_t flg;
+    double v;
+    double vd;
+    double bignum;
+    double grow;
+    double rec;
+    double smlnum;
+    double sumj;
+    double tjj;
+    double tjjs;
+    double tmax;
+    double tscal;
+    double uscal;
+    double xbnd;
+    double xj;
+    double xmax;
+    ae_bool notran;
+    ae_bool upper;
+    ae_bool nounit;
+
+    *s = 0;
+
+    upper = isupper;
+    notran = !istrans;
+    nounit = !isunit;
+    
+    /*
+     * these initializers are not really necessary,
+     * but without them compiler complains about uninitialized locals
+     */
+    tjjs = 0;
+    
+    /*
+     * Quick return if possible
+     */
+    if( n==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Determine machine dependent parameters to control overflow.
+     */
+    smlnum = ae_minrealnumber/(ae_machineepsilon*2);
+    bignum = 1/smlnum;
+    *s = 1;
+    if( !normin )
+    {
+        ae_vector_set_length(cnorm, n+1, _state);
+        
+        /*
+         * Compute the 1-norm of each column, not including the diagonal.
+         */
+        if( upper )
+        {
+            
+            /*
+             * A is upper triangular.
+             */
+            for(j=1; j<=n; j++)
+            {
+                v = 0;
+                for(k=1; k<=j-1; k++)
+                {
+                    v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
+                }
+                cnorm->ptr.p_double[j] = v;
+            }
+        }
+        else
+        {
+            
+            /*
+             * A is lower triangular.
+             */
+            for(j=1; j<=n-1; j++)
+            {
+                v = 0;
+                for(k=j+1; k<=n; k++)
+                {
+                    v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
+                }
+                cnorm->ptr.p_double[j] = v;
+            }
+            cnorm->ptr.p_double[n] = 0;
+        }
+    }
+    
+    /*
+     * Scale the column norms by TSCAL if the maximum element in CNORM is
+     * greater than BIGNUM.
+     */
+    imax = 1;
+    for(k=2; k<=n; k++)
+    {
+        if( ae_fp_greater(cnorm->ptr.p_double[k],cnorm->ptr.p_double[imax]) )
+        {
+            imax = k;
+        }
+    }
+    tmax = cnorm->ptr.p_double[imax];
+    if( ae_fp_less_eq(tmax,bignum) )
+    {
+        tscal = 1;
+    }
+    else
+    {
+        tscal = 1/(smlnum*tmax);
+        ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), tscal);
+    }
+    
+    /*
+     * Compute a bound on the computed solution vector to see if the
+     * Level 2 BLAS routine DTRSV can be used.
+     */
+    j = 1;
+    for(k=2; k<=n; k++)
+    {
+        if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[j], _state)) )
+        {
+            j = k;
+        }
+    }
+    xmax = ae_fabs(x->ptr.p_double[j], _state);
+    xbnd = xmax;
+    if( notran )
+    {
+        
+        /*
+         * Compute the growth in A * x = b.
+         */
+        if( upper )
+        {
+            jfirst = n;
+            jlast = 1;
+            jinc = -1;
+        }
+        else
+        {
+            jfirst = 1;
+            jlast = n;
+            jinc = 1;
+        }
+        if( ae_fp_neq(tscal,1) )
+        {
+            grow = 0;
+        }
+        else
+        {
+            if( nounit )
+            {
+                
+                /*
+                 * A is non-unit triangular.
+                 *
+                 * Compute GROW = 1/G(j) and XBND = 1/M(j).
+                 * Initially, G(0) = max{x(i), i=1,...,n}.
+                 */
+                grow = 1/ae_maxreal(xbnd, smlnum, _state);
+                xbnd = grow;
+                j = jfirst;
+                while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
+                {
+                    
+                    /*
+                     * Exit the loop if the growth factor is too small.
+                     */
+                    if( ae_fp_less_eq(grow,smlnum) )
+                    {
+                        break;
+                    }
+                    
+                    /*
+                     * M(j) = G(j-1) / abs(A(j,j))
+                     */
+                    tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
+                    xbnd = ae_minreal(xbnd, ae_minreal(1, tjj, _state)*grow, _state);
+                    if( ae_fp_greater_eq(tjj+cnorm->ptr.p_double[j],smlnum) )
+                    {
+                        
+                        /*
+                         * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+                         */
+                        grow = grow*(tjj/(tjj+cnorm->ptr.p_double[j]));
+                    }
+                    else
+                    {
+                        
+                        /*
+                         * G(j) could overflow, set GROW to 0.
+                         */
+                        grow = 0;
+                    }
+                    if( j==jlast )
+                    {
+                        grow = xbnd;
+                    }
+                    j = j+jinc;
+                }
+            }
+            else
+            {
+                
+                /*
+                 * A is unit triangular.
+                 *
+                 * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+                 */
+                grow = ae_minreal(1, 1/ae_maxreal(xbnd, smlnum, _state), _state);
+                j = jfirst;
+                while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
+                {
+                    
+                    /*
+                     * Exit the loop if the growth factor is too small.
+                     */
+                    if( ae_fp_less_eq(grow,smlnum) )
+                    {
+                        break;
+                    }
+                    
+                    /*
+                     * G(j) = G(j-1)*( 1 + CNORM(j) )
+                     */
+                    grow = grow*(1/(1+cnorm->ptr.p_double[j]));
+                    j = j+jinc;
+                }
+            }
+        }
+    }
+    else
+    {
+        
+        /*
+         * Compute the growth in A' * x = b.
+         */
+        if( upper )
+        {
+            jfirst = 1;
+            jlast = n;
+            jinc = 1;
+        }
+        else
+        {
+            jfirst = n;
+            jlast = 1;
+            jinc = -1;
+        }
+        if( ae_fp_neq(tscal,1) )
+        {
+            grow = 0;
+        }
+        else
+        {
+            if( nounit )
+            {
+                
+                /*
+                 * A is non-unit triangular.
+                 *
+                 * Compute GROW = 1/G(j) and XBND = 1/M(j).
+                 * Initially, M(0) = max{x(i), i=1,...,n}.
+                 */
+                grow = 1/ae_maxreal(xbnd, smlnum, _state);
+                xbnd = grow;
+                j = jfirst;
+                while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
+                {
+                    
+                    /*
+                     * Exit the loop if the growth factor is too small.
+                     */
+                    if( ae_fp_less_eq(grow,smlnum) )
+                    {
+                        break;
+                    }
+                    
+                    /*
+                     * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+                     */
+                    xj = 1+cnorm->ptr.p_double[j];
+                    grow = ae_minreal(grow, xbnd/xj, _state);
+                    
+                    /*
+                     * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+                     */
+                    tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
+                    if( ae_fp_greater(xj,tjj) )
+                    {
+                        xbnd = xbnd*(tjj/xj);
+                    }
+                    if( j==jlast )
+                    {
+                        grow = ae_minreal(grow, xbnd, _state);
+                    }
+                    j = j+jinc;
+                }
+            }
+            else
+            {
+                
+                /*
+                 * A is unit triangular.
+                 *
+                 * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+                 */
+                grow = ae_minreal(1, 1/ae_maxreal(xbnd, smlnum, _state), _state);
+                j = jfirst;
+                while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
+                {
+                    
+                    /*
+                     * Exit the loop if the growth factor is too small.
+                     */
+                    if( ae_fp_less_eq(grow,smlnum) )
+                    {
+                        break;
+                    }
+                    
+                    /*
+                     * G(j) = ( 1 + CNORM(j) )*G(j-1)
+                     */
+                    xj = 1+cnorm->ptr.p_double[j];
+                    grow = grow/xj;
+                    j = j+jinc;
+                }
+            }
+        }
+    }
+    if( ae_fp_greater(grow*tscal,smlnum) )
+    {
+        
+        /*
+         * Use the Level 2 BLAS solve if the reciprocal of the bound on
+         * elements of X is not too small.
+         */
+        if( (upper&&notran)||(!upper&&!notran) )
+        {
+            if( nounit )
+            {
+                vd = a->ptr.pp_double[n][n];
+            }
+            else
+            {
+                vd = 1;
+            }
+            x->ptr.p_double[n] = x->ptr.p_double[n]/vd;
+            for(i=n-1; i>=1; i--)
+            {
+                ip1 = i+1;
+                if( upper )
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[i][ip1], 1, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
+                }
+                else
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[ip1][i], a->stride, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
+                }
+                if( nounit )
+                {
+                    vd = a->ptr.pp_double[i][i];
+                }
+                else
+                {
+                    vd = 1;
+                }
+                x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
+            }
+        }
+        else
+        {
+            if( nounit )
+            {
+                vd = a->ptr.pp_double[1][1];
+            }
+            else
+            {
+                vd = 1;
+            }
+            x->ptr.p_double[1] = x->ptr.p_double[1]/vd;
+            for(i=2; i<=n; i++)
+            {
+                im1 = i-1;
+                if( upper )
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[1][i], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
+                }
+                else
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[i][1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
+                }
+                if( nounit )
+                {
+                    vd = a->ptr.pp_double[i][i];
+                }
+                else
+                {
+                    vd = 1;
+                }
+                x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
+            }
+        }
+    }
+    else
+    {
+        
+        /*
+         * Use a Level 1 BLAS solve, scaling intermediate results.
+         */
+        if( ae_fp_greater(xmax,bignum) )
+        {
+            
+            /*
+             * Scale X so that its components are less than or equal to
+             * BIGNUM in absolute value.
+             */
+            *s = bignum/xmax;
+            ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), *s);
+            xmax = bignum;
+        }
+        if( notran )
+        {
+            
+            /*
+             * Solve A * x = b
+             */
+            j = jfirst;
+            while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
+            {
+                
+                /*
+                 * Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+                 */
+                xj = ae_fabs(x->ptr.p_double[j], _state);
+                flg = 0;
+                if( nounit )
+                {
+                    tjjs = a->ptr.pp_double[j][j]*tscal;
+                }
+                else
+                {
+                    tjjs = tscal;
+                    if( ae_fp_eq(tscal,1) )
+                    {
+                        flg = 100;
+                    }
+                }
+                if( flg!=100 )
+                {
+                    tjj = ae_fabs(tjjs, _state);
+                    if( ae_fp_greater(tjj,smlnum) )
+                    {
+                        
+                        /*
+                         * abs(A(j,j)) > SMLNUM:
+                         */
+                        if( ae_fp_less(tjj,1) )
+                        {
+                            if( ae_fp_greater(xj,tjj*bignum) )
+                            {
+                                
+                                /*
+                                 * Scale x by 1/b(j).
+                                 */
+                                rec = 1/xj;
+                                ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
+                                *s = *s*rec;
+                                xmax = xmax*rec;
+                            }
+                        }
+                        x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
+                        xj = ae_fabs(x->ptr.p_double[j], _state);
+                    }
+                    else
+                    {
+                        if( ae_fp_greater(tjj,0) )
+                        {
+                            
+                            /*
+                             * 0 < abs(A(j,j)) <= SMLNUM:
+                             */
+                            if( ae_fp_greater(xj,tjj*bignum) )
+                            {
+                                
+                                /*
+                                 * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+                                 * to avoid overflow when dividing by A(j,j).
+                                 */
+                                rec = tjj*bignum/xj;
+                                if( ae_fp_greater(cnorm->ptr.p_double[j],1) )
+                                {
+                                    
+                                    /*
+                                     * Scale by 1/CNORM(j) to avoid overflow when
+                                     * multiplying x(j) times column j.
+                                     */
+                                    rec = rec/cnorm->ptr.p_double[j];
+                                }
+                                ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
+                                *s = *s*rec;
+                                xmax = xmax*rec;
+                            }
+                            x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
+                            xj = ae_fabs(x->ptr.p_double[j], _state);
+                        }
+                        else
+                        {
+                            
+                            /*
+                             * A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                             * scale = 0, and compute a solution to A*x = 0.
+                             */
+                            for(i=1; i<=n; i++)
+                            {
+                                x->ptr.p_double[i] = 0;
+                            }
+                            x->ptr.p_double[j] = 1;
+                            xj = 1;
+                            *s = 0;
+                            xmax = 0;
+                        }
+                    }
+                }
+                
+                /*
+                 * Scale x if necessary to avoid overflow when adding a
+                 * multiple of column j of A.
+                 */
+                if( ae_fp_greater(xj,1) )
+                {
+                    rec = 1/xj;
+                    if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xmax)*rec) )
+                    {
+                        
+                        /*
+                         * Scale x by 1/(2*abs(x(j))).
+                         */
+                        rec = rec*0.5;
+                        ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
+                        *s = *s*rec;
+                    }
+                }
+                else
+                {
+                    if( ae_fp_greater(xj*cnorm->ptr.p_double[j],bignum-xmax) )
+                    {
+                        
+                        /*
+                         * Scale x by 1/2.
+                         */
+                        ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), 0.5);
+                        *s = *s*0.5;
+                    }
+                }
+                if( upper )
+                {
+                    if( j>1 )
+                    {
+                        
+                        /*
+                         * Compute the update
+                         * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+                         */
+                        v = x->ptr.p_double[j]*tscal;
+                        jm1 = j-1;
+                        ae_v_subd(&x->ptr.p_double[1], 1, &a->ptr.pp_double[1][j], a->stride, ae_v_len(1,jm1), v);
+                        i = 1;
+                        for(k=2; k<=j-1; k++)
+                        {
+                            if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
+                            {
+                                i = k;
+                            }
+                        }
+                        xmax = ae_fabs(x->ptr.p_double[i], _state);
+                    }
+                }
+                else
+                {
+                    if( j<n )
+                    {
+                        
+                        /*
+                         * Compute the update
+                         * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+                         */
+                        jp1 = j+1;
+                        v = x->ptr.p_double[j]*tscal;
+                        ae_v_subd(&x->ptr.p_double[jp1], 1, &a->ptr.pp_double[jp1][j], a->stride, ae_v_len(jp1,n), v);
+                        i = j+1;
+                        for(k=j+2; k<=n; k++)
+                        {
+                            if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
+                            {
+                                i = k;
+                            }
+                        }
+                        xmax = ae_fabs(x->ptr.p_double[i], _state);
+                    }
+                }
+                j = j+jinc;
+            }
+        }
+        else
+        {
+            
+            /*
+             * Solve A' * x = b
+             */
+            j = jfirst;
+            while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
+            {
+                
+                /*
+                 * Compute x(j) = b(j) - sum A(k,j)*x(k).
+                 *   k<>j
+                 */
+                xj = ae_fabs(x->ptr.p_double[j], _state);
+                uscal = tscal;
+                rec = 1/ae_maxreal(xmax, 1, _state);
+                if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xj)*rec) )
+                {
+                    
+                    /*
+                     * If x(j) could overflow, scale x by 1/(2*XMAX).
+                     */
+                    rec = rec*0.5;
+                    if( nounit )
+                    {
+                        tjjs = a->ptr.pp_double[j][j]*tscal;
+                    }
+                    else
+                    {
+                        tjjs = tscal;
+                    }
+                    tjj = ae_fabs(tjjs, _state);
+                    if( ae_fp_greater(tjj,1) )
+                    {
+                        
+                        /*
+                         * Divide by A(j,j) when scaling x if A(j,j) > 1.
+                         */
+                        rec = ae_minreal(1, rec*tjj, _state);
+                        uscal = uscal/tjjs;
+                    }
+                    if( ae_fp_less(rec,1) )
+                    {
+                        ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
+                        *s = *s*rec;
+                        xmax = xmax*rec;
+                    }
+                }
+                sumj = 0;
+                if( ae_fp_eq(uscal,1) )
+                {
+                    
+                    /*
+                     * If the scaling needed for A in the dot product is 1,
+                     * call DDOT to perform the dot product.
+                     */
+                    if( upper )
+                    {
+                        if( j>1 )
+                        {
+                            jm1 = j-1;
+                            sumj = ae_v_dotproduct(&a->ptr.pp_double[1][j], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,jm1));
+                        }
+                        else
+                        {
+                            sumj = 0;
+                        }
+                    }
+                    else
+                    {
+                        if( j<n )
+                        {
+                            jp1 = j+1;
+                            sumj = ae_v_dotproduct(&a->ptr.pp_double[jp1][j], a->stride, &x->ptr.p_double[jp1], 1, ae_v_len(jp1,n));
+                        }
+                    }
+                }
+                else
+                {
+                    
+                    /*
+                     * Otherwise, use in-line code for the dot product.
+                     */
+                    if( upper )
+                    {
+                        for(i=1; i<=j-1; i++)
+                        {
+                            v = a->ptr.pp_double[i][j]*uscal;
+                            sumj = sumj+v*x->ptr.p_double[i];
+                        }
+                    }
+                    else
+                    {
+                        if( j<n )
+                        {
+                            for(i=j+1; i<=n; i++)
+                            {
+                                v = a->ptr.pp_double[i][j]*uscal;
+                                sumj = sumj+v*x->ptr.p_double[i];
+                            }
+                        }
+                    }
+                }
+                if( ae_fp_eq(uscal,tscal) )
+                {
+                    
+                    /*
+                     * Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+                     * was not used to scale the dotproduct.
+                     */
+                    x->ptr.p_double[j] = x->ptr.p_double[j]-sumj;
+                    xj = ae_fabs(x->ptr.p_double[j], _state);
+                    flg = 0;
+                    if( nounit )
+                    {
+                        tjjs = a->ptr.pp_double[j][j]*tscal;
+                    }
+                    else
+                    {
+                        tjjs = tscal;
+                        if( ae_fp_eq(tscal,1) )
+                        {
+                            flg = 150;
+                        }
+                    }
+                    
+                    /*
+                     * Compute x(j) = x(j) / A(j,j), scaling if necessary.
+                     */
+                    if( flg!=150 )
+                    {
+                        tjj = ae_fabs(tjjs, _state);
+                        if( ae_fp_greater(tjj,smlnum) )
+                        {
+                            
+                            /*
+                             * abs(A(j,j)) > SMLNUM:
+                             */
+                            if( ae_fp_less(tjj,1) )
+                            {
+                                if( ae_fp_greater(xj,tjj*bignum) )
+                                {
+                                    
+                                    /*
+                                     * Scale X by 1/abs(x(j)).
+                                     */
+                                    rec = 1/xj;
+                                    ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
+                                    *s = *s*rec;
+                                    xmax = xmax*rec;
+                                }
+                            }
+                            x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
+                        }
+                        else
+                        {
+                            if( ae_fp_greater(tjj,0) )
+                            {
+                                
+                                /*
+                                 * 0 < abs(A(j,j)) <= SMLNUM:
+                                 */
+                                if( ae_fp_greater(xj,tjj*bignum) )
+                                {
+                                    
+                                    /*
+                                     * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+                                     */
+                                    rec = tjj*bignum/xj;
+                                    ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
+                                    *s = *s*rec;
+                                    xmax = xmax*rec;
+                                }
+                                x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
+                            }
+                            else
+                            {
+                                
+                                /*
+                                 * A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                                 * scale = 0, and compute a solution to A'*x = 0.
+                                 */
+                                for(i=1; i<=n; i++)
+                                {
+                                    x->ptr.p_double[i] = 0;
+                                }
+                                x->ptr.p_double[j] = 1;
+                                *s = 0;
+                                xmax = 0;
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    
+                    /*
+                     * Compute x(j) := x(j) / A(j,j)  - sumj if the dot
+                     * product has already been divided by 1/A(j,j).
+                     */
+                    x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs-sumj;
+                }
+                xmax = ae_maxreal(xmax, ae_fabs(x->ptr.p_double[j], _state), _state);
+                j = j+jinc;
+            }
+        }
+        *s = *s/tscal;
+    }
+    
+    /*
+     * Scale the column norms by 1/TSCAL for return.
+     */
+    if( ae_fp_neq(tscal,1) )
+    {
+        v = 1/tscal;
+        ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), v);
+    }
+}
+
+
+
+
+/*************************************************************************
+Real implementation of CMatrixScaledTRSafeSolve
+
+  -- ALGLIB routine --
+     21.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixscaledtrsafesolve(/* Real    */ ae_matrix* a,
+     double sa,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     ae_bool isupper,
+     ae_int_t trans,
+     ae_bool isunit,
+     double maxgrowth,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    double lnmax;
+    double nrmb;
+    double nrmx;
+    ae_int_t i;
+    ae_complex alpha;
+    ae_complex beta;
+    double vr;
+    ae_complex cx;
+    ae_vector tmp;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>0, "RMatrixTRSafeSolve: incorrect N!", _state);
+    ae_assert(trans==0||trans==1, "RMatrixTRSafeSolve: incorrect Trans!", _state);
+    result = ae_true;
+    lnmax = ae_log(ae_maxrealnumber, _state);
+    
+    /*
+     * Quick return if possible
+     */
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Load norms: right part and X
+     */
+    nrmb = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrmb = ae_maxreal(nrmb, ae_fabs(x->ptr.p_double[i], _state), _state);
+    }
+    nrmx = 0;
+    
+    /*
+     * Solve
+     */
+    ae_vector_set_length(&tmp, n, _state);
+    result = ae_true;
+    if( isupper&&trans==0 )
+    {
+        
+        /*
+         * U*x = b
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
+            }
+            if( i<n-1 )
+            {
+                ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
+                vr = ae_v_dotproduct(&tmp.ptr.p_double[i+1], 1, &x->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
+                beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
+            }
+            else
+            {
+                beta = ae_complex_from_d(x->ptr.p_double[i]);
+            }
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_double[i] = cx.x;
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( !isupper&&trans==0 )
+    {
+        
+        /*
+         * L*x = b
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
+            }
+            if( i>0 )
+            {
+                ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
+                vr = ae_v_dotproduct(&tmp.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,i-1));
+                beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
+            }
+            else
+            {
+                beta = ae_complex_from_d(x->ptr.p_double[i]);
+            }
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_double[i] = cx.x;
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( isupper&&trans==1 )
+    {
+        
+        /*
+         * U^T*x = b
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
+            }
+            beta = ae_complex_from_d(x->ptr.p_double[i]);
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_double[i] = cx.x;
+            
+            /*
+             * update the rest of right part
+             */
+            if( i<n-1 )
+            {
+                vr = cx.x;
+                ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
+                ae_v_subd(&x->ptr.p_double[i+1], 1, &tmp.ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), vr);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( !isupper&&trans==1 )
+    {
+        
+        /*
+         * L^T*x = b
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
+            }
+            beta = ae_complex_from_d(x->ptr.p_double[i]);
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_double[i] = cx.x;
+            
+            /*
+             * update the rest of right part
+             */
+            if( i>0 )
+            {
+                vr = cx.x;
+                ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
+                ae_v_subd(&x->ptr.p_double[0], 1, &tmp.ptr.p_double[0], 1, ae_v_len(0,i-1), vr);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = ae_false;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Internal subroutine for safe solution of
+
+    SA*op(A)=b
+    
+where  A  is  NxN  upper/lower  triangular/unitriangular  matrix, op(A) is
+either identity transform, transposition or Hermitian transposition, SA is
+a scaling factor such that max(|SA*A[i,j]|) is close to 1.0 in magnutude.
+
+This subroutine  limits  relative  growth  of  solution  (in inf-norm)  by
+MaxGrowth,  returning  False  if  growth  exceeds MaxGrowth. Degenerate or
+near-degenerate matrices are handled correctly (False is returned) as long
+as MaxGrowth is significantly less than MaxRealNumber/norm(b).
+
+  -- ALGLIB routine --
+     21.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a,
+     double sa,
+     ae_int_t n,
+     /* Complex */ ae_vector* x,
+     ae_bool isupper,
+     ae_int_t trans,
+     ae_bool isunit,
+     double maxgrowth,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    double lnmax;
+    double nrmb;
+    double nrmx;
+    ae_int_t i;
+    ae_complex alpha;
+    ae_complex beta;
+    ae_complex vc;
+    ae_vector tmp;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(n>0, "CMatrixTRSafeSolve: incorrect N!", _state);
+    ae_assert((trans==0||trans==1)||trans==2, "CMatrixTRSafeSolve: incorrect Trans!", _state);
+    result = ae_true;
+    lnmax = ae_log(ae_maxrealnumber, _state);
+    
+    /*
+     * Quick return if possible
+     */
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Load norms: right part and X
+     */
+    nrmb = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrmb = ae_maxreal(nrmb, ae_c_abs(x->ptr.p_complex[i], _state), _state);
+    }
+    nrmx = 0;
+    
+    /*
+     * Solve
+     */
+    ae_vector_set_length(&tmp, n, _state);
+    result = ae_true;
+    if( isupper&&trans==0 )
+    {
+        
+        /*
+         * U*x = b
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
+            }
+            if( i<n-1 )
+            {
+                ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
+                vc = ae_v_cdotproduct(&tmp.ptr.p_complex[i+1], 1, "N", &x->ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1));
+                beta = ae_c_sub(x->ptr.p_complex[i],vc);
+            }
+            else
+            {
+                beta = x->ptr.p_complex[i];
+            }
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_complex[i] = vc;
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( !isupper&&trans==0 )
+    {
+        
+        /*
+         * L*x = b
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
+            }
+            if( i>0 )
+            {
+                ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
+                vc = ae_v_cdotproduct(&tmp.ptr.p_complex[0], 1, "N", &x->ptr.p_complex[0], 1, "N", ae_v_len(0,i-1));
+                beta = ae_c_sub(x->ptr.p_complex[i],vc);
+            }
+            else
+            {
+                beta = x->ptr.p_complex[i];
+            }
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_complex[i] = vc;
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( isupper&&trans==1 )
+    {
+        
+        /*
+         * U^T*x = b
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
+            }
+            beta = x->ptr.p_complex[i];
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_complex[i] = vc;
+            
+            /*
+             * update the rest of right part
+             */
+            if( i<n-1 )
+            {
+                ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
+                ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( !isupper&&trans==1 )
+    {
+        
+        /*
+         * L^T*x = b
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
+            }
+            beta = x->ptr.p_complex[i];
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_complex[i] = vc;
+            
+            /*
+             * update the rest of right part
+             */
+            if( i>0 )
+            {
+                ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
+                ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( isupper&&trans==2 )
+    {
+        
+        /*
+         * U^H*x = b
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
+            }
+            beta = x->ptr.p_complex[i];
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_complex[i] = vc;
+            
+            /*
+             * update the rest of right part
+             */
+            if( i<n-1 )
+            {
+                ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "Conj", ae_v_len(i+1,n-1), sa);
+                ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( !isupper&&trans==2 )
+    {
+        
+        /*
+         * L^T*x = b
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            
+            /*
+             * Task is reduced to alpha*x[i] = beta
+             */
+            if( isunit )
+            {
+                alpha = ae_complex_from_d(sa);
+            }
+            else
+            {
+                alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
+            }
+            beta = x->ptr.p_complex[i];
+            
+            /*
+             * solve alpha*x[i] = beta
+             */
+            result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
+            if( !result )
+            {
+                ae_frame_leave(_state);
+                return result;
+            }
+            x->ptr.p_complex[i] = vc;
+            
+            /*
+             * update the rest of right part
+             */
+            if( i>0 )
+            {
+                ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i-1), sa);
+                ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = ae_false;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+complex basic solver-updater for reduced linear system
+
+    alpha*x[i] = beta
+
+solves this equation and updates it in overlfow-safe manner (keeping track
+of relative growth of solution).
+
+Parameters:
+    Alpha   -   alpha
+    Beta    -   beta
+    LnMax   -   precomputed Ln(MaxRealNumber)
+    BNorm   -   inf-norm of b (right part of original system)
+    MaxGrowth-  maximum growth of norm(x) relative to norm(b)
+    XNorm   -   inf-norm of other components of X (which are already processed)
+                it is updated by CBasicSolveAndUpdate.
+    X       -   solution
+
+  -- ALGLIB routine --
+     26.01.2009
+     Bochkanov Sergey
+*************************************************************************/
+static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
+     ae_complex beta,
+     double lnmax,
+     double bnorm,
+     double maxgrowth,
+     double* xnorm,
+     ae_complex* x,
+     ae_state *_state)
+{
+    double v;
+    ae_bool result;
+
+    x->x = 0;
+    x->y = 0;
+
+    result = ae_false;
+    if( ae_c_eq_d(alpha,0) )
+    {
+        return result;
+    }
+    if( ae_c_neq_d(beta,0) )
+    {
+        
+        /*
+         * alpha*x[i]=beta
+         */
+        v = ae_log(ae_c_abs(beta, _state), _state)-ae_log(ae_c_abs(alpha, _state), _state);
+        if( ae_fp_greater(v,lnmax) )
+        {
+            return result;
+        }
+        *x = ae_c_div(beta,alpha);
+    }
+    else
+    {
+        
+        /*
+         * alpha*x[i]=0
+         */
+        *x = ae_complex_from_d(0);
+    }
+    
+    /*
+     * update NrmX, test growth limit
+     */
+    *xnorm = ae_maxreal(*xnorm, ae_c_abs(*x, _state), _state);
+    if( ae_fp_greater(*xnorm,maxgrowth*bnorm) )
+    {
+        return result;
+    }
+    result = ae_true;
+    return result;
+}
+
+
+
+
+/*************************************************************************
+More precise dot-product. Absolute error of  subroutine  result  is  about
+1 ulp of max(MX,V), where:
+    MX = max( |a[i]*b[i]| )
+    V  = |(a,b)|
+
+INPUT PARAMETERS
+    A       -   array[0..N-1], vector 1
+    B       -   array[0..N-1], vector 2
+    N       -   vectors length, N<2^29.
+    Temp    -   array[0..N-1], pre-allocated temporary storage
+
+OUTPUT PARAMETERS
+    R       -   (A,B)
+    RErr    -   estimate of error. This estimate accounts for both  errors
+                during  calculation  of  (A,B)  and  errors  introduced by
+                rounding of A and B to fit in double (about 1 ulp).
+
+  -- ALGLIB --
+     Copyright 24.08.2009 by Bochkanov Sergey
+*************************************************************************/
+void xdot(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     ae_int_t n,
+     /* Real    */ ae_vector* temp,
+     double* r,
+     double* rerr,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double mx;
+    double v;
+
+    *r = 0;
+    *rerr = 0;
+
+    
+    /*
+     * special cases:
+     * * N=0
+     */
+    if( n==0 )
+    {
+        *r = 0;
+        *rerr = 0;
+        return;
+    }
+    mx = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = a->ptr.p_double[i]*b->ptr.p_double[i];
+        temp->ptr.p_double[i] = v;
+        mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
+    }
+    if( ae_fp_eq(mx,0) )
+    {
+        *r = 0;
+        *rerr = 0;
+        return;
+    }
+    xblas_xsum(temp, mx, n, r, rerr, _state);
+}
+
+
+/*************************************************************************
+More precise complex dot-product. Absolute error of  subroutine  result is
+about 1 ulp of max(MX,V), where:
+    MX = max( |a[i]*b[i]| )
+    V  = |(a,b)|
+
+INPUT PARAMETERS
+    A       -   array[0..N-1], vector 1
+    B       -   array[0..N-1], vector 2
+    N       -   vectors length, N<2^29.
+    Temp    -   array[0..2*N-1], pre-allocated temporary storage
+
+OUTPUT PARAMETERS
+    R       -   (A,B)
+    RErr    -   estimate of error. This estimate accounts for both  errors
+                during  calculation  of  (A,B)  and  errors  introduced by
+                rounding of A and B to fit in double (about 1 ulp).
+
+  -- ALGLIB --
+     Copyright 27.01.2010 by Bochkanov Sergey
+*************************************************************************/
+void xcdot(/* Complex */ ae_vector* a,
+     /* Complex */ ae_vector* b,
+     ae_int_t n,
+     /* Real    */ ae_vector* temp,
+     ae_complex* r,
+     double* rerr,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double mx;
+    double v;
+    double rerrx;
+    double rerry;
+
+    r->x = 0;
+    r->y = 0;
+    *rerr = 0;
+
+    
+    /*
+     * special cases:
+     * * N=0
+     */
+    if( n==0 )
+    {
+        *r = ae_complex_from_d(0);
+        *rerr = 0;
+        return;
+    }
+    
+    /*
+     * calculate real part
+     */
+    mx = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].x;
+        temp->ptr.p_double[2*i+0] = v;
+        mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
+        v = -a->ptr.p_complex[i].y*b->ptr.p_complex[i].y;
+        temp->ptr.p_double[2*i+1] = v;
+        mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
+    }
+    if( ae_fp_eq(mx,0) )
+    {
+        r->x = 0;
+        rerrx = 0;
+    }
+    else
+    {
+        xblas_xsum(temp, mx, 2*n, &r->x, &rerrx, _state);
+    }
+    
+    /*
+     * calculate imaginary part
+     */
+    mx = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].y;
+        temp->ptr.p_double[2*i+0] = v;
+        mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
+        v = a->ptr.p_complex[i].y*b->ptr.p_complex[i].x;
+        temp->ptr.p_double[2*i+1] = v;
+        mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
+    }
+    if( ae_fp_eq(mx,0) )
+    {
+        r->y = 0;
+        rerry = 0;
+    }
+    else
+    {
+        xblas_xsum(temp, mx, 2*n, &r->y, &rerry, _state);
+    }
+    
+    /*
+     * total error
+     */
+    if( ae_fp_eq(rerrx,0)&&ae_fp_eq(rerry,0) )
+    {
+        *rerr = 0;
+    }
+    else
+    {
+        *rerr = ae_maxreal(rerrx, rerry, _state)*ae_sqrt(1+ae_sqr(ae_minreal(rerrx, rerry, _state)/ae_maxreal(rerrx, rerry, _state), _state), _state);
+    }
+}
+
+
+/*************************************************************************
+Internal subroutine for extra-precise calculation of SUM(w[i]).
+
+INPUT PARAMETERS:
+    W   -   array[0..N-1], values to be added
+            W is modified during calculations.
+    MX  -   max(W[i])
+    N   -   array size
+    
+OUTPUT PARAMETERS:
+    R   -   SUM(w[i])
+    RErr-   error estimate for R
+
+  -- ALGLIB --
+     Copyright 24.08.2009 by Bochkanov Sergey
+*************************************************************************/
+static void xblas_xsum(/* Real    */ ae_vector* w,
+     double mx,
+     ae_int_t n,
+     double* r,
+     double* rerr,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+    ae_int_t ks;
+    double v;
+    double s;
+    double ln2;
+    double chunk;
+    double invchunk;
+    ae_bool allzeros;
+
+    *r = 0;
+    *rerr = 0;
+
+    
+    /*
+     * special cases:
+     * * N=0
+     * * N is too large to use integer arithmetics
+     */
+    if( n==0 )
+    {
+        *r = 0;
+        *rerr = 0;
+        return;
+    }
+    if( ae_fp_eq(mx,0) )
+    {
+        *r = 0;
+        *rerr = 0;
+        return;
+    }
+    ae_assert(n<536870912, "XDot: N is too large!", _state);
+    
+    /*
+     * Prepare
+     */
+    ln2 = ae_log(2, _state);
+    *rerr = mx*ae_machineepsilon;
+    
+    /*
+     * 1. find S such that 0.5<=S*MX<1
+     * 2. multiply W by S, so task is normalized in some sense
+     * 3. S:=1/S so we can obtain original vector multiplying by S
+     */
+    k = ae_round(ae_log(mx, _state)/ln2, _state);
+    s = xblas_xfastpow(2, -k, _state);
+    while(ae_fp_greater_eq(s*mx,1))
+    {
+        s = 0.5*s;
+    }
+    while(ae_fp_less(s*mx,0.5))
+    {
+        s = 2*s;
+    }
+    ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
+    s = 1/s;
+    
+    /*
+     * find Chunk=2^M such that N*Chunk<2^29
+     *
+     * we have chosen upper limit (2^29) with enough space left
+     * to tolerate possible problems with rounding and N's close
+     * to the limit, so we don't want to be very strict here.
+     */
+    k = ae_trunc(ae_log((double)536870912/(double)n, _state)/ln2, _state);
+    chunk = xblas_xfastpow(2, k, _state);
+    if( ae_fp_less(chunk,2) )
+    {
+        chunk = 2;
+    }
+    invchunk = 1/chunk;
+    
+    /*
+     * calculate result
+     */
+    *r = 0;
+    ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), chunk);
+    for(;;)
+    {
+        s = s*invchunk;
+        allzeros = ae_true;
+        ks = 0;
+        for(i=0; i<=n-1; i++)
+        {
+            v = w->ptr.p_double[i];
+            k = ae_trunc(v, _state);
+            if( ae_fp_neq(v,k) )
+            {
+                allzeros = ae_false;
+            }
+            w->ptr.p_double[i] = chunk*(v-k);
+            ks = ks+k;
+        }
+        *r = *r+s*ks;
+        v = ae_fabs(*r, _state);
+        if( allzeros||ae_fp_eq(s*n+mx,mx) )
+        {
+            break;
+        }
+    }
+    
+    /*
+     * correct error
+     */
+    *rerr = ae_maxreal(*rerr, ae_fabs(*r, _state)*ae_machineepsilon, _state);
+}
+
+
+/*************************************************************************
+Fast Pow
+
+  -- ALGLIB --
+     Copyright 24.08.2009 by Bochkanov Sergey
+*************************************************************************/
+static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state)
+{
+    double result;
+
+
+    result = 0;
+    if( n>0 )
+    {
+        if( n%2==0 )
+        {
+            result = ae_sqr(xblas_xfastpow(r, n/2, _state), _state);
+        }
+        else
+        {
+            result = r*xblas_xfastpow(r, n-1, _state);
+        }
+        return result;
+    }
+    if( n==0 )
+    {
+        result = 1;
+    }
+    if( n<0 )
+    {
+        result = xblas_xfastpow(1/r, -n, _state);
+    }
+    return result;
+}
+
+
+
+
+/*************************************************************************
+Normalizes direction/step pair: makes |D|=1, scales Stp.
+If |D|=0, it returns, leavind D/Stp unchanged.
+
+  -- ALGLIB --
+     Copyright 01.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void linminnormalized(/* Real    */ ae_vector* d,
+     double* stp,
+     ae_int_t n,
+     ae_state *_state)
+{
+    double mx;
+    double s;
+    ae_int_t i;
+
+
+    
+    /*
+     * first, scale D to avoid underflow/overflow durng squaring
+     */
+    mx = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        mx = ae_maxreal(mx, ae_fabs(d->ptr.p_double[i], _state), _state);
+    }
+    if( ae_fp_eq(mx,0) )
+    {
+        return;
+    }
+    s = 1/mx;
+    ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
+    *stp = *stp/s;
+    
+    /*
+     * normalize D
+     */
+    s = ae_v_dotproduct(&d->ptr.p_double[0], 1, &d->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    s = 1/ae_sqrt(s, _state);
+    ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
+    *stp = *stp/s;
+}
+
+
+/*************************************************************************
+THE  PURPOSE  OF  MCSRCH  IS  TO  FIND A STEP WHICH SATISFIES A SUFFICIENT
+DECREASE CONDITION AND A CURVATURE CONDITION.
+
+AT EACH STAGE THE SUBROUTINE  UPDATES  AN  INTERVAL  OF  UNCERTAINTY  WITH
+ENDPOINTS  STX  AND  STY.  THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
+SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION
+
+    F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).
+
+IF  A STEP  IS OBTAINED FOR  WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
+FUNCTION  VALUE  AND  NONNEGATIVE  DERIVATIVE,   THEN   THE   INTERVAL  OF
+UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).
+
+THE  ALGORITHM  IS  DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
+DECREASE CONDITION
+
+    F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),
+
+AND THE CURVATURE CONDITION
+
+    ABS(GRADF(X+STP*S)'S)) .LE. GTOL*ABS(GRADF(X)'S).
+
+IF  FTOL  IS  LESS  THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
+BELOW,  THEN  THERE  IS  ALWAYS  A  STEP  WHICH SATISFIES BOTH CONDITIONS.
+IF  NO  STEP  CAN BE FOUND  WHICH  SATISFIES  BOTH  CONDITIONS,  THEN  THE
+ALGORITHM  USUALLY STOPS  WHEN  ROUNDING ERRORS  PREVENT FURTHER PROGRESS.
+IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.
+
+PARAMETERS DESCRIPRION
+
+STAGE IS ZERO ON FIRST CALL, ZERO ON FINAL EXIT
+
+N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.
+
+X IS  AN  ARRAY  OF  LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
+THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.
+
+F IS  A  VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
+IT CONTAINS THE VALUE OF F AT X + STP*S.
+
+G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X.
+ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S.
+
+S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION.
+
+STP  IS  A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE
+OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE.
+
+FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE
+SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE
+SATISFIED.
+
+XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE
+WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL.
+
+STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER  AND
+UPPER BOUNDS FOR THE STEP.
+
+MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE
+NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION.
+
+INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS:
+    INFO = 0  IMPROPER INPUT PARAMETERS.
+
+    INFO = 1  THE SUFFICIENT DECREASE CONDITION AND THE
+              DIRECTIONAL DERIVATIVE CONDITION HOLD.
+
+    INFO = 2  RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY
+              IS AT MOST XTOL.
+
+    INFO = 3  NUMBER OF CALLS TO FCN HAS REACHED MAXFEV.
+
+    INFO = 4  THE STEP IS AT THE LOWER BOUND STPMIN.
+
+    INFO = 5  THE STEP IS AT THE UPPER BOUND STPMAX.
+
+    INFO = 6  ROUNDING ERRORS PREVENT FURTHER PROGRESS.
+              THERE MAY NOT BE A STEP WHICH SATISFIES THE
+              SUFFICIENT DECREASE AND CURVATURE CONDITIONS.
+              TOLERANCES MAY BE TOO SMALL.
+
+NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN.
+
+WA IS A WORK ARRAY OF LENGTH N.
+
+ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983
+JORGE J. MORE', DAVID J. THUENTE
+*************************************************************************/
+void mcsrch(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double* f,
+     /* Real    */ ae_vector* g,
+     /* Real    */ ae_vector* s,
+     double* stp,
+     double stpmax,
+     ae_int_t* info,
+     ae_int_t* nfev,
+     /* Real    */ ae_vector* wa,
+     linminstate* state,
+     ae_int_t* stage,
+     ae_state *_state)
+{
+    double v;
+    double p5;
+    double p66;
+    double zero;
+
+
+    
+    /*
+     * init
+     */
+    p5 = 0.5;
+    p66 = 0.66;
+    state->xtrapf = 4.0;
+    zero = 0;
+    if( ae_fp_eq(stpmax,0) )
+    {
+        stpmax = linmin_defstpmax;
+    }
+    if( ae_fp_less(*stp,linmin_stpmin) )
+    {
+        *stp = linmin_stpmin;
+    }
+    if( ae_fp_greater(*stp,stpmax) )
+    {
+        *stp = stpmax;
+    }
+    
+    /*
+     * Main cycle
+     */
+    for(;;)
+    {
+        if( *stage==0 )
+        {
+            
+            /*
+             * NEXT
+             */
+            *stage = 2;
+            continue;
+        }
+        if( *stage==2 )
+        {
+            state->infoc = 1;
+            *info = 0;
+            
+            /*
+             *     CHECK THE INPUT PARAMETERS FOR ERRORS.
+             */
+            if( ((((((n<=0||ae_fp_less_eq(*stp,0))||ae_fp_less(linmin_ftol,0))||ae_fp_less(linmin_gtol,zero))||ae_fp_less(linmin_xtol,zero))||ae_fp_less(linmin_stpmin,zero))||ae_fp_less(stpmax,linmin_stpmin))||linmin_maxfev<=0 )
+            {
+                *stage = 0;
+                return;
+            }
+            
+            /*
+             *     COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION
+             *     AND CHECK THAT S IS A DESCENT DIRECTION.
+             */
+            v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
+            state->dginit = v;
+            if( ae_fp_greater_eq(state->dginit,0) )
+            {
+                *stage = 0;
+                return;
+            }
+            
+            /*
+             *     INITIALIZE LOCAL VARIABLES.
+             */
+            state->brackt = ae_false;
+            state->stage1 = ae_true;
+            *nfev = 0;
+            state->finit = *f;
+            state->dgtest = linmin_ftol*state->dginit;
+            state->width = stpmax-linmin_stpmin;
+            state->width1 = state->width/p5;
+            ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
+            
+            /*
+             *     THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP,
+             *     FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP.
+             *     THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP,
+             *     FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF
+             *     THE INTERVAL OF UNCERTAINTY.
+             *     THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP,
+             *     FUNCTION, AND DERIVATIVE AT THE CURRENT STEP.
+             */
+            state->stx = 0;
+            state->fx = state->finit;
+            state->dgx = state->dginit;
+            state->sty = 0;
+            state->fy = state->finit;
+            state->dgy = state->dginit;
+            
+            /*
+             * NEXT
+             */
+            *stage = 3;
+            continue;
+        }
+        if( *stage==3 )
+        {
+            
+            /*
+             *     START OF ITERATION.
+             *
+             *     SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND
+             *     TO THE PRESENT INTERVAL OF UNCERTAINTY.
+             */
+            if( state->brackt )
+            {
+                if( ae_fp_less(state->stx,state->sty) )
+                {
+                    state->stmin = state->stx;
+                    state->stmax = state->sty;
+                }
+                else
+                {
+                    state->stmin = state->sty;
+                    state->stmax = state->stx;
+                }
+            }
+            else
+            {
+                state->stmin = state->stx;
+                state->stmax = *stp+state->xtrapf*(*stp-state->stx);
+            }
+            
+            /*
+             *        FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN.
+             */
+            if( ae_fp_greater(*stp,stpmax) )
+            {
+                *stp = stpmax;
+            }
+            if( ae_fp_less(*stp,linmin_stpmin) )
+            {
+                *stp = linmin_stpmin;
+            }
+            
+            /*
+             *        IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET
+             *        STP BE THE LOWEST POINT OBTAINED SO FAR.
+             */
+            if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=linmin_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax)) )
+            {
+                *stp = state->stx;
+            }
+            
+            /*
+             *        EVALUATE THE FUNCTION AND GRADIENT AT STP
+             *        AND COMPUTE THE DIRECTIONAL DERIVATIVE.
+             */
+            ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1));
+            ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp);
+            
+            /*
+             * NEXT
+             */
+            *stage = 4;
+            return;
+        }
+        if( *stage==4 )
+        {
+            *info = 0;
+            *nfev = *nfev+1;
+            v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
+            state->dg = v;
+            state->ftest1 = state->finit+*stp*state->dgtest;
+            
+            /*
+             *        TEST FOR CONVERGENCE.
+             */
+            if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 )
+            {
+                *info = 6;
+            }
+            if( (ae_fp_eq(*stp,stpmax)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) )
+            {
+                *info = 5;
+            }
+            if( ae_fp_eq(*stp,linmin_stpmin)&&(ae_fp_greater(*f,state->ftest1)||ae_fp_greater_eq(state->dg,state->dgtest)) )
+            {
+                *info = 4;
+            }
+            if( *nfev>=linmin_maxfev )
+            {
+                *info = 3;
+            }
+            if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax) )
+            {
+                *info = 2;
+            }
+            if( ae_fp_less_eq(*f,state->ftest1)&&ae_fp_less_eq(ae_fabs(state->dg, _state),-linmin_gtol*state->dginit) )
+            {
+                *info = 1;
+            }
+            
+            /*
+             *        CHECK FOR TERMINATION.
+             */
+            if( *info!=0 )
+            {
+                *stage = 0;
+                return;
+            }
+            
+            /*
+             *        IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED
+             *        FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE.
+             */
+            if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(linmin_ftol, linmin_gtol, _state)*state->dginit) )
+            {
+                state->stage1 = ae_false;
+            }
+            
+            /*
+             *        A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF
+             *        WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED
+             *        FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE
+             *        DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN
+             *        OBTAINED BUT THE DECREASE IS NOT SUFFICIENT.
+             */
+            if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) )
+            {
+                
+                /*
+                 *           DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES.
+                 */
+                state->fm = *f-*stp*state->dgtest;
+                state->fxm = state->fx-state->stx*state->dgtest;
+                state->fym = state->fy-state->sty*state->dgtest;
+                state->dgm = state->dg-state->dgtest;
+                state->dgxm = state->dgx-state->dgtest;
+                state->dgym = state->dgy-state->dgtest;
+                
+                /*
+                 *           CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
+                 *           AND TO COMPUTE THE NEW STEP.
+                 */
+                linmin_mcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
+                
+                /*
+                 *           RESET THE FUNCTION AND GRADIENT VALUES FOR F.
+                 */
+                state->fx = state->fxm+state->stx*state->dgtest;
+                state->fy = state->fym+state->sty*state->dgtest;
+                state->dgx = state->dgxm+state->dgtest;
+                state->dgy = state->dgym+state->dgtest;
+            }
+            else
+            {
+                
+                /*
+                 *           CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
+                 *           AND TO COMPUTE THE NEW STEP.
+                 */
+                linmin_mcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
+            }
+            
+            /*
+             *        FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE
+             *        INTERVAL OF UNCERTAINTY.
+             */
+            if( state->brackt )
+            {
+                if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) )
+                {
+                    *stp = state->stx+p5*(state->sty-state->stx);
+                }
+                state->width1 = state->width;
+                state->width = ae_fabs(state->sty-state->stx, _state);
+            }
+            
+            /*
+             *  NEXT.
+             */
+            *stage = 3;
+            continue;
+        }
+    }
+}
+
+
+/*************************************************************************
+These functions perform Armijo line search using  at  most  FMAX  function
+evaluations.  It  doesn't  enforce  some  kind  of  " sufficient decrease"
+criterion - it just tries different Armijo steps and returns optimum found
+so far.
+
+Optimization is done using F-rcomm interface:
+* ArmijoCreate initializes State structure
+  (reusing previously allocated buffers)
+* ArmijoIteration is subsequently called
+* ArmijoResults returns results
+
+INPUT PARAMETERS:
+    N       -   problem size
+    X       -   array[N], starting point
+    F       -   F(X+S*STP)
+    S       -   step direction, S>0
+    STP     -   step length
+    STPMAX  -   maximum value for STP or zero (if no limit is imposed)
+    FMAX    -   maximum number of function evaluations
+    State   -   optimization state
+
+  -- ALGLIB --
+     Copyright 05.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void armijocreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double f,
+     /* Real    */ ae_vector* s,
+     double stp,
+     double stpmax,
+     ae_int_t fmax,
+     armijostate* state,
+     ae_state *_state)
+{
+
+
+    if( state->x.cnt<n )
+    {
+        ae_vector_set_length(&state->x, n, _state);
+    }
+    if( state->xbase.cnt<n )
+    {
+        ae_vector_set_length(&state->xbase, n, _state);
+    }
+    if( state->s.cnt<n )
+    {
+        ae_vector_set_length(&state->s, n, _state);
+    }
+    state->stpmax = stpmax;
+    state->fmax = fmax;
+    state->stplen = stp;
+    state->fcur = f;
+    state->n = n;
+    ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->s.ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_vector_set_length(&state->rstate.ia, 0+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 0+1, _state);
+    state->rstate.stage = -1;
+}
+
+
+/*************************************************************************
+This is rcomm-based search function
+
+  -- ALGLIB --
+     Copyright 05.10.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool armijoiteration(armijostate* state, ae_state *_state)
+{
+    double v;
+    ae_int_t n;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        v = state->rstate.ra.ptr.p_double[0];
+    }
+    else
+    {
+        n = -983;
+        v = -989;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    if( state->rstate.stage==3 )
+    {
+        goto lbl_3;
+    }
+    
+    /*
+     * Routine body
+     */
+    if( (ae_fp_less_eq(state->stplen,0)||ae_fp_less(state->stpmax,0))||state->fmax<2 )
+    {
+        state->info = 0;
+        result = ae_false;
+        return result;
+    }
+    if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
+    {
+        state->info = 4;
+        result = ae_false;
+        return result;
+    }
+    n = state->n;
+    state->nfev = 0;
+    
+    /*
+     * We always need F
+     */
+    state->needf = ae_true;
+    
+    /*
+     * Bound StpLen
+     */
+    if( ae_fp_greater(state->stplen,state->stpmax)&&ae_fp_neq(state->stpmax,0) )
+    {
+        state->stplen = state->stpmax;
+    }
+    
+    /*
+     * Increase length
+     */
+    v = state->stplen*linmin_armijofactor;
+    if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,0) )
+    {
+        v = state->stpmax;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    state->nfev = state->nfev+1;
+    if( ae_fp_greater_eq(state->f,state->fcur) )
+    {
+        goto lbl_4;
+    }
+    state->stplen = v;
+    state->fcur = state->f;
+lbl_6:
+    if( ae_false )
+    {
+        goto lbl_7;
+    }
+    
+    /*
+     * test stopping conditions
+     */
+    if( state->nfev>=state->fmax )
+    {
+        state->info = 3;
+        result = ae_false;
+        return result;
+    }
+    if( ae_fp_greater_eq(state->stplen,state->stpmax) )
+    {
+        state->info = 5;
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * evaluate F
+     */
+    v = state->stplen*linmin_armijofactor;
+    if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,0) )
+    {
+        v = state->stpmax;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    state->nfev = state->nfev+1;
+    
+    /*
+     * make decision
+     */
+    if( ae_fp_less(state->f,state->fcur) )
+    {
+        state->stplen = v;
+        state->fcur = state->f;
+    }
+    else
+    {
+        state->info = 1;
+        result = ae_false;
+        return result;
+    }
+    goto lbl_6;
+lbl_7:
+lbl_4:
+    
+    /*
+     * Decrease length
+     */
+    v = state->stplen/linmin_armijofactor;
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    state->nfev = state->nfev+1;
+    if( ae_fp_greater_eq(state->f,state->fcur) )
+    {
+        goto lbl_8;
+    }
+    state->stplen = state->stplen/linmin_armijofactor;
+    state->fcur = state->f;
+lbl_10:
+    if( ae_false )
+    {
+        goto lbl_11;
+    }
+    
+    /*
+     * test stopping conditions
+     */
+    if( state->nfev>=state->fmax )
+    {
+        state->info = 3;
+        result = ae_false;
+        return result;
+    }
+    if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
+    {
+        state->info = 4;
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * evaluate F
+     */
+    v = state->stplen/linmin_armijofactor;
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+    state->rstate.stage = 3;
+    goto lbl_rcomm;
+lbl_3:
+    state->nfev = state->nfev+1;
+    
+    /*
+     * make decision
+     */
+    if( ae_fp_less(state->f,state->fcur) )
+    {
+        state->stplen = state->stplen/linmin_armijofactor;
+        state->fcur = state->f;
+    }
+    else
+    {
+        state->info = 1;
+        result = ae_false;
+        return result;
+    }
+    goto lbl_10;
+lbl_11:
+lbl_8:
+    
+    /*
+     * Nothing to be done
+     */
+    state->info = 1;
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ra.ptr.p_double[0] = v;
+    return result;
+}
+
+
+/*************************************************************************
+Results of Armijo search
+
+OUTPUT PARAMETERS:
+    INFO    -   on output it is set to one of the return codes:
+                * 0     improper input params
+                * 1     optimum step is found with at most FMAX evaluations
+                * 3     FMAX evaluations were used,
+                        X contains optimum found so far
+                * 4     step is at lower bound STPMIN
+                * 5     step is at upper bound
+    STP     -   step length (in case of failure it is still returned)
+    F       -   function value (in case of failure it is still returned)
+
+  -- ALGLIB --
+     Copyright 05.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void armijoresults(armijostate* state,
+     ae_int_t* info,
+     double* stp,
+     double* f,
+     ae_state *_state)
+{
+
+
+    *info = state->info;
+    *stp = state->stplen;
+    *f = state->fcur;
+}
+
+
+static void linmin_mcstep(double* stx,
+     double* fx,
+     double* dx,
+     double* sty,
+     double* fy,
+     double* dy,
+     double* stp,
+     double fp,
+     double dp,
+     ae_bool* brackt,
+     double stmin,
+     double stmax,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_bool bound;
+    double gamma;
+    double p;
+    double q;
+    double r;
+    double s;
+    double sgnd;
+    double stpc;
+    double stpf;
+    double stpq;
+    double theta;
+
+
+    *info = 0;
+    
+    /*
+     *     CHECK THE INPUT PARAMETERS FOR ERRORS.
+     */
+    if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),0))||ae_fp_less(stmax,stmin) )
+    {
+        return;
+    }
+    
+    /*
+     *     DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN.
+     */
+    sgnd = dp*(*dx/ae_fabs(*dx, _state));
+    
+    /*
+     *     FIRST CASE. A HIGHER FUNCTION VALUE.
+     *     THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER
+     *     TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN,
+     *     ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN.
+     */
+    if( ae_fp_greater(fp,*fx) )
+    {
+        *info = 1;
+        bound = ae_true;
+        theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
+        s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
+        gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
+        if( ae_fp_less(*stp,*stx) )
+        {
+            gamma = -gamma;
+        }
+        p = gamma-(*dx)+theta;
+        q = gamma-(*dx)+gamma+dp;
+        r = p/q;
+        stpc = *stx+r*(*stp-(*stx));
+        stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
+        if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) )
+        {
+            stpf = stpc;
+        }
+        else
+        {
+            stpf = stpc+(stpq-stpc)/2;
+        }
+        *brackt = ae_true;
+    }
+    else
+    {
+        if( ae_fp_less(sgnd,0) )
+        {
+            
+            /*
+             *     SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF
+             *     OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC
+             *     STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP,
+             *     THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN.
+             */
+            *info = 2;
+            bound = ae_false;
+            theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
+            s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
+            gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
+            if( ae_fp_greater(*stp,*stx) )
+            {
+                gamma = -gamma;
+            }
+            p = gamma-dp+theta;
+            q = gamma-dp+gamma+(*dx);
+            r = p/q;
+            stpc = *stp+r*(*stx-(*stp));
+            stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
+            if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) )
+            {
+                stpf = stpc;
+            }
+            else
+            {
+                stpf = stpq;
+            }
+            *brackt = ae_true;
+        }
+        else
+        {
+            if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) )
+            {
+                
+                /*
+                 *     THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
+                 *     SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES.
+                 *     THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY
+                 *     IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC
+                 *     IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE
+                 *     EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO
+                 *     COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE THE STEP
+                 *     CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN.
+                 */
+                *info = 3;
+                bound = ae_true;
+                theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
+                s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
+                
+                /*
+                 *        THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND
+                 *        TO INFINITY IN THE DIRECTION OF THE STEP.
+                 */
+                gamma = s*ae_sqrt(ae_maxreal(0, ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state);
+                if( ae_fp_greater(*stp,*stx) )
+                {
+                    gamma = -gamma;
+                }
+                p = gamma-dp+theta;
+                q = gamma+(*dx-dp)+gamma;
+                r = p/q;
+                if( ae_fp_less(r,0)&&ae_fp_neq(gamma,0) )
+                {
+                    stpc = *stp+r*(*stx-(*stp));
+                }
+                else
+                {
+                    if( ae_fp_greater(*stp,*stx) )
+                    {
+                        stpc = stmax;
+                    }
+                    else
+                    {
+                        stpc = stmin;
+                    }
+                }
+                stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
+                if( *brackt )
+                {
+                    if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
+                    {
+                        stpf = stpc;
+                    }
+                    else
+                    {
+                        stpf = stpq;
+                    }
+                }
+                else
+                {
+                    if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
+                    {
+                        stpf = stpc;
+                    }
+                    else
+                    {
+                        stpf = stpq;
+                    }
+                }
+            }
+            else
+            {
+                
+                /*
+                 *     FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
+                 *     SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES
+                 *     NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP
+                 *     IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN.
+                 */
+                *info = 4;
+                bound = ae_false;
+                if( *brackt )
+                {
+                    theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
+                    s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state);
+                    gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
+                    if( ae_fp_greater(*stp,*sty) )
+                    {
+                        gamma = -gamma;
+                    }
+                    p = gamma-dp+theta;
+                    q = gamma-dp+gamma+(*dy);
+                    r = p/q;
+                    stpc = *stp+r*(*sty-(*stp));
+                    stpf = stpc;
+                }
+                else
+                {
+                    if( ae_fp_greater(*stp,*stx) )
+                    {
+                        stpf = stmax;
+                    }
+                    else
+                    {
+                        stpf = stmin;
+                    }
+                }
+            }
+        }
+    }
+    
+    /*
+     *     UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT
+     *     DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE.
+     */
+    if( ae_fp_greater(fp,*fx) )
+    {
+        *sty = *stp;
+        *fy = fp;
+        *dy = dp;
+    }
+    else
+    {
+        if( ae_fp_less(sgnd,0.0) )
+        {
+            *sty = *stx;
+            *fy = *fx;
+            *dy = *dx;
+        }
+        *stx = *stp;
+        *fx = fp;
+        *dx = dp;
+    }
+    
+    /*
+     *     COMPUTE THE NEW STEP AND SAFEGUARD IT.
+     */
+    stpf = ae_minreal(stmax, stpf, _state);
+    stpf = ae_maxreal(stmin, stpf, _state);
+    *stp = stpf;
+    if( *brackt&&bound )
+    {
+        if( ae_fp_greater(*sty,*stx) )
+        {
+            *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
+        }
+        else
+        {
+            *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
+        }
+    }
+}
+
+
+ae_bool _linminstate_init(linminstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _linminstate_init_copy(linminstate* dst, linminstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->brackt = src->brackt;
+    dst->stage1 = src->stage1;
+    dst->infoc = src->infoc;
+    dst->dg = src->dg;
+    dst->dgm = src->dgm;
+    dst->dginit = src->dginit;
+    dst->dgtest = src->dgtest;
+    dst->dgx = src->dgx;
+    dst->dgxm = src->dgxm;
+    dst->dgy = src->dgy;
+    dst->dgym = src->dgym;
+    dst->finit = src->finit;
+    dst->ftest1 = src->ftest1;
+    dst->fm = src->fm;
+    dst->fx = src->fx;
+    dst->fxm = src->fxm;
+    dst->fy = src->fy;
+    dst->fym = src->fym;
+    dst->stx = src->stx;
+    dst->sty = src->sty;
+    dst->stmin = src->stmin;
+    dst->stmax = src->stmax;
+    dst->width = src->width;
+    dst->width1 = src->width1;
+    dst->xtrapf = src->xtrapf;
+    return ae_true;
+}
+
+
+void _linminstate_clear(linminstate* p)
+{
+}
+
+
+ae_bool _armijostate_init(armijostate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->s, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _armijostate_init_copy(armijostate* dst, armijostate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->needf = src->needf;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->f = src->f;
+    dst->n = src->n;
+    if( !ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->s, &src->s, _state, make_automatic) )
+        return ae_false;
+    dst->stplen = src->stplen;
+    dst->fcur = src->fcur;
+    dst->stpmax = src->stpmax;
+    dst->fmax = src->fmax;
+    dst->nfev = src->nfev;
+    dst->info = src->info;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _armijostate_clear(armijostate* p)
+{
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->xbase);
+    ae_vector_clear(&p->s);
+    _rcommstate_clear(&p->rstate);
+}
+
+
+
+
+/*************************************************************************
+This subroutine generates FFT plan - a decomposition of a N-length FFT to
+the more simpler operations. Plan consists of the root entry and the child
+entries.
+
+Subroutine parameters:
+    N               task size
+    
+Output parameters:
+    Plan            plan
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+void ftbasegeneratecomplexfftplan(ae_int_t n,
+     ftplan* plan,
+     ae_state *_state)
+{
+    ae_int_t planarraysize;
+    ae_int_t plansize;
+    ae_int_t precomputedsize;
+    ae_int_t tmpmemsize;
+    ae_int_t stackmemsize;
+    ae_int_t stackptr;
+
+    _ftplan_clear(plan);
+
+    planarraysize = 1;
+    plansize = 0;
+    precomputedsize = 0;
+    stackmemsize = 0;
+    stackptr = 0;
+    tmpmemsize = 2*n;
+    ae_vector_set_length(&plan->plan, planarraysize, _state);
+    ftbase_ftbasegenerateplanrec(n, ftbase_ftbasecffttask, plan, &plansize, &precomputedsize, &planarraysize, &tmpmemsize, &stackmemsize, stackptr, _state);
+    ae_assert(stackptr==0, "Internal error in FTBaseGenerateComplexFFTPlan: stack ptr!", _state);
+    ae_vector_set_length(&plan->stackbuf, ae_maxint(stackmemsize, 1, _state), _state);
+    ae_vector_set_length(&plan->tmpbuf, ae_maxint(tmpmemsize, 1, _state), _state);
+    ae_vector_set_length(&plan->precomputed, ae_maxint(precomputedsize, 1, _state), _state);
+    stackptr = 0;
+    ftbase_ftbaseprecomputeplanrec(plan, 0, stackptr, _state);
+    ae_assert(stackptr==0, "Internal error in FTBaseGenerateComplexFFTPlan: stack ptr!", _state);
+}
+
+
+/*************************************************************************
+Generates real FFT plan
+*************************************************************************/
+void ftbasegeneraterealfftplan(ae_int_t n, ftplan* plan, ae_state *_state)
+{
+    ae_int_t planarraysize;
+    ae_int_t plansize;
+    ae_int_t precomputedsize;
+    ae_int_t tmpmemsize;
+    ae_int_t stackmemsize;
+    ae_int_t stackptr;
+
+    _ftplan_clear(plan);
+
+    planarraysize = 1;
+    plansize = 0;
+    precomputedsize = 0;
+    stackmemsize = 0;
+    stackptr = 0;
+    tmpmemsize = 2*n;
+    ae_vector_set_length(&plan->plan, planarraysize, _state);
+    ftbase_ftbasegenerateplanrec(n, ftbase_ftbaserffttask, plan, &plansize, &precomputedsize, &planarraysize, &tmpmemsize, &stackmemsize, stackptr, _state);
+    ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFFTPlan: stack ptr!", _state);
+    ae_vector_set_length(&plan->stackbuf, ae_maxint(stackmemsize, 1, _state), _state);
+    ae_vector_set_length(&plan->tmpbuf, ae_maxint(tmpmemsize, 1, _state), _state);
+    ae_vector_set_length(&plan->precomputed, ae_maxint(precomputedsize, 1, _state), _state);
+    stackptr = 0;
+    ftbase_ftbaseprecomputeplanrec(plan, 0, stackptr, _state);
+    ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFFTPlan: stack ptr!", _state);
+}
+
+
+/*************************************************************************
+Generates real FHT plan
+*************************************************************************/
+void ftbasegeneraterealfhtplan(ae_int_t n, ftplan* plan, ae_state *_state)
+{
+    ae_int_t planarraysize;
+    ae_int_t plansize;
+    ae_int_t precomputedsize;
+    ae_int_t tmpmemsize;
+    ae_int_t stackmemsize;
+    ae_int_t stackptr;
+
+    _ftplan_clear(plan);
+
+    planarraysize = 1;
+    plansize = 0;
+    precomputedsize = 0;
+    stackmemsize = 0;
+    stackptr = 0;
+    tmpmemsize = n;
+    ae_vector_set_length(&plan->plan, planarraysize, _state);
+    ftbase_ftbasegenerateplanrec(n, ftbase_ftbaserfhttask, plan, &plansize, &precomputedsize, &planarraysize, &tmpmemsize, &stackmemsize, stackptr, _state);
+    ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFHTPlan: stack ptr!", _state);
+    ae_vector_set_length(&plan->stackbuf, ae_maxint(stackmemsize, 1, _state), _state);
+    ae_vector_set_length(&plan->tmpbuf, ae_maxint(tmpmemsize, 1, _state), _state);
+    ae_vector_set_length(&plan->precomputed, ae_maxint(precomputedsize, 1, _state), _state);
+    stackptr = 0;
+    ftbase_ftbaseprecomputeplanrec(plan, 0, stackptr, _state);
+    ae_assert(stackptr==0, "Internal error in FTBaseGenerateRealFHTPlan: stack ptr!", _state);
+}
+
+
+/*************************************************************************
+This subroutine executes FFT/FHT plan.
+
+If Plan is a:
+* complex FFT plan  -   sizeof(A)=2*N,
+                        A contains interleaved real/imaginary values
+* real FFT plan     -   sizeof(A)=2*N,
+                        A contains real values interleaved with zeros
+* real FHT plan     -   sizeof(A)=2*N,
+                        A contains real values interleaved with zeros
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+void ftbaseexecuteplan(/* Real    */ ae_vector* a,
+     ae_int_t aoffset,
+     ae_int_t n,
+     ftplan* plan,
+     ae_state *_state)
+{
+    ae_int_t stackptr;
+
+
+    stackptr = 0;
+    ftbaseexecuteplanrec(a, aoffset, plan, 0, stackptr, _state);
+}
+
+
+/*************************************************************************
+Recurrent subroutine for the FTBaseExecutePlan
+
+Parameters:
+    A           FFT'ed array
+    AOffset     offset of the FFT'ed part (distance is measured in doubles)
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+void ftbaseexecuteplanrec(/* Real    */ ae_vector* a,
+     ae_int_t aoffset,
+     ftplan* plan,
+     ae_int_t entryoffset,
+     ae_int_t stackptr,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t n;
+    ae_int_t m;
+    ae_int_t offs;
+    ae_int_t offs1;
+    ae_int_t offs2;
+    ae_int_t offsa;
+    ae_int_t offsb;
+    ae_int_t offsp;
+    double hk;
+    double hnk;
+    double x;
+    double y;
+    double bx;
+    double by;
+    ae_vector emptyarray;
+    double a0x;
+    double a0y;
+    double a1x;
+    double a1y;
+    double a2x;
+    double a2y;
+    double a3x;
+    double a3y;
+    double v0;
+    double v1;
+    double v2;
+    double v3;
+    double t1x;
+    double t1y;
+    double t2x;
+    double t2y;
+    double t3x;
+    double t3y;
+    double t4x;
+    double t4y;
+    double t5x;
+    double t5y;
+    double m1x;
+    double m1y;
+    double m2x;
+    double m2y;
+    double m3x;
+    double m3y;
+    double m4x;
+    double m4y;
+    double m5x;
+    double m5y;
+    double s1x;
+    double s1y;
+    double s2x;
+    double s2y;
+    double s3x;
+    double s3y;
+    double s4x;
+    double s4y;
+    double s5x;
+    double s5y;
+    double c1;
+    double c2;
+    double c3;
+    double c4;
+    double c5;
+    ae_vector tmp;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&emptyarray, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftemptyplan )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcooleytukeyplan )
+    {
+        
+        /*
+         * Cooley-Tukey plan
+         * * transposition
+         * * row-wise FFT
+         * * twiddle factors:
+         *   - TwBase is a basis twiddle factor for I=1, J=1
+         *   - TwRow is a twiddle factor for a second element in a row (J=1)
+         *   - Tw is a twiddle factor for a current element
+         * * transposition again
+         * * row-wise FFT again
+         */
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        ftbase_internalcomplexlintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state);
+        for(i=0; i<=n2-1; i++)
+        {
+            ftbaseexecuteplanrec(a, aoffset+i*n1*2, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state);
+        }
+        ftbase_ffttwcalc(a, aoffset, n1, n2, _state);
+        ftbase_internalcomplexlintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state);
+        for(i=0; i<=n1-1; i++)
+        {
+            ftbaseexecuteplanrec(a, aoffset+i*n2*2, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state);
+        }
+        ftbase_internalcomplexlintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state);
+        ae_frame_leave(_state);
+        return;
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftrealcooleytukeyplan )
+    {
+        
+        /*
+         * Cooley-Tukey plan
+         * * transposition
+         * * row-wise FFT
+         * * twiddle factors:
+         *   - TwBase is a basis twiddle factor for I=1, J=1
+         *   - TwRow is a twiddle factor for a second element in a row (J=1)
+         *   - Tw is a twiddle factor for a current element
+         * * transposition again
+         * * row-wise FFT again
+         */
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        ftbase_internalcomplexlintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state);
+        for(i=0; i<=n1/2-1; i++)
+        {
+            
+            /*
+             * pack two adjacent smaller real FFT's together,
+             * make one complex FFT,
+             * unpack result
+             */
+            offs = aoffset+2*i*n2*2;
+            for(k=0; k<=n2-1; k++)
+            {
+                a->ptr.p_double[offs+2*k+1] = a->ptr.p_double[offs+2*n2+2*k+0];
+            }
+            ftbaseexecuteplanrec(a, offs, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state);
+            plan->tmpbuf.ptr.p_double[0] = a->ptr.p_double[offs+0];
+            plan->tmpbuf.ptr.p_double[1] = 0;
+            plan->tmpbuf.ptr.p_double[2*n2+0] = a->ptr.p_double[offs+1];
+            plan->tmpbuf.ptr.p_double[2*n2+1] = 0;
+            for(k=1; k<=n2-1; k++)
+            {
+                offs1 = 2*k;
+                offs2 = 2*n2+2*k;
+                hk = a->ptr.p_double[offs+2*k+0];
+                hnk = a->ptr.p_double[offs+2*(n2-k)+0];
+                plan->tmpbuf.ptr.p_double[offs1+0] = 0.5*(hk+hnk);
+                plan->tmpbuf.ptr.p_double[offs2+1] = -0.5*(hk-hnk);
+                hk = a->ptr.p_double[offs+2*k+1];
+                hnk = a->ptr.p_double[offs+2*(n2-k)+1];
+                plan->tmpbuf.ptr.p_double[offs2+0] = 0.5*(hk+hnk);
+                plan->tmpbuf.ptr.p_double[offs1+1] = 0.5*(hk-hnk);
+            }
+            ae_v_move(&a->ptr.p_double[offs], 1, &plan->tmpbuf.ptr.p_double[0], 1, ae_v_len(offs,offs+2*n2*2-1));
+        }
+        if( n1%2!=0 )
+        {
+            ftbaseexecuteplanrec(a, aoffset+(n1-1)*n2*2, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state);
+        }
+        ftbase_ffttwcalc(a, aoffset, n2, n1, _state);
+        ftbase_internalcomplexlintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state);
+        for(i=0; i<=n2-1; i++)
+        {
+            ftbaseexecuteplanrec(a, aoffset+i*n1*2, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state);
+        }
+        ftbase_internalcomplexlintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state);
+        ae_frame_leave(_state);
+        return;
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcooleytukeyplan )
+    {
+        
+        /*
+         * Cooley-Tukey FHT plan:
+         * * transpose                    \
+         * * smaller FHT's                |
+         * * pre-process                  |
+         * * multiply by twiddle factors  | corresponds to multiplication by H1
+         * * post-process                 |
+         * * transpose again              /
+         * * multiply by H2 (smaller FHT's)
+         * * final transposition
+         *
+         * For more details see Vitezslav Vesely, "Fast algorithms
+         * of Fourier and Hartley transform and their implementation in MATLAB",
+         * page 31.
+         */
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        n = n1*n2;
+        ftbase_internalreallintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state);
+        for(i=0; i<=n2-1; i++)
+        {
+            ftbaseexecuteplanrec(a, aoffset+i*n1, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state);
+        }
+        for(i=0; i<=n2-1; i++)
+        {
+            for(j=0; j<=n1-1; j++)
+            {
+                offsa = aoffset+i*n1;
+                hk = a->ptr.p_double[offsa+j];
+                hnk = a->ptr.p_double[offsa+(n1-j)%n1];
+                offs = 2*(i*n1+j);
+                plan->tmpbuf.ptr.p_double[offs+0] = -0.5*(hnk-hk);
+                plan->tmpbuf.ptr.p_double[offs+1] = 0.5*(hk+hnk);
+            }
+        }
+        ftbase_ffttwcalc(&plan->tmpbuf, 0, n1, n2, _state);
+        for(j=0; j<=n1-1; j++)
+        {
+            a->ptr.p_double[aoffset+j] = plan->tmpbuf.ptr.p_double[2*j+0]+plan->tmpbuf.ptr.p_double[2*j+1];
+        }
+        if( n2%2==0 )
+        {
+            offs = 2*(n2/2)*n1;
+            offsa = aoffset+n2/2*n1;
+            for(j=0; j<=n1-1; j++)
+            {
+                a->ptr.p_double[offsa+j] = plan->tmpbuf.ptr.p_double[offs+2*j+0]+plan->tmpbuf.ptr.p_double[offs+2*j+1];
+            }
+        }
+        for(i=1; i<=(n2+1)/2-1; i++)
+        {
+            offs = 2*i*n1;
+            offs2 = 2*(n2-i)*n1;
+            offsa = aoffset+i*n1;
+            for(j=0; j<=n1-1; j++)
+            {
+                a->ptr.p_double[offsa+j] = plan->tmpbuf.ptr.p_double[offs+2*j+1]+plan->tmpbuf.ptr.p_double[offs2+2*j+0];
+            }
+            offsa = aoffset+(n2-i)*n1;
+            for(j=0; j<=n1-1; j++)
+            {
+                a->ptr.p_double[offsa+j] = plan->tmpbuf.ptr.p_double[offs+2*j+0]+plan->tmpbuf.ptr.p_double[offs2+2*j+1];
+            }
+        }
+        ftbase_internalreallintranspose(a, n2, n1, aoffset, &plan->tmpbuf, _state);
+        for(i=0; i<=n1-1; i++)
+        {
+            ftbaseexecuteplanrec(a, aoffset+i*n2, plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state);
+        }
+        ftbase_internalreallintranspose(a, n1, n2, aoffset, &plan->tmpbuf, _state);
+        ae_frame_leave(_state);
+        return;
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtn2plan )
+    {
+        
+        /*
+         * Cooley-Tukey FHT plan
+         */
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        n = n1*n2;
+        ftbase_reffht(a, n, aoffset, _state);
+        ae_frame_leave(_state);
+        return;
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcodeletplan )
+    {
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        n = n1*n2;
+        if( n==2 )
+        {
+            a0x = a->ptr.p_double[aoffset+0];
+            a0y = a->ptr.p_double[aoffset+1];
+            a1x = a->ptr.p_double[aoffset+2];
+            a1y = a->ptr.p_double[aoffset+3];
+            v0 = a0x+a1x;
+            v1 = a0y+a1y;
+            v2 = a0x-a1x;
+            v3 = a0y-a1y;
+            a->ptr.p_double[aoffset+0] = v0;
+            a->ptr.p_double[aoffset+1] = v1;
+            a->ptr.p_double[aoffset+2] = v2;
+            a->ptr.p_double[aoffset+3] = v3;
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==3 )
+        {
+            offs = plan->plan.ptr.p_int[entryoffset+7];
+            c1 = plan->precomputed.ptr.p_double[offs+0];
+            c2 = plan->precomputed.ptr.p_double[offs+1];
+            a0x = a->ptr.p_double[aoffset+0];
+            a0y = a->ptr.p_double[aoffset+1];
+            a1x = a->ptr.p_double[aoffset+2];
+            a1y = a->ptr.p_double[aoffset+3];
+            a2x = a->ptr.p_double[aoffset+4];
+            a2y = a->ptr.p_double[aoffset+5];
+            t1x = a1x+a2x;
+            t1y = a1y+a2y;
+            a0x = a0x+t1x;
+            a0y = a0y+t1y;
+            m1x = c1*t1x;
+            m1y = c1*t1y;
+            m2x = c2*(a1y-a2y);
+            m2y = c2*(a2x-a1x);
+            s1x = a0x+m1x;
+            s1y = a0y+m1y;
+            a1x = s1x+m2x;
+            a1y = s1y+m2y;
+            a2x = s1x-m2x;
+            a2y = s1y-m2y;
+            a->ptr.p_double[aoffset+0] = a0x;
+            a->ptr.p_double[aoffset+1] = a0y;
+            a->ptr.p_double[aoffset+2] = a1x;
+            a->ptr.p_double[aoffset+3] = a1y;
+            a->ptr.p_double[aoffset+4] = a2x;
+            a->ptr.p_double[aoffset+5] = a2y;
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==4 )
+        {
+            a0x = a->ptr.p_double[aoffset+0];
+            a0y = a->ptr.p_double[aoffset+1];
+            a1x = a->ptr.p_double[aoffset+2];
+            a1y = a->ptr.p_double[aoffset+3];
+            a2x = a->ptr.p_double[aoffset+4];
+            a2y = a->ptr.p_double[aoffset+5];
+            a3x = a->ptr.p_double[aoffset+6];
+            a3y = a->ptr.p_double[aoffset+7];
+            t1x = a0x+a2x;
+            t1y = a0y+a2y;
+            t2x = a1x+a3x;
+            t2y = a1y+a3y;
+            m2x = a0x-a2x;
+            m2y = a0y-a2y;
+            m3x = a1y-a3y;
+            m3y = a3x-a1x;
+            a->ptr.p_double[aoffset+0] = t1x+t2x;
+            a->ptr.p_double[aoffset+1] = t1y+t2y;
+            a->ptr.p_double[aoffset+4] = t1x-t2x;
+            a->ptr.p_double[aoffset+5] = t1y-t2y;
+            a->ptr.p_double[aoffset+2] = m2x+m3x;
+            a->ptr.p_double[aoffset+3] = m2y+m3y;
+            a->ptr.p_double[aoffset+6] = m2x-m3x;
+            a->ptr.p_double[aoffset+7] = m2y-m3y;
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==5 )
+        {
+            offs = plan->plan.ptr.p_int[entryoffset+7];
+            c1 = plan->precomputed.ptr.p_double[offs+0];
+            c2 = plan->precomputed.ptr.p_double[offs+1];
+            c3 = plan->precomputed.ptr.p_double[offs+2];
+            c4 = plan->precomputed.ptr.p_double[offs+3];
+            c5 = plan->precomputed.ptr.p_double[offs+4];
+            t1x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+8];
+            t1y = a->ptr.p_double[aoffset+3]+a->ptr.p_double[aoffset+9];
+            t2x = a->ptr.p_double[aoffset+4]+a->ptr.p_double[aoffset+6];
+            t2y = a->ptr.p_double[aoffset+5]+a->ptr.p_double[aoffset+7];
+            t3x = a->ptr.p_double[aoffset+2]-a->ptr.p_double[aoffset+8];
+            t3y = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+9];
+            t4x = a->ptr.p_double[aoffset+6]-a->ptr.p_double[aoffset+4];
+            t4y = a->ptr.p_double[aoffset+7]-a->ptr.p_double[aoffset+5];
+            t5x = t1x+t2x;
+            t5y = t1y+t2y;
+            a->ptr.p_double[aoffset+0] = a->ptr.p_double[aoffset+0]+t5x;
+            a->ptr.p_double[aoffset+1] = a->ptr.p_double[aoffset+1]+t5y;
+            m1x = c1*t5x;
+            m1y = c1*t5y;
+            m2x = c2*(t1x-t2x);
+            m2y = c2*(t1y-t2y);
+            m3x = -c3*(t3y+t4y);
+            m3y = c3*(t3x+t4x);
+            m4x = -c4*t4y;
+            m4y = c4*t4x;
+            m5x = -c5*t3y;
+            m5y = c5*t3x;
+            s3x = m3x-m4x;
+            s3y = m3y-m4y;
+            s5x = m3x+m5x;
+            s5y = m3y+m5y;
+            s1x = a->ptr.p_double[aoffset+0]+m1x;
+            s1y = a->ptr.p_double[aoffset+1]+m1y;
+            s2x = s1x+m2x;
+            s2y = s1y+m2y;
+            s4x = s1x-m2x;
+            s4y = s1y-m2y;
+            a->ptr.p_double[aoffset+2] = s2x+s3x;
+            a->ptr.p_double[aoffset+3] = s2y+s3y;
+            a->ptr.p_double[aoffset+4] = s4x+s5x;
+            a->ptr.p_double[aoffset+5] = s4y+s5y;
+            a->ptr.p_double[aoffset+6] = s4x-s5x;
+            a->ptr.p_double[aoffset+7] = s4y-s5y;
+            a->ptr.p_double[aoffset+8] = s2x-s3x;
+            a->ptr.p_double[aoffset+9] = s2y-s3y;
+            ae_frame_leave(_state);
+            return;
+        }
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcodeletplan )
+    {
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        n = n1*n2;
+        if( n==2 )
+        {
+            a0x = a->ptr.p_double[aoffset+0];
+            a1x = a->ptr.p_double[aoffset+1];
+            a->ptr.p_double[aoffset+0] = a0x+a1x;
+            a->ptr.p_double[aoffset+1] = a0x-a1x;
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==3 )
+        {
+            offs = plan->plan.ptr.p_int[entryoffset+7];
+            c1 = plan->precomputed.ptr.p_double[offs+0];
+            c2 = plan->precomputed.ptr.p_double[offs+1];
+            a0x = a->ptr.p_double[aoffset+0];
+            a1x = a->ptr.p_double[aoffset+1];
+            a2x = a->ptr.p_double[aoffset+2];
+            t1x = a1x+a2x;
+            a0x = a0x+t1x;
+            m1x = c1*t1x;
+            m2y = c2*(a2x-a1x);
+            s1x = a0x+m1x;
+            a->ptr.p_double[aoffset+0] = a0x;
+            a->ptr.p_double[aoffset+1] = s1x-m2y;
+            a->ptr.p_double[aoffset+2] = s1x+m2y;
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==4 )
+        {
+            a0x = a->ptr.p_double[aoffset+0];
+            a1x = a->ptr.p_double[aoffset+1];
+            a2x = a->ptr.p_double[aoffset+2];
+            a3x = a->ptr.p_double[aoffset+3];
+            t1x = a0x+a2x;
+            t2x = a1x+a3x;
+            m2x = a0x-a2x;
+            m3y = a3x-a1x;
+            a->ptr.p_double[aoffset+0] = t1x+t2x;
+            a->ptr.p_double[aoffset+1] = m2x-m3y;
+            a->ptr.p_double[aoffset+2] = t1x-t2x;
+            a->ptr.p_double[aoffset+3] = m2x+m3y;
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==5 )
+        {
+            offs = plan->plan.ptr.p_int[entryoffset+7];
+            c1 = plan->precomputed.ptr.p_double[offs+0];
+            c2 = plan->precomputed.ptr.p_double[offs+1];
+            c3 = plan->precomputed.ptr.p_double[offs+2];
+            c4 = plan->precomputed.ptr.p_double[offs+3];
+            c5 = plan->precomputed.ptr.p_double[offs+4];
+            t1x = a->ptr.p_double[aoffset+1]+a->ptr.p_double[aoffset+4];
+            t2x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+3];
+            t3x = a->ptr.p_double[aoffset+1]-a->ptr.p_double[aoffset+4];
+            t4x = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+2];
+            t5x = t1x+t2x;
+            v0 = a->ptr.p_double[aoffset+0]+t5x;
+            a->ptr.p_double[aoffset+0] = v0;
+            m2x = c2*(t1x-t2x);
+            m3y = c3*(t3x+t4x);
+            s3y = m3y-c4*t4x;
+            s5y = m3y+c5*t3x;
+            s1x = v0+c1*t5x;
+            s2x = s1x+m2x;
+            s4x = s1x-m2x;
+            a->ptr.p_double[aoffset+1] = s2x-s3y;
+            a->ptr.p_double[aoffset+2] = s4x-s5y;
+            a->ptr.p_double[aoffset+3] = s4x+s5y;
+            a->ptr.p_double[aoffset+4] = s2x+s3y;
+            ae_frame_leave(_state);
+            return;
+        }
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftbluesteinplan )
+    {
+        
+        /*
+         * Bluestein plan:
+         * 1. multiply by precomputed coefficients
+         * 2. make convolution: forward FFT, multiplication by precomputed FFT
+         *    and backward FFT. backward FFT is represented as
+         *
+         *        invfft(x) = fft(x')'/M
+         *
+         *    for performance reasons reduction of inverse FFT to
+         *    forward FFT is merged with multiplication of FFT components
+         *    and last stage of Bluestein's transformation.
+         * 3. post-multiplication by Bluestein factors
+         */
+        n = plan->plan.ptr.p_int[entryoffset+1];
+        m = plan->plan.ptr.p_int[entryoffset+4];
+        offs = plan->plan.ptr.p_int[entryoffset+7];
+        for(i=stackptr+2*n; i<=stackptr+2*m-1; i++)
+        {
+            plan->stackbuf.ptr.p_double[i] = 0;
+        }
+        offsp = offs+2*m;
+        offsa = aoffset;
+        offsb = stackptr;
+        for(i=0; i<=n-1; i++)
+        {
+            bx = plan->precomputed.ptr.p_double[offsp+0];
+            by = plan->precomputed.ptr.p_double[offsp+1];
+            x = a->ptr.p_double[offsa+0];
+            y = a->ptr.p_double[offsa+1];
+            plan->stackbuf.ptr.p_double[offsb+0] = x*bx-y*(-by);
+            plan->stackbuf.ptr.p_double[offsb+1] = x*(-by)+y*bx;
+            offsp = offsp+2;
+            offsa = offsa+2;
+            offsb = offsb+2;
+        }
+        ftbaseexecuteplanrec(&plan->stackbuf, stackptr, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr+2*2*m, _state);
+        offsb = stackptr;
+        offsp = offs;
+        for(i=0; i<=m-1; i++)
+        {
+            x = plan->stackbuf.ptr.p_double[offsb+0];
+            y = plan->stackbuf.ptr.p_double[offsb+1];
+            bx = plan->precomputed.ptr.p_double[offsp+0];
+            by = plan->precomputed.ptr.p_double[offsp+1];
+            plan->stackbuf.ptr.p_double[offsb+0] = x*bx-y*by;
+            plan->stackbuf.ptr.p_double[offsb+1] = -(x*by+y*bx);
+            offsb = offsb+2;
+            offsp = offsp+2;
+        }
+        ftbaseexecuteplanrec(&plan->stackbuf, stackptr, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr+2*2*m, _state);
+        offsb = stackptr;
+        offsp = offs+2*m;
+        offsa = aoffset;
+        for(i=0; i<=n-1; i++)
+        {
+            x = plan->stackbuf.ptr.p_double[offsb+0]/m;
+            y = -plan->stackbuf.ptr.p_double[offsb+1]/m;
+            bx = plan->precomputed.ptr.p_double[offsp+0];
+            by = plan->precomputed.ptr.p_double[offsp+1];
+            a->ptr.p_double[offsa+0] = x*bx-y*(-by);
+            a->ptr.p_double[offsa+1] = x*(-by)+y*bx;
+            offsp = offsp+2;
+            offsa = offsa+2;
+            offsb = offsb+2;
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Returns good factorization N=N1*N2.
+
+Usually N1<=N2 (but not always - small N's may be exception).
+if N1<>1 then N2<>1.
+
+Factorization is chosen depending on task type and codelets we have.
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+void ftbasefactorize(ae_int_t n,
+     ae_int_t tasktype,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state)
+{
+    ae_int_t j;
+
+    *n1 = 0;
+    *n2 = 0;
+
+    *n1 = 0;
+    *n2 = 0;
+    
+    /*
+     * try to find good codelet
+     */
+    if( *n1*(*n2)!=n )
+    {
+        for(j=ftbase_ftbasecodeletrecommended; j>=2; j--)
+        {
+            if( n%j==0 )
+            {
+                *n1 = j;
+                *n2 = n/j;
+                break;
+            }
+        }
+    }
+    
+    /*
+     * try to factorize N
+     */
+    if( *n1*(*n2)!=n )
+    {
+        for(j=ftbase_ftbasecodeletrecommended+1; j<=n-1; j++)
+        {
+            if( n%j==0 )
+            {
+                *n1 = j;
+                *n2 = n/j;
+                break;
+            }
+        }
+    }
+    
+    /*
+     * looks like N is prime :(
+     */
+    if( *n1*(*n2)!=n )
+    {
+        *n1 = 1;
+        *n2 = n;
+    }
+    
+    /*
+     * normalize
+     */
+    if( *n2==1&&*n1!=1 )
+    {
+        *n2 = *n1;
+        *n1 = 1;
+    }
+}
+
+
+/*************************************************************************
+Is number smooth?
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state)
+{
+    ae_int_t i;
+    ae_bool result;
+
+
+    for(i=2; i<=ftbase_ftbasemaxsmoothfactor; i++)
+    {
+        while(n%i==0)
+        {
+            n = n/i;
+        }
+    }
+    result = n==1;
+    return result;
+}
+
+
+/*************************************************************************
+Returns smallest smooth (divisible only by 2, 3, 5) number that is greater
+than or equal to max(N,2)
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state)
+{
+    ae_int_t best;
+    ae_int_t result;
+
+
+    best = 2;
+    while(best<n)
+    {
+        best = 2*best;
+    }
+    ftbase_ftbasefindsmoothrec(n, 1, 2, &best, _state);
+    result = best;
+    return result;
+}
+
+
+/*************************************************************************
+Returns  smallest  smooth  (divisible only by 2, 3, 5) even number that is
+greater than or equal to max(N,2)
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state)
+{
+    ae_int_t best;
+    ae_int_t result;
+
+
+    best = 2;
+    while(best<n)
+    {
+        best = 2*best;
+    }
+    ftbase_ftbasefindsmoothrec(n, 2, 2, &best, _state);
+    result = best;
+    return result;
+}
+
+
+/*************************************************************************
+Returns estimate of FLOP count for the FFT.
+
+It is only an estimate based on operations count for the PERFECT FFT
+and relative inefficiency of the algorithm actually used.
+
+N should be power of 2, estimates are badly wrong for non-power-of-2 N's.
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+double ftbasegetflopestimate(ae_int_t n, ae_state *_state)
+{
+    double result;
+
+
+    result = ftbase_ftbaseinefficiencyfactor*(4*n*ae_log(n, _state)/ae_log(2, _state)-6*n+8);
+    return result;
+}
+
+
+/*************************************************************************
+Recurrent subroutine for the FFTGeneratePlan:
+
+PARAMETERS:
+    N                   plan size
+    IsReal              whether input is real or not.
+                        subroutine MUST NOT ignore this flag because real
+                        inputs comes with non-initialized imaginary parts,
+                        so ignoring this flag will result in corrupted output
+    HalfOut             whether full output or only half of it from 0 to
+                        floor(N/2) is needed. This flag may be ignored if
+                        doing so will simplify calculations
+    Plan                plan array
+    PlanSize            size of used part (in integers)
+    PrecomputedSize     size of precomputed array allocated yet
+    PlanArraySize       plan array size (actual)
+    TmpMemSize          temporary memory required size
+    BluesteinMemSize    temporary memory required size
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_ftbasegenerateplanrec(ae_int_t n,
+     ae_int_t tasktype,
+     ftplan* plan,
+     ae_int_t* plansize,
+     ae_int_t* precomputedsize,
+     ae_int_t* planarraysize,
+     ae_int_t* tmpmemsize,
+     ae_int_t* stackmemsize,
+     ae_int_t stackptr,
+     ae_state *_state)
+{
+    ae_int_t k;
+    ae_int_t m;
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t esize;
+    ae_int_t entryoffset;
+
+
+    
+    /*
+     * prepare
+     */
+    if( *plansize+ftbase_ftbaseplanentrysize>(*planarraysize) )
+    {
+        ftbase_fftarrayresize(&plan->plan, planarraysize, 8*(*planarraysize), _state);
+    }
+    entryoffset = *plansize;
+    esize = ftbase_ftbaseplanentrysize;
+    *plansize = *plansize+esize;
+    
+    /*
+     * if N=1, generate empty plan and exit
+     */
+    if( n==1 )
+    {
+        plan->plan.ptr.p_int[entryoffset+0] = esize;
+        plan->plan.ptr.p_int[entryoffset+1] = -1;
+        plan->plan.ptr.p_int[entryoffset+2] = -1;
+        plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftemptyplan;
+        plan->plan.ptr.p_int[entryoffset+4] = -1;
+        plan->plan.ptr.p_int[entryoffset+5] = -1;
+        plan->plan.ptr.p_int[entryoffset+6] = -1;
+        plan->plan.ptr.p_int[entryoffset+7] = -1;
+        return;
+    }
+    
+    /*
+     * generate plans
+     */
+    ftbasefactorize(n, tasktype, &n1, &n2, _state);
+    if( tasktype==ftbase_ftbasecffttask||tasktype==ftbase_ftbaserffttask )
+    {
+        
+        /*
+         * complex FFT plans
+         */
+        if( n1!=1 )
+        {
+            
+            /*
+             * Cooley-Tukey plan (real or complex)
+             *
+             * Note that child plans are COMPLEX
+             * (whether plan itself is complex or not).
+             */
+            *tmpmemsize = ae_maxint(*tmpmemsize, 2*n1*n2, _state);
+            plan->plan.ptr.p_int[entryoffset+0] = esize;
+            plan->plan.ptr.p_int[entryoffset+1] = n1;
+            plan->plan.ptr.p_int[entryoffset+2] = n2;
+            if( tasktype==ftbase_ftbasecffttask )
+            {
+                plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftcooleytukeyplan;
+            }
+            else
+            {
+                plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftrealcooleytukeyplan;
+            }
+            plan->plan.ptr.p_int[entryoffset+4] = 0;
+            plan->plan.ptr.p_int[entryoffset+5] = *plansize;
+            ftbase_ftbasegenerateplanrec(n1, ftbase_ftbasecffttask, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state);
+            plan->plan.ptr.p_int[entryoffset+6] = *plansize;
+            ftbase_ftbasegenerateplanrec(n2, ftbase_ftbasecffttask, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state);
+            plan->plan.ptr.p_int[entryoffset+7] = -1;
+            return;
+        }
+        else
+        {
+            if( ((n==2||n==3)||n==4)||n==5 )
+            {
+                
+                /*
+                 * hard-coded plan
+                 */
+                plan->plan.ptr.p_int[entryoffset+0] = esize;
+                plan->plan.ptr.p_int[entryoffset+1] = n1;
+                plan->plan.ptr.p_int[entryoffset+2] = n2;
+                plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftcodeletplan;
+                plan->plan.ptr.p_int[entryoffset+4] = 0;
+                plan->plan.ptr.p_int[entryoffset+5] = -1;
+                plan->plan.ptr.p_int[entryoffset+6] = -1;
+                plan->plan.ptr.p_int[entryoffset+7] = *precomputedsize;
+                if( n==3 )
+                {
+                    *precomputedsize = *precomputedsize+2;
+                }
+                if( n==5 )
+                {
+                    *precomputedsize = *precomputedsize+5;
+                }
+                return;
+            }
+            else
+            {
+                
+                /*
+                 * Bluestein's plan
+                 *
+                 * Select such M that M>=2*N-1, M is composite, and M's
+                 * factors are 2, 3, 5
+                 */
+                k = 2*n2-1;
+                m = ftbasefindsmooth(k, _state);
+                *tmpmemsize = ae_maxint(*tmpmemsize, 2*m, _state);
+                plan->plan.ptr.p_int[entryoffset+0] = esize;
+                plan->plan.ptr.p_int[entryoffset+1] = n2;
+                plan->plan.ptr.p_int[entryoffset+2] = -1;
+                plan->plan.ptr.p_int[entryoffset+3] = ftbase_fftbluesteinplan;
+                plan->plan.ptr.p_int[entryoffset+4] = m;
+                plan->plan.ptr.p_int[entryoffset+5] = *plansize;
+                stackptr = stackptr+2*2*m;
+                *stackmemsize = ae_maxint(*stackmemsize, stackptr, _state);
+                ftbase_ftbasegenerateplanrec(m, ftbase_ftbasecffttask, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state);
+                stackptr = stackptr-2*2*m;
+                plan->plan.ptr.p_int[entryoffset+6] = -1;
+                plan->plan.ptr.p_int[entryoffset+7] = *precomputedsize;
+                *precomputedsize = *precomputedsize+2*m+2*n;
+                return;
+            }
+        }
+    }
+    if( tasktype==ftbase_ftbaserfhttask )
+    {
+        
+        /*
+         * real FHT plans
+         */
+        if( n1!=1 )
+        {
+            
+            /*
+             * Cooley-Tukey plan
+             *
+             */
+            *tmpmemsize = ae_maxint(*tmpmemsize, 2*n1*n2, _state);
+            plan->plan.ptr.p_int[entryoffset+0] = esize;
+            plan->plan.ptr.p_int[entryoffset+1] = n1;
+            plan->plan.ptr.p_int[entryoffset+2] = n2;
+            plan->plan.ptr.p_int[entryoffset+3] = ftbase_fhtcooleytukeyplan;
+            plan->plan.ptr.p_int[entryoffset+4] = 0;
+            plan->plan.ptr.p_int[entryoffset+5] = *plansize;
+            ftbase_ftbasegenerateplanrec(n1, tasktype, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state);
+            plan->plan.ptr.p_int[entryoffset+6] = *plansize;
+            ftbase_ftbasegenerateplanrec(n2, tasktype, plan, plansize, precomputedsize, planarraysize, tmpmemsize, stackmemsize, stackptr, _state);
+            plan->plan.ptr.p_int[entryoffset+7] = -1;
+            return;
+        }
+        else
+        {
+            
+            /*
+             * N2 plan
+             */
+            plan->plan.ptr.p_int[entryoffset+0] = esize;
+            plan->plan.ptr.p_int[entryoffset+1] = n1;
+            plan->plan.ptr.p_int[entryoffset+2] = n2;
+            plan->plan.ptr.p_int[entryoffset+3] = ftbase_fhtn2plan;
+            plan->plan.ptr.p_int[entryoffset+4] = 0;
+            plan->plan.ptr.p_int[entryoffset+5] = -1;
+            plan->plan.ptr.p_int[entryoffset+6] = -1;
+            plan->plan.ptr.p_int[entryoffset+7] = -1;
+            if( ((n==2||n==3)||n==4)||n==5 )
+            {
+                
+                /*
+                 * hard-coded plan
+                 */
+                plan->plan.ptr.p_int[entryoffset+0] = esize;
+                plan->plan.ptr.p_int[entryoffset+1] = n1;
+                plan->plan.ptr.p_int[entryoffset+2] = n2;
+                plan->plan.ptr.p_int[entryoffset+3] = ftbase_fhtcodeletplan;
+                plan->plan.ptr.p_int[entryoffset+4] = 0;
+                plan->plan.ptr.p_int[entryoffset+5] = -1;
+                plan->plan.ptr.p_int[entryoffset+6] = -1;
+                plan->plan.ptr.p_int[entryoffset+7] = *precomputedsize;
+                if( n==3 )
+                {
+                    *precomputedsize = *precomputedsize+2;
+                }
+                if( n==5 )
+                {
+                    *precomputedsize = *precomputedsize+5;
+                }
+                return;
+            }
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Recurrent subroutine for precomputing FFT plans
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_ftbaseprecomputeplanrec(ftplan* plan,
+     ae_int_t entryoffset,
+     ae_int_t stackptr,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t n;
+    ae_int_t m;
+    ae_int_t offs;
+    double v;
+    ae_vector emptyarray;
+    double bx;
+    double by;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&emptyarray, 0, DT_REAL, _state, ae_true);
+
+    if( (plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcooleytukeyplan||plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftrealcooleytukeyplan)||plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcooleytukeyplan )
+    {
+        ftbase_ftbaseprecomputeplanrec(plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state);
+        ftbase_ftbaseprecomputeplanrec(plan, plan->plan.ptr.p_int[entryoffset+6], stackptr, _state);
+        ae_frame_leave(_state);
+        return;
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftcodeletplan||plan->plan.ptr.p_int[entryoffset+3]==ftbase_fhtcodeletplan )
+    {
+        n1 = plan->plan.ptr.p_int[entryoffset+1];
+        n2 = plan->plan.ptr.p_int[entryoffset+2];
+        n = n1*n2;
+        if( n==3 )
+        {
+            offs = plan->plan.ptr.p_int[entryoffset+7];
+            plan->precomputed.ptr.p_double[offs+0] = ae_cos(2*ae_pi/3, _state)-1;
+            plan->precomputed.ptr.p_double[offs+1] = ae_sin(2*ae_pi/3, _state);
+            ae_frame_leave(_state);
+            return;
+        }
+        if( n==5 )
+        {
+            offs = plan->plan.ptr.p_int[entryoffset+7];
+            v = 2*ae_pi/5;
+            plan->precomputed.ptr.p_double[offs+0] = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
+            plan->precomputed.ptr.p_double[offs+1] = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
+            plan->precomputed.ptr.p_double[offs+2] = -ae_sin(v, _state);
+            plan->precomputed.ptr.p_double[offs+3] = -(ae_sin(v, _state)+ae_sin(2*v, _state));
+            plan->precomputed.ptr.p_double[offs+4] = ae_sin(v, _state)-ae_sin(2*v, _state);
+            ae_frame_leave(_state);
+            return;
+        }
+    }
+    if( plan->plan.ptr.p_int[entryoffset+3]==ftbase_fftbluesteinplan )
+    {
+        ftbase_ftbaseprecomputeplanrec(plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state);
+        n = plan->plan.ptr.p_int[entryoffset+1];
+        m = plan->plan.ptr.p_int[entryoffset+4];
+        offs = plan->plan.ptr.p_int[entryoffset+7];
+        for(i=0; i<=2*m-1; i++)
+        {
+            plan->precomputed.ptr.p_double[offs+i] = 0;
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            bx = ae_cos(ae_pi*ae_sqr(i, _state)/n, _state);
+            by = ae_sin(ae_pi*ae_sqr(i, _state)/n, _state);
+            plan->precomputed.ptr.p_double[offs+2*i+0] = bx;
+            plan->precomputed.ptr.p_double[offs+2*i+1] = by;
+            plan->precomputed.ptr.p_double[offs+2*m+2*i+0] = bx;
+            plan->precomputed.ptr.p_double[offs+2*m+2*i+1] = by;
+            if( i>0 )
+            {
+                plan->precomputed.ptr.p_double[offs+2*(m-i)+0] = bx;
+                plan->precomputed.ptr.p_double[offs+2*(m-i)+1] = by;
+            }
+        }
+        ftbaseexecuteplanrec(&plan->precomputed, offs, plan, plan->plan.ptr.p_int[entryoffset+5], stackptr, _state);
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Twiddle factors calculation
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_ffttwcalc(/* Real    */ ae_vector* a,
+     ae_int_t aoffset,
+     ae_int_t n1,
+     ae_int_t n2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t n;
+    ae_int_t idx;
+    ae_int_t offs;
+    double x;
+    double y;
+    double twxm1;
+    double twy;
+    double twbasexm1;
+    double twbasey;
+    double twrowxm1;
+    double twrowy;
+    double tmpx;
+    double tmpy;
+    double v;
+
+
+    n = n1*n2;
+    v = -2*ae_pi/n;
+    twbasexm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
+    twbasey = ae_sin(v, _state);
+    twrowxm1 = 0;
+    twrowy = 0;
+    for(i=0; i<=n2-1; i++)
+    {
+        twxm1 = 0;
+        twy = 0;
+        for(j=0; j<=n1-1; j++)
+        {
+            idx = i*n1+j;
+            offs = aoffset+2*idx;
+            x = a->ptr.p_double[offs+0];
+            y = a->ptr.p_double[offs+1];
+            tmpx = x*twxm1-y*twy;
+            tmpy = x*twy+y*twxm1;
+            a->ptr.p_double[offs+0] = x+tmpx;
+            a->ptr.p_double[offs+1] = y+tmpy;
+            
+            /*
+             * update Tw: Tw(new) = Tw(old)*TwRow
+             */
+            if( j<n1-1 )
+            {
+                if( j%ftbase_ftbaseupdatetw==0 )
+                {
+                    v = -2*ae_pi*i*(j+1)/n;
+                    twxm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
+                    twy = ae_sin(v, _state);
+                }
+                else
+                {
+                    tmpx = twrowxm1+twxm1*twrowxm1-twy*twrowy;
+                    tmpy = twrowy+twxm1*twrowy+twy*twrowxm1;
+                    twxm1 = twxm1+tmpx;
+                    twy = twy+tmpy;
+                }
+            }
+        }
+        
+        /*
+         * update TwRow: TwRow(new) = TwRow(old)*TwBase
+         */
+        if( i<n2-1 )
+        {
+            if( j%ftbase_ftbaseupdatetw==0 )
+            {
+                v = -2*ae_pi*(i+1)/n;
+                twrowxm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
+                twrowy = ae_sin(v, _state);
+            }
+            else
+            {
+                tmpx = twbasexm1+twrowxm1*twbasexm1-twrowy*twbasey;
+                tmpy = twbasey+twrowxm1*twbasey+twrowy*twbasexm1;
+                twrowxm1 = twrowxm1+tmpx;
+                twrowy = twrowy+tmpy;
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Linear transpose: transpose complex matrix stored in 1-dimensional array
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_internalcomplexlintranspose(/* Real    */ ae_vector* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_int_t astart,
+     /* Real    */ ae_vector* buf,
+     ae_state *_state)
+{
+
+
+    ftbase_ffticltrec(a, astart, n, buf, 0, m, m, n, _state);
+    ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+2*m*n-1));
+}
+
+
+/*************************************************************************
+Linear transpose: transpose real matrix stored in 1-dimensional array
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_internalreallintranspose(/* Real    */ ae_vector* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_int_t astart,
+     /* Real    */ ae_vector* buf,
+     ae_state *_state)
+{
+
+
+    ftbase_fftirltrec(a, astart, n, buf, 0, m, m, n, _state);
+    ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+m*n-1));
+}
+
+
+/*************************************************************************
+Recurrent subroutine for a InternalComplexLinTranspose
+
+Write A^T to B, where:
+* A is m*n complex matrix stored in array A as pairs of real/image values,
+  beginning from AStart position, with AStride stride
+* B is n*m complex matrix stored in array B as pairs of real/image values,
+  beginning from BStart position, with BStride stride
+stride is measured in complex numbers, i.e. in real/image pairs.
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_ffticltrec(/* Real    */ ae_vector* a,
+     ae_int_t astart,
+     ae_int_t astride,
+     /* Real    */ ae_vector* b,
+     ae_int_t bstart,
+     ae_int_t bstride,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t idx1;
+    ae_int_t idx2;
+    ae_int_t m2;
+    ae_int_t m1;
+    ae_int_t n1;
+
+
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    if( ae_maxint(m, n, _state)<=8 )
+    {
+        m2 = 2*bstride;
+        for(i=0; i<=m-1; i++)
+        {
+            idx1 = bstart+2*i;
+            idx2 = astart+2*i*astride;
+            for(j=0; j<=n-1; j++)
+            {
+                b->ptr.p_double[idx1+0] = a->ptr.p_double[idx2+0];
+                b->ptr.p_double[idx1+1] = a->ptr.p_double[idx2+1];
+                idx1 = idx1+m2;
+                idx2 = idx2+2;
+            }
+        }
+        return;
+    }
+    if( n>m )
+    {
+        
+        /*
+         * New partition:
+         *
+         * "A^T -> B" becomes "(A1 A2)^T -> ( B1 )
+         *                                  ( B2 )
+         */
+        n1 = n/2;
+        if( n-n1>=8&&n1%8!=0 )
+        {
+            n1 = n1+(8-n1%8);
+        }
+        ae_assert(n-n1>0, "Assertion failed", _state);
+        ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
+        ftbase_ffticltrec(a, astart+2*n1, astride, b, bstart+2*n1*bstride, bstride, m, n-n1, _state);
+    }
+    else
+    {
+        
+        /*
+         * New partition:
+         *
+         * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 )
+         *                     ( A2 )
+         */
+        m1 = m/2;
+        if( m-m1>=8&&m1%8!=0 )
+        {
+            m1 = m1+(8-m1%8);
+        }
+        ae_assert(m-m1>0, "Assertion failed", _state);
+        ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
+        ftbase_ffticltrec(a, astart+2*m1*astride, astride, b, bstart+2*m1, bstride, m-m1, n, _state);
+    }
+}
+
+
+/*************************************************************************
+Recurrent subroutine for a InternalRealLinTranspose
+
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_fftirltrec(/* Real    */ ae_vector* a,
+     ae_int_t astart,
+     ae_int_t astride,
+     /* Real    */ ae_vector* b,
+     ae_int_t bstart,
+     ae_int_t bstride,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t idx1;
+    ae_int_t idx2;
+    ae_int_t m1;
+    ae_int_t n1;
+
+
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    if( ae_maxint(m, n, _state)<=8 )
+    {
+        for(i=0; i<=m-1; i++)
+        {
+            idx1 = bstart+i;
+            idx2 = astart+i*astride;
+            for(j=0; j<=n-1; j++)
+            {
+                b->ptr.p_double[idx1] = a->ptr.p_double[idx2];
+                idx1 = idx1+bstride;
+                idx2 = idx2+1;
+            }
+        }
+        return;
+    }
+    if( n>m )
+    {
+        
+        /*
+         * New partition:
+         *
+         * "A^T -> B" becomes "(A1 A2)^T -> ( B1 )
+         *                                  ( B2 )
+         */
+        n1 = n/2;
+        if( n-n1>=8&&n1%8!=0 )
+        {
+            n1 = n1+(8-n1%8);
+        }
+        ae_assert(n-n1>0, "Assertion failed", _state);
+        ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
+        ftbase_fftirltrec(a, astart+n1, astride, b, bstart+n1*bstride, bstride, m, n-n1, _state);
+    }
+    else
+    {
+        
+        /*
+         * New partition:
+         *
+         * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 )
+         *                     ( A2 )
+         */
+        m1 = m/2;
+        if( m-m1>=8&&m1%8!=0 )
+        {
+            m1 = m1+(8-m1%8);
+        }
+        ae_assert(m-m1>0, "Assertion failed", _state);
+        ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
+        ftbase_fftirltrec(a, astart+m1*astride, astride, b, bstart+m1, bstride, m-m1, n, _state);
+    }
+}
+
+
+/*************************************************************************
+recurrent subroutine for FFTFindSmoothRec
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_ftbasefindsmoothrec(ae_int_t n,
+     ae_int_t seed,
+     ae_int_t leastfactor,
+     ae_int_t* best,
+     ae_state *_state)
+{
+
+
+    ae_assert(ftbase_ftbasemaxsmoothfactor<=5, "FTBaseFindSmoothRec: internal error!", _state);
+    if( seed>=n )
+    {
+        *best = ae_minint(*best, seed, _state);
+        return;
+    }
+    if( leastfactor<=2 )
+    {
+        ftbase_ftbasefindsmoothrec(n, seed*2, 2, best, _state);
+    }
+    if( leastfactor<=3 )
+    {
+        ftbase_ftbasefindsmoothrec(n, seed*3, 3, best, _state);
+    }
+    if( leastfactor<=5 )
+    {
+        ftbase_ftbasefindsmoothrec(n, seed*5, 5, best, _state);
+    }
+}
+
+
+/*************************************************************************
+Internal subroutine: array resize
+
+  -- ALGLIB --
+     Copyright 01.05.2009 by Bochkanov Sergey
+*************************************************************************/
+static void ftbase_fftarrayresize(/* Integer */ ae_vector* a,
+     ae_int_t* asize,
+     ae_int_t newasize,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&tmp, 0, DT_INT, _state, ae_true);
+
+    ae_vector_set_length(&tmp, *asize, _state);
+    for(i=0; i<=*asize-1; i++)
+    {
+        tmp.ptr.p_int[i] = a->ptr.p_int[i];
+    }
+    ae_vector_set_length(a, newasize, _state);
+    for(i=0; i<=*asize-1; i++)
+    {
+        a->ptr.p_int[i] = tmp.ptr.p_int[i];
+    }
+    *asize = newasize;
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Reference FHT stub
+*************************************************************************/
+static void ftbase_reffht(/* Real    */ ae_vector* a,
+     ae_int_t n,
+     ae_int_t offs,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector buf;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>0, "RefFHTR1D: incorrect N!", _state);
+    ae_vector_set_length(&buf, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        v = 0;
+        for(j=0; j<=n-1; j++)
+        {
+            v = v+a->ptr.p_double[offs+j]*(ae_cos(2*ae_pi*i*j/n, _state)+ae_sin(2*ae_pi*i*j/n, _state));
+        }
+        buf.ptr.p_double[i] = v;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        a->ptr.p_double[offs+i] = buf.ptr.p_double[i];
+    }
+    ae_frame_leave(_state);
+}
+
+
+ae_bool _ftplan_init(ftplan* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->plan, 0, DT_INT, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->precomputed, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->tmpbuf, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->stackbuf, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _ftplan_init_copy(ftplan* dst, ftplan* src, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init_copy(&dst->plan, &src->plan, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->precomputed, &src->precomputed, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->tmpbuf, &src->tmpbuf, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->stackbuf, &src->stackbuf, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _ftplan_clear(ftplan* p)
+{
+    ae_vector_clear(&p->plan);
+    ae_vector_clear(&p->precomputed);
+    ae_vector_clear(&p->tmpbuf);
+    ae_vector_clear(&p->stackbuf);
+}
+
+
+
+
+double nulog1p(double x, ae_state *_state)
+{
+    double z;
+    double lp;
+    double lq;
+    double result;
+
+
+    z = 1.0+x;
+    if( ae_fp_less(z,0.70710678118654752440)||ae_fp_greater(z,1.41421356237309504880) )
+    {
+        result = ae_log(z, _state);
+        return result;
+    }
+    z = x*x;
+    lp = 4.5270000862445199635215E-5;
+    lp = lp*x+4.9854102823193375972212E-1;
+    lp = lp*x+6.5787325942061044846969E0;
+    lp = lp*x+2.9911919328553073277375E1;
+    lp = lp*x+6.0949667980987787057556E1;
+    lp = lp*x+5.7112963590585538103336E1;
+    lp = lp*x+2.0039553499201281259648E1;
+    lq = 1.0000000000000000000000E0;
+    lq = lq*x+1.5062909083469192043167E1;
+    lq = lq*x+8.3047565967967209469434E1;
+    lq = lq*x+2.2176239823732856465394E2;
+    lq = lq*x+3.0909872225312059774938E2;
+    lq = lq*x+2.1642788614495947685003E2;
+    lq = lq*x+6.0118660497603843919306E1;
+    z = -0.5*z+x*(z*lp/lq);
+    result = x+z;
+    return result;
+}
+
+
+double nuexpm1(double x, ae_state *_state)
+{
+    double r;
+    double xx;
+    double ep;
+    double eq;
+    double result;
+
+
+    if( ae_fp_less(x,-0.5)||ae_fp_greater(x,0.5) )
+    {
+        result = ae_exp(x, _state)-1.0;
+        return result;
+    }
+    xx = x*x;
+    ep = 1.2617719307481059087798E-4;
+    ep = ep*xx+3.0299440770744196129956E-2;
+    ep = ep*xx+9.9999999999999999991025E-1;
+    eq = 3.0019850513866445504159E-6;
+    eq = eq*xx+2.5244834034968410419224E-3;
+    eq = eq*xx+2.2726554820815502876593E-1;
+    eq = eq*xx+2.0000000000000000000897E0;
+    r = x*ep;
+    r = r/(eq-r);
+    result = r+r;
+    return result;
+}
+
+
+double nucosm1(double x, ae_state *_state)
+{
+    double xx;
+    double c;
+    double result;
+
+
+    if( ae_fp_less(x,-0.25*ae_pi)||ae_fp_greater(x,0.25*ae_pi) )
+    {
+        result = ae_cos(x, _state)-1;
+        return result;
+    }
+    xx = x*x;
+    c = 4.7377507964246204691685E-14;
+    c = c*xx-1.1470284843425359765671E-11;
+    c = c*xx+2.0876754287081521758361E-9;
+    c = c*xx-2.7557319214999787979814E-7;
+    c = c*xx+2.4801587301570552304991E-5;
+    c = c*xx-1.3888888888888872993737E-3;
+    c = c*xx+4.1666666666666666609054E-2;
+    result = -0.5*xx+xx*xx*c;
+    return result;
+}
+
+
+
+
+
+}
+
diff --git a/contrib/lbfgs/alglibinternal.h b/contrib/lbfgs/alglibinternal.h
new file mode 100755
index 0000000000..ee81f195c0
--- /dev/null
+++ b/contrib/lbfgs/alglibinternal.h
@@ -0,0 +1,707 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#ifndef _alglibinternal_pkg_h
+#define _alglibinternal_pkg_h
+#include "ap.h"
+
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+typedef struct
+{
+    ae_vector ia1;
+    ae_vector ia2;
+    ae_vector ra1;
+    ae_vector ra2;
+} apbuffers;
+typedef struct
+{
+    ae_bool brackt;
+    ae_bool stage1;
+    ae_int_t infoc;
+    double dg;
+    double dgm;
+    double dginit;
+    double dgtest;
+    double dgx;
+    double dgxm;
+    double dgy;
+    double dgym;
+    double finit;
+    double ftest1;
+    double fm;
+    double fx;
+    double fxm;
+    double fy;
+    double fym;
+    double stx;
+    double sty;
+    double stmin;
+    double stmax;
+    double width;
+    double width1;
+    double xtrapf;
+} linminstate;
+typedef struct
+{
+    ae_bool needf;
+    ae_vector x;
+    double f;
+    ae_int_t n;
+    ae_vector xbase;
+    ae_vector s;
+    double stplen;
+    double fcur;
+    double stpmax;
+    ae_int_t fmax;
+    ae_int_t nfev;
+    ae_int_t info;
+    rcommstate rstate;
+} armijostate;
+typedef struct
+{
+    ae_vector plan;
+    ae_vector precomputed;
+    ae_vector tmpbuf;
+    ae_vector stackbuf;
+} ftplan;
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+void tagsort(/* Real    */ ae_vector* a,
+     ae_int_t n,
+     /* Integer */ ae_vector* p1,
+     /* Integer */ ae_vector* p2,
+     ae_state *_state);
+void tagsortfasti(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Integer */ ae_vector* bufb,
+     ae_int_t n,
+     ae_state *_state);
+void tagsortfastr(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* bufa,
+     /* Real    */ ae_vector* bufb,
+     ae_int_t n,
+     ae_state *_state);
+void tagsortfast(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* bufa,
+     ae_int_t n,
+     ae_state *_state);
+void tagheappushi(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     ae_int_t* n,
+     double va,
+     ae_int_t vb,
+     ae_state *_state);
+void tagheapreplacetopi(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     ae_int_t n,
+     double va,
+     ae_int_t vb,
+     ae_state *_state);
+void tagheappopi(/* Real    */ ae_vector* a,
+     /* Integer */ ae_vector* b,
+     ae_int_t* n,
+     ae_state *_state);
+void taskgenint1d(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state);
+void taskgenint1dequidist(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state);
+void taskgenint1dcheb1(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state);
+void taskgenint1dcheb2(double a,
+     double b,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     ae_state *_state);
+ae_bool aredistinct(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state);
+void bvectorsetlengthatleast(/* Boolean */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state);
+void rvectorsetlengthatleast(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state);
+void rmatrixsetlengthatleast(/* Real    */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+ae_bool isfinitevector(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state);
+ae_bool isfinitecvector(/* Complex */ ae_vector* z,
+     ae_int_t n,
+     ae_state *_state);
+ae_bool apservisfinitematrix(/* Real    */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+ae_bool isfinitertrmatrix(/* Real    */ ae_matrix* x,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool apservisfiniteornanmatrix(/* Real    */ ae_matrix* x,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+double safepythag2(double x, double y, ae_state *_state);
+double safepythag3(double x, double y, double z, ae_state *_state);
+ae_int_t saferdiv(double x, double y, double* r, ae_state *_state);
+double safeminposrv(double x, double y, double v, ae_state *_state);
+void apperiodicmap(double* x,
+     double a,
+     double b,
+     double* k,
+     ae_state *_state);
+double boundval(double x, double b1, double b2, ae_state *_state);
+ae_bool _apbuffers_init(apbuffers* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _apbuffers_init_copy(apbuffers* dst, apbuffers* src, ae_state *_state, ae_bool make_automatic);
+void _apbuffers_clear(apbuffers* p);
+void rankx(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     apbuffers* buf,
+     ae_state *_state);
+ae_bool cmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_vector* u,
+     ae_int_t iu,
+     /* Complex */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state);
+ae_bool rmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_vector* u,
+     ae_int_t iu,
+     /* Real    */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state);
+ae_bool cmatrixmvf(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Complex */ ae_vector* x,
+     ae_int_t ix,
+     /* Complex */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state);
+ae_bool rmatrixmvf(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Real    */ ae_vector* x,
+     ae_int_t ix,
+     /* Real    */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state);
+ae_bool cmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+ae_bool cmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+ae_bool rmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+ae_bool rmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+ae_bool cmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool rmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool rmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state);
+ae_bool cmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state);
+double vectornorm2(/* Real    */ ae_vector* x,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state);
+ae_int_t vectoridxabsmax(/* Real    */ ae_vector* x,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_state *_state);
+ae_int_t columnidxabsmax(/* Real    */ ae_matrix* x,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j,
+     ae_state *_state);
+ae_int_t rowidxabsmax(/* Real    */ ae_matrix* x,
+     ae_int_t j1,
+     ae_int_t j2,
+     ae_int_t i,
+     ae_state *_state);
+double upperhessenberg1norm(/* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j1,
+     ae_int_t j2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void copymatrix(/* Real    */ ae_matrix* a,
+     ae_int_t is1,
+     ae_int_t is2,
+     ae_int_t js1,
+     ae_int_t js2,
+     /* Real    */ ae_matrix* b,
+     ae_int_t id1,
+     ae_int_t id2,
+     ae_int_t jd1,
+     ae_int_t jd2,
+     ae_state *_state);
+void inplacetranspose(/* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j1,
+     ae_int_t j2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void copyandtranspose(/* Real    */ ae_matrix* a,
+     ae_int_t is1,
+     ae_int_t is2,
+     ae_int_t js1,
+     ae_int_t js2,
+     /* Real    */ ae_matrix* b,
+     ae_int_t id1,
+     ae_int_t id2,
+     ae_int_t jd1,
+     ae_int_t jd2,
+     ae_state *_state);
+void matrixvectormultiply(/* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t j1,
+     ae_int_t j2,
+     ae_bool trans,
+     /* Real    */ ae_vector* x,
+     ae_int_t ix1,
+     ae_int_t ix2,
+     double alpha,
+     /* Real    */ ae_vector* y,
+     ae_int_t iy1,
+     ae_int_t iy2,
+     double beta,
+     ae_state *_state);
+double pythag2(double x, double y, ae_state *_state);
+void matrixmatrixmultiply(/* Real    */ ae_matrix* a,
+     ae_int_t ai1,
+     ae_int_t ai2,
+     ae_int_t aj1,
+     ae_int_t aj2,
+     ae_bool transa,
+     /* Real    */ ae_matrix* b,
+     ae_int_t bi1,
+     ae_int_t bi2,
+     ae_int_t bj1,
+     ae_int_t bj2,
+     ae_bool transb,
+     double alpha,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ci1,
+     ae_int_t ci2,
+     ae_int_t cj1,
+     ae_int_t cj2,
+     double beta,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Complex */ ae_vector* x,
+     ae_complex alpha,
+     /* Complex */ ae_vector* y,
+     ae_state *_state);
+void hermitianrank2update(/* Complex */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Complex */ ae_vector* x,
+     /* Complex */ ae_vector* y,
+     /* Complex */ ae_vector* t,
+     ae_complex alpha,
+     ae_state *_state);
+void generatereflection(/* Real    */ ae_vector* x,
+     ae_int_t n,
+     double* tau,
+     ae_state *_state);
+void applyreflectionfromtheleft(/* Real    */ ae_matrix* c,
+     double tau,
+     /* Real    */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void applyreflectionfromtheright(/* Real    */ ae_matrix* c,
+     double tau,
+     /* Real    */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void complexgeneratereflection(/* Complex */ ae_vector* x,
+     ae_int_t n,
+     ae_complex* tau,
+     ae_state *_state);
+void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c,
+     ae_complex tau,
+     /* Complex */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Complex */ ae_vector* work,
+     ae_state *_state);
+void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c,
+     ae_complex tau,
+     /* Complex */ ae_vector* v,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Complex */ ae_vector* work,
+     ae_state *_state);
+void symmetricmatrixvectormultiply(/* Real    */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* x,
+     double alpha,
+     /* Real    */ ae_vector* y,
+     ae_state *_state);
+void symmetricrank2update(/* Real    */ ae_matrix* a,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* y,
+     /* Real    */ ae_vector* t,
+     double alpha,
+     ae_state *_state);
+void applyrotationsfromtheleft(ae_bool isforward,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* c,
+     /* Real    */ ae_vector* s,
+     /* Real    */ ae_matrix* a,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void applyrotationsfromtheright(ae_bool isforward,
+     ae_int_t m1,
+     ae_int_t m2,
+     ae_int_t n1,
+     ae_int_t n2,
+     /* Real    */ ae_vector* c,
+     /* Real    */ ae_vector* s,
+     /* Real    */ ae_matrix* a,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+void generaterotation(double f,
+     double g,
+     double* cs,
+     double* sn,
+     double* r,
+     ae_state *_state);
+ae_bool upperhessenbergschurdecomposition(/* Real    */ ae_matrix* h,
+     ae_int_t n,
+     /* Real    */ ae_matrix* s,
+     ae_state *_state);
+void internalschurdecomposition(/* Real    */ ae_matrix* h,
+     ae_int_t n,
+     ae_int_t tneeded,
+     ae_int_t zneeded,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     /* Real    */ ae_matrix* z,
+     ae_int_t* info,
+     ae_state *_state);
+void rmatrixtrsafesolve(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double* s,
+     ae_bool isupper,
+     ae_bool istrans,
+     ae_bool isunit,
+     ae_state *_state);
+void safesolvetriangular(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double* s,
+     ae_bool isupper,
+     ae_bool istrans,
+     ae_bool isunit,
+     ae_bool normin,
+     /* Real    */ ae_vector* cnorm,
+     ae_state *_state);
+ae_bool rmatrixscaledtrsafesolve(/* Real    */ ae_matrix* a,
+     double sa,
+     ae_int_t n,
+     /* Real    */ ae_vector* x,
+     ae_bool isupper,
+     ae_int_t trans,
+     ae_bool isunit,
+     double maxgrowth,
+     ae_state *_state);
+ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a,
+     double sa,
+     ae_int_t n,
+     /* Complex */ ae_vector* x,
+     ae_bool isupper,
+     ae_int_t trans,
+     ae_bool isunit,
+     double maxgrowth,
+     ae_state *_state);
+void xdot(/* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     ae_int_t n,
+     /* Real    */ ae_vector* temp,
+     double* r,
+     double* rerr,
+     ae_state *_state);
+void xcdot(/* Complex */ ae_vector* a,
+     /* Complex */ ae_vector* b,
+     ae_int_t n,
+     /* Real    */ ae_vector* temp,
+     ae_complex* r,
+     double* rerr,
+     ae_state *_state);
+void linminnormalized(/* Real    */ ae_vector* d,
+     double* stp,
+     ae_int_t n,
+     ae_state *_state);
+void mcsrch(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double* f,
+     /* Real    */ ae_vector* g,
+     /* Real    */ ae_vector* s,
+     double* stp,
+     double stpmax,
+     ae_int_t* info,
+     ae_int_t* nfev,
+     /* Real    */ ae_vector* wa,
+     linminstate* state,
+     ae_int_t* stage,
+     ae_state *_state);
+void armijocreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     double f,
+     /* Real    */ ae_vector* s,
+     double stp,
+     double stpmax,
+     ae_int_t fmax,
+     armijostate* state,
+     ae_state *_state);
+ae_bool armijoiteration(armijostate* state, ae_state *_state);
+void armijoresults(armijostate* state,
+     ae_int_t* info,
+     double* stp,
+     double* f,
+     ae_state *_state);
+ae_bool _linminstate_init(linminstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _linminstate_init_copy(linminstate* dst, linminstate* src, ae_state *_state, ae_bool make_automatic);
+void _linminstate_clear(linminstate* p);
+ae_bool _armijostate_init(armijostate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _armijostate_init_copy(armijostate* dst, armijostate* src, ae_state *_state, ae_bool make_automatic);
+void _armijostate_clear(armijostate* p);
+void ftbasegeneratecomplexfftplan(ae_int_t n,
+     ftplan* plan,
+     ae_state *_state);
+void ftbasegeneraterealfftplan(ae_int_t n, ftplan* plan, ae_state *_state);
+void ftbasegeneraterealfhtplan(ae_int_t n, ftplan* plan, ae_state *_state);
+void ftbaseexecuteplan(/* Real    */ ae_vector* a,
+     ae_int_t aoffset,
+     ae_int_t n,
+     ftplan* plan,
+     ae_state *_state);
+void ftbaseexecuteplanrec(/* Real    */ ae_vector* a,
+     ae_int_t aoffset,
+     ftplan* plan,
+     ae_int_t entryoffset,
+     ae_int_t stackptr,
+     ae_state *_state);
+void ftbasefactorize(ae_int_t n,
+     ae_int_t tasktype,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state);
+ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state);
+ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state);
+ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state);
+double ftbasegetflopestimate(ae_int_t n, ae_state *_state);
+ae_bool _ftplan_init(ftplan* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _ftplan_init_copy(ftplan* dst, ftplan* src, ae_state *_state, ae_bool make_automatic);
+void _ftplan_clear(ftplan* p);
+double nulog1p(double x, ae_state *_state);
+double nuexpm1(double x, ae_state *_state);
+double nucosm1(double x, ae_state *_state);
+
+}
+#endif
+
diff --git a/contrib/lbfgs/alglibmisc.cpp b/contrib/lbfgs/alglibmisc.cpp
new file mode 100755
index 0000000000..c1cd80cff9
--- /dev/null
+++ b/contrib/lbfgs/alglibmisc.cpp
@@ -0,0 +1,3083 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#include "stdafx.h"
+#include "alglibmisc.h"
+
+// disable some irrelevant warnings
+#if (AE_COMPILER==AE_MSVC)
+#pragma warning(disable:4100)
+#pragma warning(disable:4127)
+#pragma warning(disable:4702)
+#pragma warning(disable:4996)
+#endif
+using namespace std;
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+
+/*************************************************************************
+Portable high quality random number generator state.
+Initialized with HQRNDRandomize() or HQRNDSeed().
+
+Fields:
+    S1, S2      -   seed values
+    V           -   precomputed value
+    MagicV      -   'magic' value used to determine whether State structure
+                    was correctly initialized.
+*************************************************************************/
+_hqrndstate_owner::_hqrndstate_owner()
+{
+    p_struct = (alglib_impl::hqrndstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::hqrndstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_hqrndstate_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_hqrndstate_owner::_hqrndstate_owner(const _hqrndstate_owner &rhs)
+{
+    p_struct = (alglib_impl::hqrndstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::hqrndstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_hqrndstate_init_copy(p_struct, const_cast<alglib_impl::hqrndstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_hqrndstate_owner& _hqrndstate_owner::operator=(const _hqrndstate_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_hqrndstate_clear(p_struct);
+    if( !alglib_impl::_hqrndstate_init_copy(p_struct, const_cast<alglib_impl::hqrndstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_hqrndstate_owner::~_hqrndstate_owner()
+{
+    alglib_impl::_hqrndstate_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::hqrndstate* _hqrndstate_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::hqrndstate* _hqrndstate_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::hqrndstate*>(p_struct);
+}
+hqrndstate::hqrndstate() : _hqrndstate_owner() 
+{
+}
+
+hqrndstate::hqrndstate(const hqrndstate &rhs):_hqrndstate_owner(rhs) 
+{
+}
+
+hqrndstate& hqrndstate::operator=(const hqrndstate &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _hqrndstate_owner::operator=(rhs);
+    return *this;
+}
+
+hqrndstate::~hqrndstate()
+{
+}
+
+/*************************************************************************
+HQRNDState  initialization  with  random  values  which come from standard
+RNG.
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndrandomize(hqrndstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hqrndrandomize(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+HQRNDState initialization with seed values
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndseed(const ae_int_t s1, const ae_int_t s2, hqrndstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hqrndseed(s1, s2, const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function generates random real number in (0,1),
+not including interval boundaries
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+double hqrnduniformr(const hqrndstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::hqrnduniformr(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function generates random integer number in [0, N)
+
+1. N must be less than HQRNDMax-1.
+2. State structure must be initialized with HQRNDRandomize() or HQRNDSeed()
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t hqrnduniformi(const hqrndstate &state, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::hqrnduniformi(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Random number generator: normal numbers
+
+This function generates one random number from normal distribution.
+Its performance is equal to that of HQRNDNormal2()
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+double hqrndnormal(const hqrndstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::hqrndnormal(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Random number generator: random X and Y such that X^2+Y^2=1
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndunit2(const hqrndstate &state, double &x, double &y)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hqrndunit2(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &x, &y, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Random number generator: normal numbers
+
+This function generates two independent random numbers from normal
+distribution. Its performance is equal to that of HQRNDNormal()
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndnormal2(const hqrndstate &state, double &x1, double &x2)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hqrndnormal2(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), &x1, &x2, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Random number generator: exponential distribution
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 11.08.2007 by Bochkanov Sergey
+*************************************************************************/
+double hqrndexponential(const hqrndstate &state, const double lambdav)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::hqrndexponential(const_cast<alglib_impl::hqrndstate*>(state.c_ptr()), lambdav, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+
+*************************************************************************/
+_kdtree_owner::_kdtree_owner()
+{
+    p_struct = (alglib_impl::kdtree*)alglib_impl::ae_malloc(sizeof(alglib_impl::kdtree), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_kdtree_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_kdtree_owner::_kdtree_owner(const _kdtree_owner &rhs)
+{
+    p_struct = (alglib_impl::kdtree*)alglib_impl::ae_malloc(sizeof(alglib_impl::kdtree), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_kdtree_init_copy(p_struct, const_cast<alglib_impl::kdtree*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_kdtree_owner& _kdtree_owner::operator=(const _kdtree_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_kdtree_clear(p_struct);
+    if( !alglib_impl::_kdtree_init_copy(p_struct, const_cast<alglib_impl::kdtree*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_kdtree_owner::~_kdtree_owner()
+{
+    alglib_impl::_kdtree_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::kdtree* _kdtree_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::kdtree* _kdtree_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::kdtree*>(p_struct);
+}
+kdtree::kdtree() : _kdtree_owner() 
+{
+}
+
+kdtree::kdtree(const kdtree &rhs):_kdtree_owner(rhs) 
+{
+}
+
+kdtree& kdtree::operator=(const kdtree &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _kdtree_owner::operator=(rhs);
+    return *this;
+}
+
+kdtree::~kdtree()
+{
+}
+
+/*************************************************************************
+KD-tree creation
+
+This subroutine creates KD-tree from set of X-values and optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuild(const real_2d_array &xy, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreebuild(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+KD-tree creation
+
+This subroutine creates KD-tree from set of X-values and optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuild(const real_2d_array &xy, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = xy.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreebuild(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+KD-tree creation
+
+This  subroutine  creates  KD-tree  from set of X-values, integer tags and
+optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    Tags    -   tags, array[0..N-1], contains integer tags associated
+                with points.
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreebuildtagged(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+KD-tree creation
+
+This  subroutine  creates  KD-tree  from set of X-values, integer tags and
+optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    Tags    -   tags, array[0..N-1], contains integer tags associated
+                with points.
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (xy.rows()!=tags.length()))
+        throw ap_error("Error while calling 'kdtreebuildtagged': looks like one of arguments has wrong size");
+    n = xy.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreebuildtagged(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), n, nx, ny, normtype, const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+K-NN query: K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::kdtreequeryknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+K-NN query: K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    bool selfmatch;
+
+    selfmatch = true;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::kdtreequeryknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+R-NN query: all points within R-sphere centered at X
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    R           -   radius of sphere (in corresponding norm), R>0
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of neighbors found, >=0
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+actual results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r, const bool selfmatch)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::kdtreequeryrnn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), r, selfmatch, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+R-NN query: all points within R-sphere centered at X
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    R           -   radius of sphere (in corresponding norm), R>0
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of neighbors found, >=0
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+actual results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    bool selfmatch;
+
+    selfmatch = true;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::kdtreequeryrnn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), r, selfmatch, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+K-NN query: approximate K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+    Eps         -   approximation factor, Eps>=0. eps-approximate  nearest
+                    neighbor  is  a  neighbor  whose distance from X is at
+                    most (1+eps) times distance of true nearest neighbor.
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+NOTES
+    significant performance gain may be achieved only when Eps  is  is  on
+    the order of magnitude of 1 or larger.
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch, const double eps)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::kdtreequeryaknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, eps, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+K-NN query: approximate K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+    Eps         -   approximation factor, Eps>=0. eps-approximate  nearest
+                    neighbor  is  a  neighbor  whose distance from X is at
+                    most (1+eps) times distance of true nearest neighbor.
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+NOTES
+    significant performance gain may be achieved only when Eps  is  is  on
+    the order of magnitude of 1 or larger.
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const double eps)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    bool selfmatch;
+
+    selfmatch = true;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_int_t result = alglib_impl::kdtreequeryaknn(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), k, selfmatch, eps, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<ae_int_t*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+X-values from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    X       -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    X       -   rows are filled with X-values
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsTags()          tag values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsx(const kdtree &kdt, real_2d_array &x)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultsx(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+X- and Y-values from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    XY      -   possibly pre-allocated buffer. If XY is too small to store
+                result, it is resized. If size(XY) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    XY      -   rows are filled with points: first NX columns with
+                X-values, next NY columns - with Y-values.
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsTags()          tag values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxy(const kdtree &kdt, real_2d_array &xy)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultsxy(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Tags from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    Tags    -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    Tags    -   filled with tags associated with points,
+                or, when no tags were supplied, with zeros
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultstags(const kdtree &kdt, integer_1d_array &tags)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultstags(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Distances from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    R       -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    R       -   filled with distances (in corresponding norm)
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsTags()          tag values
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsdistances(const kdtree &kdt, real_1d_array &r)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultsdistances(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(r.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+X-values from last query; 'interactive' variant for languages like  Python
+which   support    constructs   like  "X = KDTreeQueryResultsXI(KDT)"  and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxi(const kdtree &kdt, real_2d_array &x)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultsxi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+XY-values from last query; 'interactive' variant for languages like Python
+which   support    constructs   like "XY = KDTreeQueryResultsXYI(KDT)" and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxyi(const kdtree &kdt, real_2d_array &xy)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultsxyi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Tags  from  last  query;  'interactive' variant for languages like  Python
+which  support  constructs  like "Tags = KDTreeQueryResultsTagsI(KDT)" and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultstagsi(const kdtree &kdt, integer_1d_array &tags)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultstagsi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(tags.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Distances from last query; 'interactive' variant for languages like Python
+which  support  constructs   like  "R = KDTreeQueryResultsDistancesI(KDT)"
+and interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsdistancesi(const kdtree &kdt, real_1d_array &r)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::kdtreequeryresultsdistancesi(const_cast<alglib_impl::kdtree*>(kdt.c_ptr()), const_cast<alglib_impl::ae_vector*>(r.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+static ae_int_t hqrnd_hqrndmax = 2147483563;
+static ae_int_t hqrnd_hqrndm1 = 2147483563;
+static ae_int_t hqrnd_hqrndm2 = 2147483399;
+static ae_int_t hqrnd_hqrndmagic = 1634357784;
+static ae_int_t hqrnd_hqrndintegerbase(hqrndstate* state,
+     ae_state *_state);
+
+
+static ae_int_t nearestneighbor_splitnodesize = 6;
+static void nearestneighbor_kdtreesplit(kdtree* kdt,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t d,
+     double s,
+     ae_int_t* i3,
+     ae_state *_state);
+static void nearestneighbor_kdtreegeneratetreerec(kdtree* kdt,
+     ae_int_t* nodesoffs,
+     ae_int_t* splitsoffs,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t maxleafsize,
+     ae_state *_state);
+static void nearestneighbor_kdtreequerynnrec(kdtree* kdt,
+     ae_int_t offs,
+     ae_state *_state);
+static void nearestneighbor_kdtreeinitbox(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     ae_state *_state);
+
+
+
+
+
+/*************************************************************************
+HQRNDState  initialization  with  random  values  which come from standard
+RNG.
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndrandomize(hqrndstate* state, ae_state *_state)
+{
+
+    _hqrndstate_clear(state);
+
+    hqrndseed(ae_randominteger(hqrnd_hqrndm1, _state), ae_randominteger(hqrnd_hqrndm2, _state), state, _state);
+}
+
+
+/*************************************************************************
+HQRNDState initialization with seed values
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndseed(ae_int_t s1,
+     ae_int_t s2,
+     hqrndstate* state,
+     ae_state *_state)
+{
+
+    _hqrndstate_clear(state);
+
+    state->s1 = s1%(hqrnd_hqrndm1-1)+1;
+    state->s2 = s2%(hqrnd_hqrndm2-1)+1;
+    state->v = (double)1/(double)hqrnd_hqrndmax;
+    state->magicv = hqrnd_hqrndmagic;
+}
+
+
+/*************************************************************************
+This function generates random real number in (0,1),
+not including interval boundaries
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+double hqrnduniformr(hqrndstate* state, ae_state *_state)
+{
+    double result;
+
+
+    result = state->v*hqrnd_hqrndintegerbase(state, _state);
+    return result;
+}
+
+
+/*************************************************************************
+This function generates random integer number in [0, N)
+
+1. N must be less than HQRNDMax-1.
+2. State structure must be initialized with HQRNDRandomize() or HQRNDSeed()
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t hqrnduniformi(hqrndstate* state, ae_int_t n, ae_state *_state)
+{
+    ae_int_t mx;
+    ae_int_t result;
+
+
+    
+    /*
+     * Correct handling of N's close to RNDBaseMax
+     * (avoiding skewed distributions for RNDBaseMax<>K*N)
+     */
+    ae_assert(n>0, "HQRNDUniformI: N<=0!", _state);
+    ae_assert(n<hqrnd_hqrndmax-1, "HQRNDUniformI: N>=RNDBaseMax-1!", _state);
+    mx = hqrnd_hqrndmax-1-(hqrnd_hqrndmax-1)%n;
+    do
+    {
+        result = hqrnd_hqrndintegerbase(state, _state)-1;
+    }
+    while(result>=mx);
+    result = result%n;
+    return result;
+}
+
+
+/*************************************************************************
+Random number generator: normal numbers
+
+This function generates one random number from normal distribution.
+Its performance is equal to that of HQRNDNormal2()
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+double hqrndnormal(hqrndstate* state, ae_state *_state)
+{
+    double v1;
+    double v2;
+    double result;
+
+
+    hqrndnormal2(state, &v1, &v2, _state);
+    result = v1;
+    return result;
+}
+
+
+/*************************************************************************
+Random number generator: random X and Y such that X^2+Y^2=1
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndunit2(hqrndstate* state, double* x, double* y, ae_state *_state)
+{
+    double v;
+    double mx;
+    double mn;
+
+    *x = 0;
+    *y = 0;
+
+    do
+    {
+        hqrndnormal2(state, x, y, _state);
+    }
+    while(!(ae_fp_neq(*x,0)||ae_fp_neq(*y,0)));
+    mx = ae_maxreal(ae_fabs(*x, _state), ae_fabs(*y, _state), _state);
+    mn = ae_minreal(ae_fabs(*x, _state), ae_fabs(*y, _state), _state);
+    v = mx*ae_sqrt(1+ae_sqr(mn/mx, _state), _state);
+    *x = *x/v;
+    *y = *y/v;
+}
+
+
+/*************************************************************************
+Random number generator: normal numbers
+
+This function generates two independent random numbers from normal
+distribution. Its performance is equal to that of HQRNDNormal()
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndnormal2(hqrndstate* state,
+     double* x1,
+     double* x2,
+     ae_state *_state)
+{
+    double u;
+    double v;
+    double s;
+
+    *x1 = 0;
+    *x2 = 0;
+
+    for(;;)
+    {
+        u = 2*hqrnduniformr(state, _state)-1;
+        v = 2*hqrnduniformr(state, _state)-1;
+        s = ae_sqr(u, _state)+ae_sqr(v, _state);
+        if( ae_fp_greater(s,0)&&ae_fp_less(s,1) )
+        {
+            
+            /*
+             * two Sqrt's instead of one to
+             * avoid overflow when S is too small
+             */
+            s = ae_sqrt(-2*ae_log(s, _state), _state)/ae_sqrt(s, _state);
+            *x1 = u*s;
+            *x2 = v*s;
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Random number generator: exponential distribution
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 11.08.2007 by Bochkanov Sergey
+*************************************************************************/
+double hqrndexponential(hqrndstate* state,
+     double lambdav,
+     ae_state *_state)
+{
+    double result;
+
+
+    ae_assert(ae_fp_greater(lambdav,0), "HQRNDExponential: LambdaV<=0!", _state);
+    result = -ae_log(hqrnduniformr(state, _state), _state)/lambdav;
+    return result;
+}
+
+
+/*************************************************************************
+
+L'Ecuyer, Efficient and portable combined random number generators
+*************************************************************************/
+static ae_int_t hqrnd_hqrndintegerbase(hqrndstate* state,
+     ae_state *_state)
+{
+    ae_int_t k;
+    ae_int_t result;
+
+
+    ae_assert(state->magicv==hqrnd_hqrndmagic, "HQRNDIntegerBase: State is not correctly initialized!", _state);
+    k = state->s1/53668;
+    state->s1 = 40014*(state->s1-k*53668)-k*12211;
+    if( state->s1<0 )
+    {
+        state->s1 = state->s1+2147483563;
+    }
+    k = state->s2/52774;
+    state->s2 = 40692*(state->s2-k*52774)-k*3791;
+    if( state->s2<0 )
+    {
+        state->s2 = state->s2+2147483399;
+    }
+    
+    /*
+     * Result
+     */
+    result = state->s1-state->s2;
+    if( result<1 )
+    {
+        result = result+2147483562;
+    }
+    return result;
+}
+
+
+ae_bool _hqrndstate_init(hqrndstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _hqrndstate_init_copy(hqrndstate* dst, hqrndstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->s1 = src->s1;
+    dst->s2 = src->s2;
+    dst->v = src->v;
+    dst->magicv = src->magicv;
+    return ae_true;
+}
+
+
+void _hqrndstate_clear(hqrndstate* p)
+{
+}
+
+
+
+
+/*************************************************************************
+KD-tree creation
+
+This subroutine creates KD-tree from set of X-values and optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+                
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+    
+    
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuild(/* Real    */ ae_matrix* xy,
+     ae_int_t n,
+     ae_int_t nx,
+     ae_int_t ny,
+     ae_int_t normtype,
+     kdtree* kdt,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tags;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    _kdtree_clear(kdt);
+    ae_vector_init(&tags, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "KDTreeBuild: N<1!", _state);
+    ae_assert(nx>=1, "KDTreeBuild: NX<1!", _state);
+    ae_assert(ny>=0, "KDTreeBuild: NY<0!", _state);
+    ae_assert(normtype>=0&&normtype<=2, "KDTreeBuild: incorrect NormType!", _state);
+    ae_assert(xy->rows>=n, "KDTreeBuild: rows(X)<N!", _state);
+    ae_assert(xy->cols>=nx+ny, "KDTreeBuild: cols(X)<NX+NY!", _state);
+    ae_assert(apservisfinitematrix(xy, n, nx+ny, _state), "KDTreeBuild: X contains infinite or NaN values!", _state);
+    ae_vector_set_length(&tags, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        tags.ptr.p_int[i] = 0;
+    }
+    kdtreebuildtagged(xy, &tags, n, nx, ny, normtype, kdt, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+KD-tree creation
+
+This  subroutine  creates  KD-tree  from set of X-values, integer tags and
+optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    Tags    -   tags, array[0..N-1], contains integer tags associated
+                with points.
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuildtagged(/* Real    */ ae_matrix* xy,
+     /* Integer */ ae_vector* tags,
+     ae_int_t n,
+     ae_int_t nx,
+     ae_int_t ny,
+     ae_int_t normtype,
+     kdtree* kdt,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t maxnodes;
+    ae_int_t nodesoffs;
+    ae_int_t splitsoffs;
+
+    _kdtree_clear(kdt);
+
+    ae_assert(n>=1, "KDTreeBuildTagged: N<1!", _state);
+    ae_assert(nx>=1, "KDTreeBuildTagged: NX<1!", _state);
+    ae_assert(ny>=0, "KDTreeBuildTagged: NY<0!", _state);
+    ae_assert(normtype>=0&&normtype<=2, "KDTreeBuildTagged: incorrect NormType!", _state);
+    ae_assert(xy->rows>=n, "KDTreeBuildTagged: rows(X)<N!", _state);
+    ae_assert(xy->cols>=nx+ny, "KDTreeBuildTagged: cols(X)<NX+NY!", _state);
+    ae_assert(apservisfinitematrix(xy, n, nx+ny, _state), "KDTreeBuildTagged: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize
+     */
+    kdt->n = n;
+    kdt->nx = nx;
+    kdt->ny = ny;
+    kdt->normtype = normtype;
+    kdt->distmatrixtype = 0;
+    ae_matrix_set_length(&kdt->xy, n, 2*nx+ny, _state);
+    ae_vector_set_length(&kdt->tags, n, _state);
+    ae_vector_set_length(&kdt->idx, n, _state);
+    ae_vector_set_length(&kdt->r, n, _state);
+    ae_vector_set_length(&kdt->x, nx, _state);
+    ae_vector_set_length(&kdt->buf, ae_maxint(n, nx, _state), _state);
+    
+    /*
+     * Initial fill
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        ae_v_move(&kdt->xy.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nx-1));
+        ae_v_move(&kdt->xy.ptr.pp_double[i][nx], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(nx,2*nx+ny-1));
+        kdt->tags.ptr.p_int[i] = tags->ptr.p_int[i];
+    }
+    
+    /*
+     * Determine bounding box
+     */
+    ae_vector_set_length(&kdt->boxmin, nx, _state);
+    ae_vector_set_length(&kdt->boxmax, nx, _state);
+    ae_vector_set_length(&kdt->curboxmin, nx, _state);
+    ae_vector_set_length(&kdt->curboxmax, nx, _state);
+    ae_v_move(&kdt->boxmin.ptr.p_double[0], 1, &kdt->xy.ptr.pp_double[0][0], 1, ae_v_len(0,nx-1));
+    ae_v_move(&kdt->boxmax.ptr.p_double[0], 1, &kdt->xy.ptr.pp_double[0][0], 1, ae_v_len(0,nx-1));
+    for(i=1; i<=n-1; i++)
+    {
+        for(j=0; j<=nx-1; j++)
+        {
+            kdt->boxmin.ptr.p_double[j] = ae_minreal(kdt->boxmin.ptr.p_double[j], kdt->xy.ptr.pp_double[i][j], _state);
+            kdt->boxmax.ptr.p_double[j] = ae_maxreal(kdt->boxmax.ptr.p_double[j], kdt->xy.ptr.pp_double[i][j], _state);
+        }
+    }
+    
+    /*
+     * prepare tree structure
+     * * MaxNodes=N because we guarantee no trivial splits, i.e.
+     *   every split will generate two non-empty boxes
+     */
+    maxnodes = n;
+    ae_vector_set_length(&kdt->nodes, nearestneighbor_splitnodesize*2*maxnodes, _state);
+    ae_vector_set_length(&kdt->splits, 2*maxnodes, _state);
+    nodesoffs = 0;
+    splitsoffs = 0;
+    ae_v_move(&kdt->curboxmin.ptr.p_double[0], 1, &kdt->boxmin.ptr.p_double[0], 1, ae_v_len(0,nx-1));
+    ae_v_move(&kdt->curboxmax.ptr.p_double[0], 1, &kdt->boxmax.ptr.p_double[0], 1, ae_v_len(0,nx-1));
+    nearestneighbor_kdtreegeneratetreerec(kdt, &nodesoffs, &splitsoffs, 0, n, 8, _state);
+    
+    /*
+     * Set current query size to 0
+     */
+    kdt->kcur = 0;
+}
+
+
+/*************************************************************************
+K-NN query: K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryknn(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     ae_int_t k,
+     ae_bool selfmatch,
+     ae_state *_state)
+{
+    ae_int_t result;
+
+
+    ae_assert(k>=1, "KDTreeQueryKNN: K<1!", _state);
+    ae_assert(x->cnt>=kdt->nx, "KDTreeQueryKNN: Length(X)<NX!", _state);
+    ae_assert(isfinitevector(x, kdt->nx, _state), "KDTreeQueryKNN: X contains infinite or NaN values!", _state);
+    result = kdtreequeryaknn(kdt, x, k, selfmatch, 0.0, _state);
+    return result;
+}
+
+
+/*************************************************************************
+R-NN query: all points within R-sphere centered at X
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    R           -   radius of sphere (in corresponding norm), R>0
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of neighbors found, >=0
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+actual results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryrnn(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     double r,
+     ae_bool selfmatch,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t result;
+
+
+    ae_assert(ae_fp_greater(r,0), "KDTreeQueryRNN: incorrect R!", _state);
+    ae_assert(x->cnt>=kdt->nx, "KDTreeQueryRNN: Length(X)<NX!", _state);
+    ae_assert(isfinitevector(x, kdt->nx, _state), "KDTreeQueryRNN: X contains infinite or NaN values!", _state);
+    
+    /*
+     * Prepare parameters
+     */
+    kdt->kneeded = 0;
+    if( kdt->normtype!=2 )
+    {
+        kdt->rneeded = r;
+    }
+    else
+    {
+        kdt->rneeded = ae_sqr(r, _state);
+    }
+    kdt->selfmatch = selfmatch;
+    kdt->approxf = 1;
+    kdt->kcur = 0;
+    
+    /*
+     * calculate distance from point to current bounding box
+     */
+    nearestneighbor_kdtreeinitbox(kdt, x, _state);
+    
+    /*
+     * call recursive search
+     * results are returned as heap
+     */
+    nearestneighbor_kdtreequerynnrec(kdt, 0, _state);
+    
+    /*
+     * pop from heap to generate ordered representation
+     *
+     * last element is not pop'ed because it is already in
+     * its place
+     */
+    result = kdt->kcur;
+    j = kdt->kcur;
+    for(i=kdt->kcur; i>=2; i--)
+    {
+        tagheappopi(&kdt->r, &kdt->idx, &j, _state);
+    }
+    return result;
+}
+
+
+/*************************************************************************
+K-NN query: approximate K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+    Eps         -   approximation factor, Eps>=0. eps-approximate  nearest
+                    neighbor  is  a  neighbor  whose distance from X is at
+                    most (1+eps) times distance of true nearest neighbor.
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+    
+NOTES
+    significant performance gain may be achieved only when Eps  is  is  on
+    the order of magnitude of 1 or larger.
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryaknn(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     ae_int_t k,
+     ae_bool selfmatch,
+     double eps,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t result;
+
+
+    ae_assert(k>0, "KDTreeQueryAKNN: incorrect K!", _state);
+    ae_assert(ae_fp_greater_eq(eps,0), "KDTreeQueryAKNN: incorrect Eps!", _state);
+    ae_assert(x->cnt>=kdt->nx, "KDTreeQueryAKNN: Length(X)<NX!", _state);
+    ae_assert(isfinitevector(x, kdt->nx, _state), "KDTreeQueryAKNN: X contains infinite or NaN values!", _state);
+    
+    /*
+     * Prepare parameters
+     */
+    k = ae_minint(k, kdt->n, _state);
+    kdt->kneeded = k;
+    kdt->rneeded = 0;
+    kdt->selfmatch = selfmatch;
+    if( kdt->normtype==2 )
+    {
+        kdt->approxf = 1/ae_sqr(1+eps, _state);
+    }
+    else
+    {
+        kdt->approxf = 1/(1+eps);
+    }
+    kdt->kcur = 0;
+    
+    /*
+     * calculate distance from point to current bounding box
+     */
+    nearestneighbor_kdtreeinitbox(kdt, x, _state);
+    
+    /*
+     * call recursive search
+     * results are returned as heap
+     */
+    nearestneighbor_kdtreequerynnrec(kdt, 0, _state);
+    
+    /*
+     * pop from heap to generate ordered representation
+     *
+     * last element is non pop'ed because it is already in
+     * its place
+     */
+    result = kdt->kcur;
+    j = kdt->kcur;
+    for(i=kdt->kcur; i>=2; i--)
+    {
+        tagheappopi(&kdt->r, &kdt->idx, &j, _state);
+    }
+    return result;
+}
+
+
+/*************************************************************************
+X-values from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    X       -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    X       -   rows are filled with X-values
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsTags()          tag values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsx(kdtree* kdt,
+     /* Real    */ ae_matrix* x,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+
+    if( kdt->kcur==0 )
+    {
+        return;
+    }
+    if( x->rows<kdt->kcur||x->cols<kdt->nx )
+    {
+        ae_matrix_set_length(x, kdt->kcur, kdt->nx, _state);
+    }
+    k = kdt->kcur;
+    for(i=0; i<=k-1; i++)
+    {
+        ae_v_move(&x->ptr.pp_double[i][0], 1, &kdt->xy.ptr.pp_double[kdt->idx.ptr.p_int[i]][kdt->nx], 1, ae_v_len(0,kdt->nx-1));
+    }
+}
+
+
+/*************************************************************************
+X- and Y-values from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    XY      -   possibly pre-allocated buffer. If XY is too small to store
+                result, it is resized. If size(XY) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    XY      -   rows are filled with points: first NX columns with
+                X-values, next NY columns - with Y-values.
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsTags()          tag values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxy(kdtree* kdt,
+     /* Real    */ ae_matrix* xy,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+
+    if( kdt->kcur==0 )
+    {
+        return;
+    }
+    if( xy->rows<kdt->kcur||xy->cols<kdt->nx+kdt->ny )
+    {
+        ae_matrix_set_length(xy, kdt->kcur, kdt->nx+kdt->ny, _state);
+    }
+    k = kdt->kcur;
+    for(i=0; i<=k-1; i++)
+    {
+        ae_v_move(&xy->ptr.pp_double[i][0], 1, &kdt->xy.ptr.pp_double[kdt->idx.ptr.p_int[i]][kdt->nx], 1, ae_v_len(0,kdt->nx+kdt->ny-1));
+    }
+}
+
+
+/*************************************************************************
+Tags from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    Tags    -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    Tags    -   filled with tags associated with points,
+                or, when no tags were supplied, with zeros
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultstags(kdtree* kdt,
+     /* Integer */ ae_vector* tags,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+
+    if( kdt->kcur==0 )
+    {
+        return;
+    }
+    if( tags->cnt<kdt->kcur )
+    {
+        ae_vector_set_length(tags, kdt->kcur, _state);
+    }
+    k = kdt->kcur;
+    for(i=0; i<=k-1; i++)
+    {
+        tags->ptr.p_int[i] = kdt->tags.ptr.p_int[kdt->idx.ptr.p_int[i]];
+    }
+}
+
+
+/*************************************************************************
+Distances from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    R       -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    R       -   filled with distances (in corresponding norm)
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsTags()          tag values
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsdistances(kdtree* kdt,
+     /* Real    */ ae_vector* r,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+
+    if( kdt->kcur==0 )
+    {
+        return;
+    }
+    if( r->cnt<kdt->kcur )
+    {
+        ae_vector_set_length(r, kdt->kcur, _state);
+    }
+    k = kdt->kcur;
+    
+    /*
+     * unload norms
+     *
+     * Abs() call is used to handle cases with negative norms
+     * (generated during KFN requests)
+     */
+    if( kdt->normtype==0 )
+    {
+        for(i=0; i<=k-1; i++)
+        {
+            r->ptr.p_double[i] = ae_fabs(kdt->r.ptr.p_double[i], _state);
+        }
+    }
+    if( kdt->normtype==1 )
+    {
+        for(i=0; i<=k-1; i++)
+        {
+            r->ptr.p_double[i] = ae_fabs(kdt->r.ptr.p_double[i], _state);
+        }
+    }
+    if( kdt->normtype==2 )
+    {
+        for(i=0; i<=k-1; i++)
+        {
+            r->ptr.p_double[i] = ae_sqrt(ae_fabs(kdt->r.ptr.p_double[i], _state), _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+X-values from last query; 'interactive' variant for languages like  Python
+which   support    constructs   like  "X = KDTreeQueryResultsXI(KDT)"  and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxi(kdtree* kdt,
+     /* Real    */ ae_matrix* x,
+     ae_state *_state)
+{
+
+    ae_matrix_clear(x);
+
+    kdtreequeryresultsx(kdt, x, _state);
+}
+
+
+/*************************************************************************
+XY-values from last query; 'interactive' variant for languages like Python
+which   support    constructs   like "XY = KDTreeQueryResultsXYI(KDT)" and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxyi(kdtree* kdt,
+     /* Real    */ ae_matrix* xy,
+     ae_state *_state)
+{
+
+    ae_matrix_clear(xy);
+
+    kdtreequeryresultsxy(kdt, xy, _state);
+}
+
+
+/*************************************************************************
+Tags  from  last  query;  'interactive' variant for languages like  Python
+which  support  constructs  like "Tags = KDTreeQueryResultsTagsI(KDT)" and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultstagsi(kdtree* kdt,
+     /* Integer */ ae_vector* tags,
+     ae_state *_state)
+{
+
+    ae_vector_clear(tags);
+
+    kdtreequeryresultstags(kdt, tags, _state);
+}
+
+
+/*************************************************************************
+Distances from last query; 'interactive' variant for languages like Python
+which  support  constructs   like  "R = KDTreeQueryResultsDistancesI(KDT)"
+and interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsdistancesi(kdtree* kdt,
+     /* Real    */ ae_vector* r,
+     ae_state *_state)
+{
+
+    ae_vector_clear(r);
+
+    kdtreequeryresultsdistances(kdt, r, _state);
+}
+
+
+/*************************************************************************
+Rearranges nodes [I1,I2) using partition in D-th dimension with S as threshold.
+Returns split position I3: [I1,I3) and [I3,I2) are created as result.
+
+This subroutine doesn't create tree structures, just rearranges nodes.
+*************************************************************************/
+static void nearestneighbor_kdtreesplit(kdtree* kdt,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t d,
+     double s,
+     ae_int_t* i3,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t ileft;
+    ae_int_t iright;
+    double v;
+
+    *i3 = 0;
+
+    
+    /*
+     * split XY/Tags in two parts:
+     * * [ILeft,IRight] is non-processed part of XY/Tags
+     *
+     * After cycle is done, we have Ileft=IRight. We deal with
+     * this element separately.
+     *
+     * After this, [I1,ILeft) contains left part, and [ILeft,I2)
+     * contains right part.
+     */
+    ileft = i1;
+    iright = i2-1;
+    while(ileft<iright)
+    {
+        if( ae_fp_less_eq(kdt->xy.ptr.pp_double[ileft][d],s) )
+        {
+            
+            /*
+             * XY[ILeft] is on its place.
+             * Advance ILeft.
+             */
+            ileft = ileft+1;
+        }
+        else
+        {
+            
+            /*
+             * XY[ILeft,..] must be at IRight.
+             * Swap and advance IRight.
+             */
+            for(i=0; i<=2*kdt->nx+kdt->ny-1; i++)
+            {
+                v = kdt->xy.ptr.pp_double[ileft][i];
+                kdt->xy.ptr.pp_double[ileft][i] = kdt->xy.ptr.pp_double[iright][i];
+                kdt->xy.ptr.pp_double[iright][i] = v;
+            }
+            j = kdt->tags.ptr.p_int[ileft];
+            kdt->tags.ptr.p_int[ileft] = kdt->tags.ptr.p_int[iright];
+            kdt->tags.ptr.p_int[iright] = j;
+            iright = iright-1;
+        }
+    }
+    if( ae_fp_less_eq(kdt->xy.ptr.pp_double[ileft][d],s) )
+    {
+        ileft = ileft+1;
+    }
+    else
+    {
+        iright = iright-1;
+    }
+    *i3 = ileft;
+}
+
+
+/*************************************************************************
+Recursive kd-tree generation subroutine.
+
+PARAMETERS
+    KDT         tree
+    NodesOffs   unused part of Nodes[] which must be filled by tree
+    SplitsOffs  unused part of Splits[]
+    I1, I2      points from [I1,I2) are processed
+    
+NodesOffs[] and SplitsOffs[] must be large enough.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+static void nearestneighbor_kdtreegeneratetreerec(kdtree* kdt,
+     ae_int_t* nodesoffs,
+     ae_int_t* splitsoffs,
+     ae_int_t i1,
+     ae_int_t i2,
+     ae_int_t maxleafsize,
+     ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t nx;
+    ae_int_t ny;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t oldoffs;
+    ae_int_t i3;
+    ae_int_t cntless;
+    ae_int_t cntgreater;
+    double minv;
+    double maxv;
+    ae_int_t minidx;
+    ae_int_t maxidx;
+    ae_int_t d;
+    double ds;
+    double s;
+    double v;
+
+
+    ae_assert(i2>i1, "KDTreeGenerateTreeRec: internal error", _state);
+    
+    /*
+     * Generate leaf if needed
+     */
+    if( i2-i1<=maxleafsize )
+    {
+        kdt->nodes.ptr.p_int[*nodesoffs+0] = i2-i1;
+        kdt->nodes.ptr.p_int[*nodesoffs+1] = i1;
+        *nodesoffs = *nodesoffs+2;
+        return;
+    }
+    
+    /*
+     * Load values for easier access
+     */
+    nx = kdt->nx;
+    ny = kdt->ny;
+    
+    /*
+     * select dimension to split:
+     * * D is a dimension number
+     */
+    d = 0;
+    ds = kdt->curboxmax.ptr.p_double[0]-kdt->curboxmin.ptr.p_double[0];
+    for(i=1; i<=nx-1; i++)
+    {
+        v = kdt->curboxmax.ptr.p_double[i]-kdt->curboxmin.ptr.p_double[i];
+        if( ae_fp_greater(v,ds) )
+        {
+            ds = v;
+            d = i;
+        }
+    }
+    
+    /*
+     * Select split position S using sliding midpoint rule,
+     * rearrange points into [I1,I3) and [I3,I2)
+     */
+    s = kdt->curboxmin.ptr.p_double[d]+0.5*ds;
+    ae_v_move(&kdt->buf.ptr.p_double[0], 1, &kdt->xy.ptr.pp_double[i1][d], kdt->xy.stride, ae_v_len(0,i2-i1-1));
+    n = i2-i1;
+    cntless = 0;
+    cntgreater = 0;
+    minv = kdt->buf.ptr.p_double[0];
+    maxv = kdt->buf.ptr.p_double[0];
+    minidx = i1;
+    maxidx = i1;
+    for(i=0; i<=n-1; i++)
+    {
+        v = kdt->buf.ptr.p_double[i];
+        if( ae_fp_less(v,minv) )
+        {
+            minv = v;
+            minidx = i1+i;
+        }
+        if( ae_fp_greater(v,maxv) )
+        {
+            maxv = v;
+            maxidx = i1+i;
+        }
+        if( ae_fp_less(v,s) )
+        {
+            cntless = cntless+1;
+        }
+        if( ae_fp_greater(v,s) )
+        {
+            cntgreater = cntgreater+1;
+        }
+    }
+    if( cntless>0&&cntgreater>0 )
+    {
+        
+        /*
+         * normal midpoint split
+         */
+        nearestneighbor_kdtreesplit(kdt, i1, i2, d, s, &i3, _state);
+    }
+    else
+    {
+        
+        /*
+         * sliding midpoint
+         */
+        if( cntless==0 )
+        {
+            
+            /*
+             * 1. move split to MinV,
+             * 2. place one point to the left bin (move to I1),
+             *    others - to the right bin
+             */
+            s = minv;
+            if( minidx!=i1 )
+            {
+                for(i=0; i<=2*kdt->nx+kdt->ny-1; i++)
+                {
+                    v = kdt->xy.ptr.pp_double[minidx][i];
+                    kdt->xy.ptr.pp_double[minidx][i] = kdt->xy.ptr.pp_double[i1][i];
+                    kdt->xy.ptr.pp_double[i1][i] = v;
+                }
+                j = kdt->tags.ptr.p_int[minidx];
+                kdt->tags.ptr.p_int[minidx] = kdt->tags.ptr.p_int[i1];
+                kdt->tags.ptr.p_int[i1] = j;
+            }
+            i3 = i1+1;
+        }
+        else
+        {
+            
+            /*
+             * 1. move split to MaxV,
+             * 2. place one point to the right bin (move to I2-1),
+             *    others - to the left bin
+             */
+            s = maxv;
+            if( maxidx!=i2-1 )
+            {
+                for(i=0; i<=2*kdt->nx+kdt->ny-1; i++)
+                {
+                    v = kdt->xy.ptr.pp_double[maxidx][i];
+                    kdt->xy.ptr.pp_double[maxidx][i] = kdt->xy.ptr.pp_double[i2-1][i];
+                    kdt->xy.ptr.pp_double[i2-1][i] = v;
+                }
+                j = kdt->tags.ptr.p_int[maxidx];
+                kdt->tags.ptr.p_int[maxidx] = kdt->tags.ptr.p_int[i2-1];
+                kdt->tags.ptr.p_int[i2-1] = j;
+            }
+            i3 = i2-1;
+        }
+    }
+    
+    /*
+     * Generate 'split' node
+     */
+    kdt->nodes.ptr.p_int[*nodesoffs+0] = 0;
+    kdt->nodes.ptr.p_int[*nodesoffs+1] = d;
+    kdt->nodes.ptr.p_int[*nodesoffs+2] = *splitsoffs;
+    kdt->splits.ptr.p_double[*splitsoffs+0] = s;
+    oldoffs = *nodesoffs;
+    *nodesoffs = *nodesoffs+nearestneighbor_splitnodesize;
+    *splitsoffs = *splitsoffs+1;
+    
+    /*
+     * Recirsive generation:
+     * * update CurBox
+     * * call subroutine
+     * * restore CurBox
+     */
+    kdt->nodes.ptr.p_int[oldoffs+3] = *nodesoffs;
+    v = kdt->curboxmax.ptr.p_double[d];
+    kdt->curboxmax.ptr.p_double[d] = s;
+    nearestneighbor_kdtreegeneratetreerec(kdt, nodesoffs, splitsoffs, i1, i3, maxleafsize, _state);
+    kdt->curboxmax.ptr.p_double[d] = v;
+    kdt->nodes.ptr.p_int[oldoffs+4] = *nodesoffs;
+    v = kdt->curboxmin.ptr.p_double[d];
+    kdt->curboxmin.ptr.p_double[d] = s;
+    nearestneighbor_kdtreegeneratetreerec(kdt, nodesoffs, splitsoffs, i3, i2, maxleafsize, _state);
+    kdt->curboxmin.ptr.p_double[d] = v;
+}
+
+
+/*************************************************************************
+Recursive subroutine for NN queries.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+static void nearestneighbor_kdtreequerynnrec(kdtree* kdt,
+     ae_int_t offs,
+     ae_state *_state)
+{
+    double ptdist;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t nx;
+    ae_int_t i1;
+    ae_int_t i2;
+    ae_int_t d;
+    double s;
+    double v;
+    double t1;
+    ae_int_t childbestoffs;
+    ae_int_t childworstoffs;
+    ae_int_t childoffs;
+    double prevdist;
+    ae_bool todive;
+    ae_bool bestisleft;
+    ae_bool updatemin;
+
+
+    
+    /*
+     * Leaf node.
+     * Process points.
+     */
+    if( kdt->nodes.ptr.p_int[offs]>0 )
+    {
+        i1 = kdt->nodes.ptr.p_int[offs+1];
+        i2 = i1+kdt->nodes.ptr.p_int[offs];
+        for(i=i1; i<=i2-1; i++)
+        {
+            
+            /*
+             * Calculate distance
+             */
+            ptdist = 0;
+            nx = kdt->nx;
+            if( kdt->normtype==0 )
+            {
+                for(j=0; j<=nx-1; j++)
+                {
+                    ptdist = ae_maxreal(ptdist, ae_fabs(kdt->xy.ptr.pp_double[i][j]-kdt->x.ptr.p_double[j], _state), _state);
+                }
+            }
+            if( kdt->normtype==1 )
+            {
+                for(j=0; j<=nx-1; j++)
+                {
+                    ptdist = ptdist+ae_fabs(kdt->xy.ptr.pp_double[i][j]-kdt->x.ptr.p_double[j], _state);
+                }
+            }
+            if( kdt->normtype==2 )
+            {
+                for(j=0; j<=nx-1; j++)
+                {
+                    ptdist = ptdist+ae_sqr(kdt->xy.ptr.pp_double[i][j]-kdt->x.ptr.p_double[j], _state);
+                }
+            }
+            
+            /*
+             * Skip points with zero distance if self-matches are turned off
+             */
+            if( ae_fp_eq(ptdist,0)&&!kdt->selfmatch )
+            {
+                continue;
+            }
+            
+            /*
+             * We CAN'T process point if R-criterion isn't satisfied,
+             * i.e. (RNeeded<>0) AND (PtDist>R).
+             */
+            if( ae_fp_eq(kdt->rneeded,0)||ae_fp_less_eq(ptdist,kdt->rneeded) )
+            {
+                
+                /*
+                 * R-criterion is satisfied, we must either:
+                 * * replace worst point, if (KNeeded<>0) AND (KCur=KNeeded)
+                 *   (or skip, if worst point is better)
+                 * * add point without replacement otherwise
+                 */
+                if( kdt->kcur<kdt->kneeded||kdt->kneeded==0 )
+                {
+                    
+                    /*
+                     * add current point to heap without replacement
+                     */
+                    tagheappushi(&kdt->r, &kdt->idx, &kdt->kcur, ptdist, i, _state);
+                }
+                else
+                {
+                    
+                    /*
+                     * New points are added or not, depending on their distance.
+                     * If added, they replace element at the top of the heap
+                     */
+                    if( ae_fp_less(ptdist,kdt->r.ptr.p_double[0]) )
+                    {
+                        if( kdt->kneeded==1 )
+                        {
+                            kdt->idx.ptr.p_int[0] = i;
+                            kdt->r.ptr.p_double[0] = ptdist;
+                        }
+                        else
+                        {
+                            tagheapreplacetopi(&kdt->r, &kdt->idx, kdt->kneeded, ptdist, i, _state);
+                        }
+                    }
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Simple split
+     */
+    if( kdt->nodes.ptr.p_int[offs]==0 )
+    {
+        
+        /*
+         * Load:
+         * * D  dimension to split
+         * * S  split position
+         */
+        d = kdt->nodes.ptr.p_int[offs+1];
+        s = kdt->splits.ptr.p_double[kdt->nodes.ptr.p_int[offs+2]];
+        
+        /*
+         * Calculate:
+         * * ChildBestOffs      child box with best chances
+         * * ChildWorstOffs     child box with worst chances
+         */
+        if( ae_fp_less_eq(kdt->x.ptr.p_double[d],s) )
+        {
+            childbestoffs = kdt->nodes.ptr.p_int[offs+3];
+            childworstoffs = kdt->nodes.ptr.p_int[offs+4];
+            bestisleft = ae_true;
+        }
+        else
+        {
+            childbestoffs = kdt->nodes.ptr.p_int[offs+4];
+            childworstoffs = kdt->nodes.ptr.p_int[offs+3];
+            bestisleft = ae_false;
+        }
+        
+        /*
+         * Navigate through childs
+         */
+        for(i=0; i<=1; i++)
+        {
+            
+            /*
+             * Select child to process:
+             * * ChildOffs      current child offset in Nodes[]
+             * * UpdateMin      whether minimum or maximum value
+             *                  of bounding box is changed on update
+             */
+            if( i==0 )
+            {
+                childoffs = childbestoffs;
+                updatemin = !bestisleft;
+            }
+            else
+            {
+                updatemin = bestisleft;
+                childoffs = childworstoffs;
+            }
+            
+            /*
+             * Update bounding box and current distance
+             */
+            if( updatemin )
+            {
+                prevdist = kdt->curdist;
+                t1 = kdt->x.ptr.p_double[d];
+                v = kdt->curboxmin.ptr.p_double[d];
+                if( ae_fp_less_eq(t1,s) )
+                {
+                    if( kdt->normtype==0 )
+                    {
+                        kdt->curdist = ae_maxreal(kdt->curdist, s-t1, _state);
+                    }
+                    if( kdt->normtype==1 )
+                    {
+                        kdt->curdist = kdt->curdist-ae_maxreal(v-t1, 0, _state)+s-t1;
+                    }
+                    if( kdt->normtype==2 )
+                    {
+                        kdt->curdist = kdt->curdist-ae_sqr(ae_maxreal(v-t1, 0, _state), _state)+ae_sqr(s-t1, _state);
+                    }
+                }
+                kdt->curboxmin.ptr.p_double[d] = s;
+            }
+            else
+            {
+                prevdist = kdt->curdist;
+                t1 = kdt->x.ptr.p_double[d];
+                v = kdt->curboxmax.ptr.p_double[d];
+                if( ae_fp_greater_eq(t1,s) )
+                {
+                    if( kdt->normtype==0 )
+                    {
+                        kdt->curdist = ae_maxreal(kdt->curdist, t1-s, _state);
+                    }
+                    if( kdt->normtype==1 )
+                    {
+                        kdt->curdist = kdt->curdist-ae_maxreal(t1-v, 0, _state)+t1-s;
+                    }
+                    if( kdt->normtype==2 )
+                    {
+                        kdt->curdist = kdt->curdist-ae_sqr(ae_maxreal(t1-v, 0, _state), _state)+ae_sqr(t1-s, _state);
+                    }
+                }
+                kdt->curboxmax.ptr.p_double[d] = s;
+            }
+            
+            /*
+             * Decide: to dive into cell or not to dive
+             */
+            if( ae_fp_neq(kdt->rneeded,0)&&ae_fp_greater(kdt->curdist,kdt->rneeded) )
+            {
+                todive = ae_false;
+            }
+            else
+            {
+                if( kdt->kcur<kdt->kneeded||kdt->kneeded==0 )
+                {
+                    
+                    /*
+                     * KCur<KNeeded (i.e. not all points are found)
+                     */
+                    todive = ae_true;
+                }
+                else
+                {
+                    
+                    /*
+                     * KCur=KNeeded, decide to dive or not to dive
+                     * using point position relative to bounding box.
+                     */
+                    todive = ae_fp_less_eq(kdt->curdist,kdt->r.ptr.p_double[0]*kdt->approxf);
+                }
+            }
+            if( todive )
+            {
+                nearestneighbor_kdtreequerynnrec(kdt, childoffs, _state);
+            }
+            
+            /*
+             * Restore bounding box and distance
+             */
+            if( updatemin )
+            {
+                kdt->curboxmin.ptr.p_double[d] = v;
+            }
+            else
+            {
+                kdt->curboxmax.ptr.p_double[d] = v;
+            }
+            kdt->curdist = prevdist;
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+Copies X[] to KDT.X[]
+Loads distance from X[] to bounding box.
+Initializes CurBox[].
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+static void nearestneighbor_kdtreeinitbox(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double vx;
+    double vmin;
+    double vmax;
+
+
+    
+    /*
+     * calculate distance from point to current bounding box
+     */
+    kdt->curdist = 0;
+    if( kdt->normtype==0 )
+    {
+        for(i=0; i<=kdt->nx-1; i++)
+        {
+            vx = x->ptr.p_double[i];
+            vmin = kdt->boxmin.ptr.p_double[i];
+            vmax = kdt->boxmax.ptr.p_double[i];
+            kdt->x.ptr.p_double[i] = vx;
+            kdt->curboxmin.ptr.p_double[i] = vmin;
+            kdt->curboxmax.ptr.p_double[i] = vmax;
+            if( ae_fp_less(vx,vmin) )
+            {
+                kdt->curdist = ae_maxreal(kdt->curdist, vmin-vx, _state);
+            }
+            else
+            {
+                if( ae_fp_greater(vx,vmax) )
+                {
+                    kdt->curdist = ae_maxreal(kdt->curdist, vx-vmax, _state);
+                }
+            }
+        }
+    }
+    if( kdt->normtype==1 )
+    {
+        for(i=0; i<=kdt->nx-1; i++)
+        {
+            vx = x->ptr.p_double[i];
+            vmin = kdt->boxmin.ptr.p_double[i];
+            vmax = kdt->boxmax.ptr.p_double[i];
+            kdt->x.ptr.p_double[i] = vx;
+            kdt->curboxmin.ptr.p_double[i] = vmin;
+            kdt->curboxmax.ptr.p_double[i] = vmax;
+            if( ae_fp_less(vx,vmin) )
+            {
+                kdt->curdist = kdt->curdist+vmin-vx;
+            }
+            else
+            {
+                if( ae_fp_greater(vx,vmax) )
+                {
+                    kdt->curdist = kdt->curdist+vx-vmax;
+                }
+            }
+        }
+    }
+    if( kdt->normtype==2 )
+    {
+        for(i=0; i<=kdt->nx-1; i++)
+        {
+            vx = x->ptr.p_double[i];
+            vmin = kdt->boxmin.ptr.p_double[i];
+            vmax = kdt->boxmax.ptr.p_double[i];
+            kdt->x.ptr.p_double[i] = vx;
+            kdt->curboxmin.ptr.p_double[i] = vmin;
+            kdt->curboxmax.ptr.p_double[i] = vmax;
+            if( ae_fp_less(vx,vmin) )
+            {
+                kdt->curdist = kdt->curdist+ae_sqr(vmin-vx, _state);
+            }
+            else
+            {
+                if( ae_fp_greater(vx,vmax) )
+                {
+                    kdt->curdist = kdt->curdist+ae_sqr(vx-vmax, _state);
+                }
+            }
+        }
+    }
+}
+
+
+ae_bool _kdtree_init(kdtree* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->tags, 0, DT_INT, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->boxmin, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->boxmax, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->curboxmin, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->curboxmax, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->nodes, 0, DT_INT, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->splits, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->idx, 0, DT_INT, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->r, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->buf, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _kdtree_init_copy(kdtree* dst, kdtree* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->n = src->n;
+    dst->nx = src->nx;
+    dst->ny = src->ny;
+    dst->normtype = src->normtype;
+    dst->distmatrixtype = src->distmatrixtype;
+    if( !ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->tags, &src->tags, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->boxmin, &src->boxmin, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->boxmax, &src->boxmax, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->curboxmin, &src->curboxmin, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->curboxmax, &src->curboxmax, _state, make_automatic) )
+        return ae_false;
+    dst->curdist = src->curdist;
+    if( !ae_vector_init_copy(&dst->nodes, &src->nodes, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->splits, &src->splits, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->kneeded = src->kneeded;
+    dst->rneeded = src->rneeded;
+    dst->selfmatch = src->selfmatch;
+    dst->approxf = src->approxf;
+    dst->kcur = src->kcur;
+    if( !ae_vector_init_copy(&dst->idx, &src->idx, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->r, &src->r, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->buf, &src->buf, _state, make_automatic) )
+        return ae_false;
+    dst->debugcounter = src->debugcounter;
+    return ae_true;
+}
+
+
+void _kdtree_clear(kdtree* p)
+{
+    ae_matrix_clear(&p->xy);
+    ae_vector_clear(&p->tags);
+    ae_vector_clear(&p->boxmin);
+    ae_vector_clear(&p->boxmax);
+    ae_vector_clear(&p->curboxmin);
+    ae_vector_clear(&p->curboxmax);
+    ae_vector_clear(&p->nodes);
+    ae_vector_clear(&p->splits);
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->idx);
+    ae_vector_clear(&p->r);
+    ae_vector_clear(&p->buf);
+}
+
+
+
+}
+
diff --git a/contrib/lbfgs/alglibmisc.h b/contrib/lbfgs/alglibmisc.h
new file mode 100755
index 0000000000..491fb3314e
--- /dev/null
+++ b/contrib/lbfgs/alglibmisc.h
@@ -0,0 +1,685 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#ifndef _alglibmisc_pkg_h
+#define _alglibmisc_pkg_h
+#include "ap.h"
+#include "alglibinternal.h"
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+typedef struct
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    double v;
+    ae_int_t magicv;
+} hqrndstate;
+typedef struct
+{
+    ae_int_t n;
+    ae_int_t nx;
+    ae_int_t ny;
+    ae_int_t normtype;
+    ae_int_t distmatrixtype;
+    ae_matrix xy;
+    ae_vector tags;
+    ae_vector boxmin;
+    ae_vector boxmax;
+    ae_vector curboxmin;
+    ae_vector curboxmax;
+    double curdist;
+    ae_vector nodes;
+    ae_vector splits;
+    ae_vector x;
+    ae_int_t kneeded;
+    double rneeded;
+    ae_bool selfmatch;
+    double approxf;
+    ae_int_t kcur;
+    ae_vector idx;
+    ae_vector r;
+    ae_vector buf;
+    ae_int_t debugcounter;
+} kdtree;
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+/*************************************************************************
+Portable high quality random number generator state.
+Initialized with HQRNDRandomize() or HQRNDSeed().
+
+Fields:
+    S1, S2      -   seed values
+    V           -   precomputed value
+    MagicV      -   'magic' value used to determine whether State structure
+                    was correctly initialized.
+*************************************************************************/
+class _hqrndstate_owner
+{
+public:
+    _hqrndstate_owner();
+    _hqrndstate_owner(const _hqrndstate_owner &rhs);
+    _hqrndstate_owner& operator=(const _hqrndstate_owner &rhs);
+    virtual ~_hqrndstate_owner();
+    alglib_impl::hqrndstate* c_ptr();
+    alglib_impl::hqrndstate* c_ptr() const;
+protected:
+    alglib_impl::hqrndstate *p_struct;
+};
+class hqrndstate : public _hqrndstate_owner
+{
+public:
+    hqrndstate();
+    hqrndstate(const hqrndstate &rhs);
+    hqrndstate& operator=(const hqrndstate &rhs);
+    virtual ~hqrndstate();
+
+};
+
+/*************************************************************************
+
+*************************************************************************/
+class _kdtree_owner
+{
+public:
+    _kdtree_owner();
+    _kdtree_owner(const _kdtree_owner &rhs);
+    _kdtree_owner& operator=(const _kdtree_owner &rhs);
+    virtual ~_kdtree_owner();
+    alglib_impl::kdtree* c_ptr();
+    alglib_impl::kdtree* c_ptr() const;
+protected:
+    alglib_impl::kdtree *p_struct;
+};
+class kdtree : public _kdtree_owner
+{
+public:
+    kdtree();
+    kdtree(const kdtree &rhs);
+    kdtree& operator=(const kdtree &rhs);
+    virtual ~kdtree();
+
+};
+
+/*************************************************************************
+HQRNDState  initialization  with  random  values  which come from standard
+RNG.
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndrandomize(hqrndstate &state);
+
+
+/*************************************************************************
+HQRNDState initialization with seed values
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndseed(const ae_int_t s1, const ae_int_t s2, hqrndstate &state);
+
+
+/*************************************************************************
+This function generates random real number in (0,1),
+not including interval boundaries
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+double hqrnduniformr(const hqrndstate &state);
+
+
+/*************************************************************************
+This function generates random integer number in [0, N)
+
+1. N must be less than HQRNDMax-1.
+2. State structure must be initialized with HQRNDRandomize() or HQRNDSeed()
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t hqrnduniformi(const hqrndstate &state, const ae_int_t n);
+
+
+/*************************************************************************
+Random number generator: normal numbers
+
+This function generates one random number from normal distribution.
+Its performance is equal to that of HQRNDNormal2()
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+double hqrndnormal(const hqrndstate &state);
+
+
+/*************************************************************************
+Random number generator: random X and Y such that X^2+Y^2=1
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndunit2(const hqrndstate &state, double &x, double &y);
+
+
+/*************************************************************************
+Random number generator: normal numbers
+
+This function generates two independent random numbers from normal
+distribution. Its performance is equal to that of HQRNDNormal()
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 02.12.2009 by Bochkanov Sergey
+*************************************************************************/
+void hqrndnormal2(const hqrndstate &state, double &x1, double &x2);
+
+
+/*************************************************************************
+Random number generator: exponential distribution
+
+State structure must be initialized with HQRNDRandomize() or HQRNDSeed().
+
+  -- ALGLIB --
+     Copyright 11.08.2007 by Bochkanov Sergey
+*************************************************************************/
+double hqrndexponential(const hqrndstate &state, const double lambdav);
+
+/*************************************************************************
+KD-tree creation
+
+This subroutine creates KD-tree from set of X-values and optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuild(const real_2d_array &xy, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt);
+void kdtreebuild(const real_2d_array &xy, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt);
+
+
+/*************************************************************************
+KD-tree creation
+
+This  subroutine  creates  KD-tree  from set of X-values, integer tags and
+optional Y-values
+
+INPUT PARAMETERS
+    XY      -   dataset, array[0..N-1,0..NX+NY-1].
+                one row corresponds to one point.
+                first NX columns contain X-values, next NY (NY may be zero)
+                columns may contain associated Y-values
+    Tags    -   tags, array[0..N-1], contains integer tags associated
+                with points.
+    N       -   number of points, N>=1
+    NX      -   space dimension, NX>=1.
+    NY      -   number of optional Y-values, NY>=0.
+    NormType-   norm type:
+                * 0 denotes infinity-norm
+                * 1 denotes 1-norm
+                * 2 denotes 2-norm (Euclidean norm)
+
+OUTPUT PARAMETERS
+    KDT     -   KD-tree
+
+NOTES
+
+1. KD-tree  creation  have O(N*logN) complexity and O(N*(2*NX+NY))  memory
+   requirements.
+2. Although KD-trees may be used with any combination of N  and  NX,  they
+   are more efficient than brute-force search only when N >> 4^NX. So they
+   are most useful in low-dimensional tasks (NX=2, NX=3). NX=1  is another
+   inefficient case, because  simple  binary  search  (without  additional
+   structures) is much more efficient in such tasks than KD-trees.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t n, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt);
+void kdtreebuildtagged(const real_2d_array &xy, const integer_1d_array &tags, const ae_int_t nx, const ae_int_t ny, const ae_int_t normtype, kdtree &kdt);
+
+
+/*************************************************************************
+K-NN query: K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch);
+ae_int_t kdtreequeryknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k);
+
+
+/*************************************************************************
+R-NN query: all points within R-sphere centered at X
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    R           -   radius of sphere (in corresponding norm), R>0
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+
+RESULT
+    number of neighbors found, >=0
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+actual results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r, const bool selfmatch);
+ae_int_t kdtreequeryrnn(const kdtree &kdt, const real_1d_array &x, const double r);
+
+
+/*************************************************************************
+K-NN query: approximate K nearest neighbors
+
+INPUT PARAMETERS
+    KDT         -   KD-tree
+    X           -   point, array[0..NX-1].
+    K           -   number of neighbors to return, K>=1
+    SelfMatch   -   whether self-matches are allowed:
+                    * if True, nearest neighbor may be the point itself
+                      (if it exists in original dataset)
+                    * if False, then only points with non-zero distance
+                      are returned
+                    * if not given, considered True
+    Eps         -   approximation factor, Eps>=0. eps-approximate  nearest
+                    neighbor  is  a  neighbor  whose distance from X is at
+                    most (1+eps) times distance of true nearest neighbor.
+
+RESULT
+    number of actual neighbors found (either K or N, if K>N).
+
+NOTES
+    significant performance gain may be achieved only when Eps  is  is  on
+    the order of magnitude of 1 or larger.
+
+This  subroutine  performs  query  and  stores  its result in the internal
+structures of the KD-tree. You can use  following  subroutines  to  obtain
+these results:
+* KDTreeQueryResultsX() to get X-values
+* KDTreeQueryResultsXY() to get X- and Y-values
+* KDTreeQueryResultsTags() to get tag values
+* KDTreeQueryResultsDistances() to get distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const bool selfmatch, const double eps);
+ae_int_t kdtreequeryaknn(const kdtree &kdt, const real_1d_array &x, const ae_int_t k, const double eps);
+
+
+/*************************************************************************
+X-values from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    X       -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    X       -   rows are filled with X-values
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsTags()          tag values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsx(const kdtree &kdt, real_2d_array &x);
+
+
+/*************************************************************************
+X- and Y-values from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    XY      -   possibly pre-allocated buffer. If XY is too small to store
+                result, it is resized. If size(XY) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    XY      -   rows are filled with points: first NX columns with
+                X-values, next NY columns - with Y-values.
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsTags()          tag values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxy(const kdtree &kdt, real_2d_array &xy);
+
+
+/*************************************************************************
+Tags from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    Tags    -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    Tags    -   filled with tags associated with points,
+                or, when no tags were supplied, with zeros
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsDistances()     distances
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultstags(const kdtree &kdt, integer_1d_array &tags);
+
+
+/*************************************************************************
+Distances from last query
+
+INPUT PARAMETERS
+    KDT     -   KD-tree
+    R       -   possibly pre-allocated buffer. If X is too small to store
+                result, it is resized. If size(X) is enough to store
+                result, it is left unchanged.
+
+OUTPUT PARAMETERS
+    R       -   filled with distances (in corresponding norm)
+
+NOTES
+1. points are ordered by distance from the query point (first = closest)
+2. if  XY is larger than required to store result, only leading part  will
+   be overwritten; trailing part will be left unchanged. So  if  on  input
+   XY = [[A,B],[C,D]], and result is [1,2],  then  on  exit  we  will  get
+   XY = [[1,2],[C,D]]. This is done purposely to increase performance;  if
+   you want function  to  resize  array  according  to  result  size,  use
+   function with same name and suffix 'I'.
+
+SEE ALSO
+* KDTreeQueryResultsX()             X-values
+* KDTreeQueryResultsXY()            X- and Y-values
+* KDTreeQueryResultsTags()          tag values
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsdistances(const kdtree &kdt, real_1d_array &r);
+
+
+/*************************************************************************
+X-values from last query; 'interactive' variant for languages like  Python
+which   support    constructs   like  "X = KDTreeQueryResultsXI(KDT)"  and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxi(const kdtree &kdt, real_2d_array &x);
+
+
+/*************************************************************************
+XY-values from last query; 'interactive' variant for languages like Python
+which   support    constructs   like "XY = KDTreeQueryResultsXYI(KDT)" and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsxyi(const kdtree &kdt, real_2d_array &xy);
+
+
+/*************************************************************************
+Tags  from  last  query;  'interactive' variant for languages like  Python
+which  support  constructs  like "Tags = KDTreeQueryResultsTagsI(KDT)" and
+interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultstagsi(const kdtree &kdt, integer_1d_array &tags);
+
+
+/*************************************************************************
+Distances from last query; 'interactive' variant for languages like Python
+which  support  constructs   like  "R = KDTreeQueryResultsDistancesI(KDT)"
+and interactive mode of interpreter.
+
+This function allocates new array on each call,  so  it  is  significantly
+slower than its 'non-interactive' counterpart, but it is  more  convenient
+when you call it from command line.
+
+  -- ALGLIB --
+     Copyright 28.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void kdtreequeryresultsdistancesi(const kdtree &kdt, real_1d_array &r);
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+void hqrndrandomize(hqrndstate* state, ae_state *_state);
+void hqrndseed(ae_int_t s1,
+     ae_int_t s2,
+     hqrndstate* state,
+     ae_state *_state);
+double hqrnduniformr(hqrndstate* state, ae_state *_state);
+ae_int_t hqrnduniformi(hqrndstate* state, ae_int_t n, ae_state *_state);
+double hqrndnormal(hqrndstate* state, ae_state *_state);
+void hqrndunit2(hqrndstate* state, double* x, double* y, ae_state *_state);
+void hqrndnormal2(hqrndstate* state,
+     double* x1,
+     double* x2,
+     ae_state *_state);
+double hqrndexponential(hqrndstate* state,
+     double lambdav,
+     ae_state *_state);
+ae_bool _hqrndstate_init(hqrndstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _hqrndstate_init_copy(hqrndstate* dst, hqrndstate* src, ae_state *_state, ae_bool make_automatic);
+void _hqrndstate_clear(hqrndstate* p);
+void kdtreebuild(/* Real    */ ae_matrix* xy,
+     ae_int_t n,
+     ae_int_t nx,
+     ae_int_t ny,
+     ae_int_t normtype,
+     kdtree* kdt,
+     ae_state *_state);
+void kdtreebuildtagged(/* Real    */ ae_matrix* xy,
+     /* Integer */ ae_vector* tags,
+     ae_int_t n,
+     ae_int_t nx,
+     ae_int_t ny,
+     ae_int_t normtype,
+     kdtree* kdt,
+     ae_state *_state);
+ae_int_t kdtreequeryknn(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     ae_int_t k,
+     ae_bool selfmatch,
+     ae_state *_state);
+ae_int_t kdtreequeryrnn(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     double r,
+     ae_bool selfmatch,
+     ae_state *_state);
+ae_int_t kdtreequeryaknn(kdtree* kdt,
+     /* Real    */ ae_vector* x,
+     ae_int_t k,
+     ae_bool selfmatch,
+     double eps,
+     ae_state *_state);
+void kdtreequeryresultsx(kdtree* kdt,
+     /* Real    */ ae_matrix* x,
+     ae_state *_state);
+void kdtreequeryresultsxy(kdtree* kdt,
+     /* Real    */ ae_matrix* xy,
+     ae_state *_state);
+void kdtreequeryresultstags(kdtree* kdt,
+     /* Integer */ ae_vector* tags,
+     ae_state *_state);
+void kdtreequeryresultsdistances(kdtree* kdt,
+     /* Real    */ ae_vector* r,
+     ae_state *_state);
+void kdtreequeryresultsxi(kdtree* kdt,
+     /* Real    */ ae_matrix* x,
+     ae_state *_state);
+void kdtreequeryresultsxyi(kdtree* kdt,
+     /* Real    */ ae_matrix* xy,
+     ae_state *_state);
+void kdtreequeryresultstagsi(kdtree* kdt,
+     /* Integer */ ae_vector* tags,
+     ae_state *_state);
+void kdtreequeryresultsdistancesi(kdtree* kdt,
+     /* Real    */ ae_vector* r,
+     ae_state *_state);
+ae_bool _kdtree_init(kdtree* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _kdtree_init_copy(kdtree* dst, kdtree* src, ae_state *_state, ae_bool make_automatic);
+void _kdtree_clear(kdtree* p);
+
+}
+#endif
+
diff --git a/contrib/lbfgs/ap.cpp b/contrib/lbfgs/ap.cpp
new file mode 100755
index 0000000000..14bfb10910
--- /dev/null
+++ b/contrib/lbfgs/ap.cpp
@@ -0,0 +1,8952 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#include "stdafx.h"
+#include "ap.h"
+#include <limits>
+#include <locale.h>
+using namespace std;
+
+#if defined(AE_CPU)
+#if (AE_CPU==AE_INTEL)
+
+#if AE_COMPILER==AE_MSVC
+#include <intrin.h>
+#endif
+
+#endif
+#endif
+
+// disable some irrelevant warnings
+#if (AE_COMPILER==AE_MSVC)
+#pragma warning(disable:4100)
+#pragma warning(disable:4127)
+#pragma warning(disable:4702)
+#pragma warning(disable:4996)
+#endif
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION IMPLEMENTS BASIC FUNCTIONALITY LIKE
+// MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS
+// SHARED BETWEEN C++ AND PURE C LIBRARIES
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+/*
+ * local definitions
+ */
+#define x_nb 16
+#define AE_DATA_ALIGN 16
+#define AE_PTR_ALIGN sizeof(void*)
+#define DYN_BOTTOM ((void*)1)
+#define DYN_FRAME  ((void*)2)
+#define AE_LITTLE_ENDIAN 1
+#define AE_BIG_ENDIAN 2
+#define AE_MIXED_ENDIAN 3
+
+
+/*
+ * alloc counter (if used)
+ */
+#ifdef AE_USE_ALLOC_COUNTER
+ae_int64_t _alloc_counter = 0;
+#endif
+
+/*
+ * These declarations are used to ensure that
+ * sizeof(ae_int32_t)==4, sizeof(ae_int64_t)==8, sizeof(ae_int_t)==sizeof(void*).
+ * they will lead to syntax error otherwise (array size will be negative).
+ *
+ * you can remove them, if you want - they are not used anywhere.
+ *
+ */
+static char _ae_int32_t_must_be_32_bits_wide[1-2*((int)(sizeof(ae_int32_t))-4)*((int)(sizeof(ae_int32_t))-4)];
+static char _ae_int64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_int64_t))-8)*((int)(sizeof(ae_int64_t))-8)];
+static char _ae_int_t_must_be_pointer_sized [1-2*((int)(sizeof(ae_int_t))-(int)sizeof(void*))*((int)(sizeof(ae_int_t))-(int)(sizeof(void*)))];  
+
+ae_int_t ae_misalignment(const void *ptr, size_t alignment)
+{
+    union _u
+    {
+        const void *ptr;
+        ae_int_t iptr;
+    } u;
+    u.ptr = ptr;
+    return (ae_int_t)(u.iptr%alignment);
+}
+
+void* ae_align(void *ptr, size_t alignment)
+{
+    char *result = (char*)ptr;
+    if( (result-(char*)0)%alignment!=0 )
+        result += alignment - (result-(char*)0)%alignment;
+    return result;
+}
+
+void ae_break(ae_state *state, ae_error_type error_type, const char *msg)
+{
+#ifndef AE_USE_CPP_ERROR_HANDLING
+    if( state!=NULL )
+    {
+        ae_state_clear(state);
+        state->last_error = error_type;
+        state->error_msg = msg;
+        if( state->break_jump!=NULL )
+            longjmp(*(state->break_jump), 1);
+        else
+            abort();
+    }
+    else
+        abort();
+#else
+    if( state!=NULL )
+    {
+        ae_state_clear(state);
+        state->last_error = error_type;
+        state->error_msg = msg;
+    }
+    throw error_type;
+#endif
+}
+
+void* aligned_malloc(size_t size, size_t alignment)
+{
+    if( size==0 )
+        return NULL;
+    if( alignment<=1 )
+    {
+        /* no alignment, just call malloc */
+        void *block;
+        void **p; ;
+        block = malloc(sizeof(void*)+size);
+        if( block==NULL )
+            return NULL;
+        p = (void**)block;
+        *p = block;
+#ifdef AE_USE_ALLOC_COUNTER
+        _alloc_counter++;
+#endif
+        return (void*)((char*)block+sizeof(void*));
+    }
+    else
+    {
+        /* align */
+        void *block;
+        char *result;
+        block = malloc(alignment-1+sizeof(void*)+size);
+        if( block==NULL )
+            return NULL;
+        result = (char*)block+sizeof(void*);
+        /*if( (result-(char*)0)%alignment!=0 )
+            result += alignment - (result-(char*)0)%alignment;*/
+        result = (char*)ae_align(result, alignment);
+        *((void**)(result-sizeof(void*))) = block;
+#ifdef AE_USE_ALLOC_COUNTER
+        _alloc_counter++;
+#endif
+        return result;
+    }
+}
+
+void aligned_free(void *block)
+{
+    void *p;
+    if( block==NULL )
+        return;
+    p = *((void**)((char*)block-sizeof(void*)));
+    free(p);
+#ifdef AE_USE_ALLOC_COUNTER
+    _alloc_counter--;
+#endif
+}
+
+/************************************************************************
+Malloc's memory with automatic alignment.
+
+Returns NULL when zero size is specified.
+
+Error handling:
+* if state is NULL, returns NULL on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+************************************************************************/
+void* ae_malloc(size_t size, ae_state *state)
+{
+    void *result;
+    if( size==0 )
+        return NULL;
+    result = aligned_malloc(size,AE_DATA_ALIGN);
+    if( result==NULL && state!=NULL)
+        ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
+    return result;
+}
+
+void ae_free(void *p)
+{
+    if( p!=NULL )
+        aligned_free(p);
+}
+
+/************************************************************************
+Sets pointers to the matrix rows.
+
+* dst must be correctly initialized matrix
+* dst->data.ptr points to the beginning of memory block  allocated  for  
+  row pointers.
+* dst->ptr - undefined (initialized during algorithm processing)
+* storage parameter points to the beginning of actual storage
+************************************************************************/
+void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage)
+{
+    char *p_base;
+    void **pp_ptr;
+    ae_int_t i;
+    if( dst->rows>0 && dst->cols>0 )
+    {
+        p_base = (char*)storage;
+        pp_ptr = (void**)dst->data.ptr;
+        dst->ptr.pp_void = pp_ptr;
+        for(i=0; i<dst->rows; i++, p_base+=dst->stride*ae_sizeof(dst->datatype))
+            pp_ptr[i] = p_base;
+    }
+    else
+        dst->ptr.pp_void = NULL;
+}
+
+/************************************************************************
+Returns size of datatype.
+Zero for dynamic types like strings or multiple precision types.
+************************************************************************/
+ae_int_t ae_sizeof(ae_datatype datatype)
+{
+    switch(datatype)
+    {
+        case DT_BOOL:       return (ae_int_t)sizeof(ae_bool);
+        case DT_INT:        return (ae_int_t)sizeof(ae_int_t);
+        case DT_REAL:       return (ae_int_t)sizeof(double);
+        case DT_COMPLEX:    return 2*(ae_int_t)sizeof(double);
+        default:            return 0;
+    }
+}
+
+/************************************************************************
+This function initializes ALGLIB environment state.
+
+NOTES:
+* stacks contain no frames, so ae_make_frame() must be called before
+  attaching dynamic blocks. Without it ae_leave_frame() will cycle
+  forever (which is intended behavior).
+************************************************************************/
+void ae_state_init(ae_state *state)
+{
+    ae_int32_t *vp;
+
+    /*
+     * p_next points to itself because:
+     * * correct program should be able to detect end of the list
+     *   by looking at the ptr field.
+     * * NULL p_next may be used to distinguish automatic blocks
+     *   (in the list) from non-automatic (not in the list)
+     */
+    state->last_block.p_next = &(state->last_block);
+    state->last_block.deallocator = NULL;
+    state->last_block.ptr = DYN_BOTTOM;
+    state->p_top_block = &(state->last_block);
+#ifndef AE_USE_CPP_ERROR_HANDLING
+    state->break_jump = NULL;
+#endif
+    state->error_msg = "";
+    
+    /*
+     * determine endianness and initialize precomputed IEEE special quantities.
+     */
+    state->endianness = ae_get_endianness();
+    if( state->endianness==AE_LITTLE_ENDIAN )
+    {
+        vp = (ae_int32_t*)(&state->v_nan);
+        vp[0] = 0;
+        vp[1] = (ae_int32_t)0x7FF80000;
+        vp = (ae_int32_t*)(&state->v_posinf);
+        vp[0] = 0;
+        vp[1] = (ae_int32_t)0x7FF00000;
+        vp = (ae_int32_t*)(&state->v_neginf);
+        vp[0] = 0;
+        vp[1] = (ae_int32_t)0xFFF00000;
+    }
+    else if( state->endianness==AE_BIG_ENDIAN )
+    {
+        vp = (ae_int32_t*)(&state->v_nan);
+        vp[1] = 0;
+        vp[0] = (ae_int32_t)0x7FF80000;
+        vp = (ae_int32_t*)(&state->v_posinf);
+        vp[1] = 0;
+        vp[0] = (ae_int32_t)0x7FF00000;
+        vp = (ae_int32_t*)(&state->v_neginf);
+        vp[1] = 0;
+        vp[0] = (ae_int32_t)0xFFF00000;
+    }
+    else
+        abort();
+}
+
+
+/************************************************************************
+This function clears ALGLIB environment state.
+All dynamic data controlled by state are freed.
+************************************************************************/
+void ae_state_clear(ae_state *state)
+{
+    while( state->p_top_block->ptr!=DYN_BOTTOM )
+        ae_frame_leave(state);
+}
+
+
+#ifndef AE_USE_CPP_ERROR_HANDLING
+/************************************************************************
+This function sets jump buffer for error handling.
+
+buf may be NULL.
+************************************************************************/
+void ae_state_set_break_jump(ae_state *state, jmp_buf *buf)
+{
+    state->break_jump = buf;
+}
+#endif
+
+
+/************************************************************************
+This function makes new stack frame.
+
+This function takes two parameters: environment state and pointer to  the
+dynamic block which will be used as indicator  of  the  frame  beginning.
+This dynamic block must be initialized by caller and mustn't  be changed/
+deallocated/reused till ae_leave_frame called. It may be global or  local
+variable (local is even better).
+************************************************************************/
+void ae_frame_make(ae_state *state, ae_frame *tmp)
+{
+    tmp->db_marker.p_next = state->p_top_block;
+    tmp->db_marker.deallocator = NULL;
+    tmp->db_marker.ptr = DYN_FRAME;
+    state->p_top_block = &tmp->db_marker;
+}
+
+
+/************************************************************************
+This function leaves current stack frame and deallocates all automatic
+dynamic blocks which were attached to this frame.
+************************************************************************/
+void ae_frame_leave(ae_state *state)
+{
+    while( state->p_top_block->ptr!=DYN_FRAME && state->p_top_block->ptr!=DYN_BOTTOM)
+    {
+        if( state->p_top_block->ptr!=NULL && state->p_top_block->deallocator!=NULL)
+            ((ae_deallocator)(state->p_top_block->deallocator))(state->p_top_block->ptr);
+        state->p_top_block = state->p_top_block->p_next;
+    }
+    state->p_top_block = state->p_top_block->p_next;
+}
+
+
+/************************************************************************
+This function attaches block to the dynamic block list
+
+block               block
+state               ALGLIB environment state
+
+NOTES:
+* never call it for special blocks which marks frame boundaries!
+************************************************************************/
+void ae_db_attach(ae_dyn_block *block, ae_state *state)
+{
+    block->p_next = state->p_top_block;
+    state->p_top_block = block;
+}
+
+
+/************************************************************************
+This function malloc's dynamic block:
+
+block               destination block, assumed to be uninitialized
+size                size (in bytes)
+state               ALGLIB environment state. May be NULL.
+make_automatic      if true, vector is added to the dynamic block list
+
+block is assumed to be uninitialized, its fields are ignored.
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+NOTES:
+* never call it for blocks which are already in the list
+************************************************************************/
+ae_bool ae_db_malloc(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic)
+{
+    /* ensure that size is >=0
+       two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */
+    if( state!=NULL )
+        ae_assert(size>=0, "ae_db_malloc(): negative size", state);
+    if( size<0 )
+        return ae_false;
+    
+    /* alloc */
+    block->ptr = ae_malloc((size_t)size, state);
+    if( block->ptr==NULL && size!=0 )
+        return ae_false;
+    if( make_automatic && state!=NULL )
+        ae_db_attach(block, state);
+    else
+        block->p_next = NULL;
+    block->deallocator = ae_free;
+    return ae_true;
+}
+
+
+/************************************************************************
+This function realloc's dynamic block:
+
+block               destination block (initialized)
+size                new size (in bytes)
+state               ALGLIB environment state
+
+block is assumed to be initialized.
+
+This function:
+* deletes old contents
+* preserves automatic state
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+NOTES:
+* never call it for special blocks which mark frame boundaries!
+************************************************************************/
+ae_bool ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state)
+{
+    /* ensure that size is >=0
+       two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */
+    if( state!=NULL )
+        ae_assert(size>=0, "ae_db_realloc(): negative size", state);
+    if( size<0 )
+        return ae_false;
+    
+    /* realloc */
+    if( block->ptr!=NULL )
+        ((ae_deallocator)block->deallocator)(block->ptr);
+    block->ptr = ae_malloc((size_t)size, state);
+    if( block->ptr==NULL && size!=0 )
+        return ae_false;
+    block->deallocator = ae_free;
+    return ae_true;
+}
+
+
+/************************************************************************
+This function clears dynamic block (releases  all  dynamically  allocated
+memory). Dynamic block may be in automatic management list - in this case
+it will NOT be removed from list.
+
+block               destination block (initialized)
+
+NOTES:
+* never call it for special blocks which marks frame boundaries!
+************************************************************************/
+void ae_db_free(ae_dyn_block *block)
+{
+    if( block->ptr!=NULL )
+        ((ae_deallocator)block->deallocator)(block->ptr);
+    block->ptr = NULL;
+    block->deallocator = ae_free;
+}
+
+/************************************************************************
+This function swaps contents of two dynamic blocks (pointers and 
+deallocators) leaving other parameters (automatic management settings, 
+etc.) unchanged.
+
+NOTES:
+* never call it for special blocks which marks frame boundaries!
+************************************************************************/
+void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2)
+{
+    void (*deallocator)(void*) = NULL;
+    void * volatile ptr;
+    ptr = block1->ptr;
+    deallocator = block1->deallocator;
+    block1->ptr = block2->ptr;
+    block1->deallocator = block2->deallocator;
+    block2->ptr = ptr;
+    block2->deallocator = deallocator;
+}
+
+/************************************************************************
+This function creates ae_vector.
+
+Vector size may be zero. Vector contents is uninitialized.
+
+dst                 destination vector
+size                vector size, may be zero
+datatype            guess what...
+state               ALGLIB environment state
+make_automatic      if true, vector is added to the dynamic block list
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+dst is assumed to be uninitialized, its fields are ignored.
+************************************************************************/
+ae_bool ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
+{
+    /* ensure that size is >=0
+       two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */
+    if( state!=NULL )
+        ae_assert(size>=0, "ae_vector_init(): negative size", state);
+    if( size<0 )
+        return ae_false;
+
+    /* init */
+    dst->cnt = size;
+    dst->datatype = datatype;
+    if( !ae_db_malloc(&dst->data, size*ae_sizeof(datatype), state, make_automatic) )
+        return ae_false;
+    dst->ptr.p_ptr = dst->data.ptr;
+    return ae_true;
+}
+
+
+/************************************************************************
+This function creates copy of ae_vector.
+
+dst                 destination vector
+src                 well, it is source
+state               ALGLIB environment state
+make_automatic      if true, vector is added to the dynamic block list
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+dst is assumed to be uninitialized, its fields are ignored.
+************************************************************************/
+ae_bool ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(dst, src->cnt, src->datatype, state, make_automatic) )
+        return ae_false;
+    if( src->cnt!=0 )
+        memcpy(dst->ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
+    return ae_true;
+}
+
+/************************************************************************
+This function creates ae_vector from x_vector:
+
+dst                 destination vector
+src                 source, vector in x-format
+state               ALGLIB environment state
+make_automatic      if true, vector is added to the dynamic block list
+
+dst is assumed to be uninitialized, its fields are ignored.
+************************************************************************/
+void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
+{
+    ae_vector_init(dst, (ae_int_t)src->cnt, (ae_datatype)src->datatype, state, make_automatic);
+    if( src->cnt>0 )
+        memcpy(dst->ptr.p_ptr, src->ptr, (size_t)(((ae_int_t)src->cnt)*ae_sizeof((ae_datatype)src->datatype)));
+}
+
+
+/************************************************************************
+This function changes length of ae_vector.
+
+dst                 destination vector
+newsize             vector size, may be zero
+state               ALGLIB environment state
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+NOTES:
+* vector must be initialized
+* all contents is destroyed during setlength() call
+* new size may be zero.
+************************************************************************/
+ae_bool ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state)
+{
+    /* ensure that size is >=0
+       two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */
+    if( state!=NULL )
+        ae_assert(newsize>=0, "ae_vector_set_length(): negative size", state);
+    if( newsize<0 )
+        return ae_false;
+
+    /* set length */
+    if( dst->cnt==newsize )
+        return ae_true;
+    dst->cnt = newsize;
+    if( !ae_db_realloc(&dst->data, newsize*ae_sizeof(dst->datatype), state) )
+        return ae_false;
+    dst->ptr.p_ptr = dst->data.ptr;
+    return ae_true;
+}
+
+
+/************************************************************************
+This function clears vector contents (releases all dynamically  allocated
+memory). Vector may be in automatic management list  -  in this  case  it
+will NOT be removed from list.
+
+IMPORTANT: this function does NOT invalidates dst; it just  releases  all
+dynamically allocated storage, but dst still may be used  after  call  to
+ae_vector_set_length().
+
+dst                 destination vector
+************************************************************************/
+void ae_vector_clear(ae_vector *dst)
+{
+    dst->cnt = 0;
+    ae_db_free(&dst->data);
+    dst->ptr.p_ptr = 0;
+}
+
+
+/************************************************************************
+This function efficiently swaps contents of two vectors, leaving other
+pararemeters (automatic management, etc.) unchanged.
+************************************************************************/
+void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2)
+{
+    ae_int_t cnt;
+    ae_datatype datatype;
+    void *p_ptr;
+    
+    ae_db_swap(&vec1->data, &vec2->data);
+    
+    cnt = vec1->cnt;
+    datatype = vec1->datatype;
+    p_ptr = vec1->ptr.p_ptr;
+    vec1->cnt = vec2->cnt;
+    vec1->datatype = vec2->datatype;
+    vec1->ptr.p_ptr = vec2->ptr.p_ptr;
+    vec2->cnt = cnt;
+    vec2->datatype = datatype;
+    vec2->ptr.p_ptr = p_ptr;
+}
+
+/************************************************************************
+This function creates ae_matrix.
+
+Matrix size may be zero, in such cases both rows and cols are zero.
+Matrix contents is uninitialized.
+
+dst                 destination natrix
+rows                rows count
+cols                cols count
+datatype            element type
+state               ALGLIB environment state
+make_automatic      if true, matrix is added to the dynamic block list
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+dst is assumed to be uninitialized, its fields are ignored.
+************************************************************************/
+ae_bool ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
+{
+    /* ensure that size is >=0
+       two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */
+    if( state!=NULL )
+        ae_assert(rows>=0 && cols>=0, "ae_matrix_init(): negative length", state);
+    if( rows<0 || cols<0 )
+        return ae_false;
+
+    /* if one of rows/cols is zero, another MUST be too */
+    if( rows==0 || cols==0 )
+    {
+        rows = 0;
+        cols = 0;
+    }
+
+    /* init */
+    dst->rows = rows;
+    dst->cols = cols;
+    dst->stride = cols;
+    while( dst->stride*ae_sizeof(datatype)%AE_DATA_ALIGN!=0 )
+        dst->stride++;
+    dst->datatype = datatype;
+    if( !ae_db_malloc(&dst->data, dst->rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(datatype))+AE_DATA_ALIGN-1, state, make_automatic) )
+        return ae_false;
+    ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN));
+    return ae_true;
+}
+
+
+/************************************************************************
+This function creates copy of ae_matrix.
+
+dst                 destination matrix
+src                 well, it is source
+state               ALGLIB environment state
+make_automatic      if true, matrix is added to the dynamic block list
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+dst is assumed to be uninitialized, its fields are ignored.
+************************************************************************/
+ae_bool ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic)
+{
+    ae_int_t i;
+    if( !ae_matrix_init(dst, src->rows, src->cols, src->datatype, state, make_automatic) )
+        return ae_false;
+    if( src->rows!=0 && src->cols!=0 )
+    {
+        if( dst->stride==src->stride )
+            memcpy(dst->ptr.pp_void[0], src->ptr.pp_void[0], (size_t)(src->rows*src->stride*ae_sizeof(src->datatype)));
+        else
+            for(i=0; i<dst->rows; i++)
+                memcpy(dst->ptr.pp_void[i], src->ptr.pp_void[i], (size_t)(dst->cols*ae_sizeof(dst->datatype)));
+    }
+    return ae_true;
+}
+
+
+void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
+{
+    char *p_src_row;
+    char *p_dst_row;
+    ae_int_t row_size;
+    ae_int_t i;
+    ae_matrix_init(dst, (ae_int_t)src->rows, (ae_int_t)src->cols, (ae_datatype)src->datatype, state, make_automatic);
+    if( src->rows!=0 && src->cols!=0 )
+    {
+        p_src_row = (char*)src->ptr;
+        p_dst_row = (char*)(dst->ptr.pp_void[0]);
+        row_size = ae_sizeof((ae_datatype)src->datatype)*(ae_int_t)src->cols;
+        for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof((ae_datatype)src->datatype), p_dst_row+=dst->stride*ae_sizeof((ae_datatype)src->datatype))
+            memcpy(p_dst_row, p_src_row, (size_t)(row_size));
+    }
+}
+
+
+/************************************************************************
+This function changes length of ae_matrix.
+
+dst                 destination matrix
+rows                size, may be zero
+cols                size, may be zero
+state               ALGLIB environment state
+
+Error handling:
+* if state is NULL, returns ae_false on allocation error
+* if state is not NULL, calls ae_break() on allocation error
+* returns ae_true on success
+
+NOTES:
+* matrix must be initialized
+* all contents is destroyed during setlength() call
+* new size may be zero.
+************************************************************************/
+ae_bool ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state)
+{
+    /* ensure that size is >=0
+       two ways to exit: 1) through ae_assert, if we have non-NULL state, 2) by returning ae_false */
+    if( state!=NULL )
+        ae_assert(rows>=0 && cols>=0, "ae_matrix_set_length(): negative length", state);
+    if( rows<0 || cols<0 )
+        return ae_false;
+
+    if( dst->rows==rows && dst->cols==cols )
+        return ae_true;    
+    dst->rows = rows;
+    dst->cols = cols;
+    dst->stride = cols;
+    while( dst->stride*ae_sizeof(dst->datatype)%AE_DATA_ALIGN!=0 )
+        dst->stride++;
+    if( !ae_db_realloc(&dst->data, dst->rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(dst->datatype))+AE_DATA_ALIGN-1, state) )
+        return ae_false;
+    ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN));
+    return ae_true;
+}
+
+
+/************************************************************************
+This function clears matrix contents (releases all dynamically  allocated
+memory). Matrix may be in automatic management list  -  in this  case  it
+will NOT be removed from list.
+
+IMPORTANT: this function does NOT invalidates dst; it just  releases  all
+dynamically allocated storage, but dst still may be used  after  call  to
+ae_matrix_set_length().
+
+dst                 destination matrix
+************************************************************************/
+void ae_matrix_clear(ae_matrix *dst)
+{
+    dst->rows = 0;
+    dst->cols = 0;
+    dst->stride = 0;
+    ae_db_free(&dst->data);
+    dst->ptr.p_ptr = 0;
+}
+
+
+/************************************************************************
+This function efficiently swaps contents of two vectors, leaving other
+pararemeters (automatic management, etc.) unchanged.
+************************************************************************/
+void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2)
+{
+    ae_int_t rows;
+    ae_int_t cols;
+    ae_int_t stride;
+    ae_datatype datatype;
+    void *p_ptr;
+    
+    ae_db_swap(&mat1->data, &mat2->data);
+    
+    rows = mat1->rows;
+    cols = mat1->cols;
+    stride = mat1->stride;
+    datatype = mat1->datatype;
+    p_ptr = mat1->ptr.p_ptr;
+
+    mat1->rows = mat2->rows;
+    mat1->cols = mat2->cols;
+    mat1->stride = mat2->stride;
+    mat1->datatype = mat2->datatype;
+    mat1->ptr.p_ptr = mat2->ptr.p_ptr;
+
+    mat2->rows = rows;
+    mat2->cols = cols;
+    mat2->stride = stride;
+    mat2->datatype = datatype;
+    mat2->ptr.p_ptr = p_ptr;
+}
+
+/************************************************************************
+This function fills x_vector by ae_vector's contents:
+
+dst                 destination vector
+src                 source, vector in x-format
+state               ALGLIB environment state
+
+NOTES:
+* dst is assumed to be initialized. Its contents is freed before  copying
+  data  from src  (if  size / type  are  different)  or  overwritten  (if
+  possible given destination size).
+************************************************************************/
+void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state)
+{
+    if( dst->cnt!=src->cnt || dst->datatype!=src->datatype )
+    {
+        if( dst->owner==OWN_AE )
+            ae_free(dst->ptr);
+        dst->ptr = ae_malloc((size_t)(src->cnt*ae_sizeof(src->datatype)), state);
+        dst->last_action = ACT_NEW_LOCATION;
+        dst->cnt = src->cnt;
+        dst->datatype = src->datatype;
+        dst->owner = OWN_AE;
+    }
+    else
+        dst->last_action = ACT_SAME_LOCATION;
+    if( src->cnt )
+        memcpy(dst->ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
+}
+
+/************************************************************************
+This function fills x_matrix by ae_matrix's contents:
+
+dst                 destination vector
+src                 source, matrix in x-format
+state               ALGLIB environment state
+
+NOTES:
+* dst is assumed to be initialized. Its contents is freed before  copying
+  data  from src  (if  size / type  are  different)  or  overwritten  (if
+  possible given destination size).
+************************************************************************/
+void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state)
+{
+    char *p_src_row;
+    char *p_dst_row;
+    ae_int_t i;
+    ae_int_t row_size;
+    if( dst->rows!=src->rows || dst->cols!=src->cols || dst->datatype!=src->datatype )
+    {
+        if( dst->owner==OWN_AE )
+            ae_free(dst->ptr);
+        dst->rows = src->rows;
+        dst->cols = src->cols;
+        dst->stride = src->cols;
+        dst->datatype = src->datatype;
+        dst->ptr = ae_malloc((size_t)(dst->rows*((ae_int_t)dst->stride)*ae_sizeof(src->datatype)), state);
+        dst->last_action = ACT_NEW_LOCATION;
+        dst->owner = OWN_AE;
+    }
+    else
+        dst->last_action = ACT_SAME_LOCATION;
+    if( src->rows!=0 && src->cols!=0 )
+    {
+        p_src_row = (char*)(src->ptr.pp_void[0]);
+        p_dst_row = (char*)dst->ptr;
+        row_size = ae_sizeof(src->datatype)*src->cols;
+        for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof(src->datatype), p_dst_row+=dst->stride*ae_sizeof(src->datatype))
+            memcpy(p_dst_row, p_src_row, (size_t)(row_size));
+    }
+}
+
+/************************************************************************
+This function attaches x_vector to ae_vector's contents.
+Ownership of memory allocated is not changed (it is still managed by
+ae_matrix).
+
+dst                 destination vector
+src                 source, vector in x-format
+state               ALGLIB environment state
+
+NOTES:
+* dst is assumed to be initialized. Its contents is freed before
+  attaching to src.
+* this function doesn't need ae_state parameter because it can't fail
+  (assuming correctly initialized src)
+************************************************************************/
+void ae_x_attach_to_vector(x_vector *dst, ae_vector *src)
+{
+    if( dst->owner==OWN_AE )
+        ae_free(dst->ptr);
+    dst->ptr = src->ptr.p_ptr;
+    dst->last_action = ACT_NEW_LOCATION;
+    dst->cnt = src->cnt;
+    dst->datatype = src->datatype;
+    dst->owner = OWN_CALLER;
+}
+
+/************************************************************************
+This function attaches x_matrix to ae_matrix's contents.
+Ownership of memory allocated is not changed (it is still managed by
+ae_matrix).
+
+dst                 destination vector
+src                 source, matrix in x-format
+state               ALGLIB environment state
+
+NOTES:
+* dst is assumed to be initialized. Its contents is freed before
+  attaching to src.
+* this function doesn't need ae_state parameter because it can't fail
+  (assuming correctly initialized src)
+************************************************************************/
+void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src)
+{
+    if( dst->owner==OWN_AE )
+            ae_free(dst->ptr);
+    dst->rows = src->rows;
+    dst->cols = src->cols;
+    dst->stride = src->stride;
+    dst->datatype = src->datatype;
+    dst->ptr = &(src->ptr.pp_double[0][0]);
+    dst->last_action = ACT_NEW_LOCATION;
+    dst->owner = OWN_CALLER;
+}
+
+/************************************************************************
+This function clears x_vector. It does nothing  if vector is not owned by
+ALGLIB environment.
+
+dst                 vector
+************************************************************************/
+void x_vector_clear(x_vector *dst)
+{
+    if( dst->owner==OWN_AE )
+        aligned_free(dst->ptr);
+    dst->ptr = NULL;
+    dst->cnt = 0;
+}
+
+/************************************************************************
+Assertion
+************************************************************************/
+void ae_assert(ae_bool cond, const char *msg, ae_state *state)
+{
+    if( !cond )
+        ae_break(state, ERR_ASSERTION_FAILED, msg);
+}
+
+/************************************************************************
+CPUID
+
+Returns information about features CPU and compiler support.
+
+You must tell ALGLIB what CPU family is used by defining AE_CPU symbol
+(without this hint zero will be returned).
+
+Note: results of this function depend on both CPU and compiler;
+if compiler doesn't support SSE intrinsics, function won't set 
+corresponding flag.
+************************************************************************/
+ae_int_t ae_cpuid()
+{
+    /*
+     * to speed up CPU detection we cache our data in the static vars.
+     * there is no synchronization, but it is still thread safe.
+     *
+     * thread safety is guaranteed on all modern architectures which
+     * have following property: simultaneous writes by different cores
+     * to the same location will be executed in serial manner.
+     *
+     */
+    static ae_bool initialized = ae_false;
+    static ae_bool has_sse2 = ae_false;
+    ae_int_t result;
+    
+    /*
+     * if not initialized, determine system properties
+     */
+    if( !initialized )
+    {
+        /*
+         * SSE2
+         */
+#if defined(AE_CPU)
+#if (AE_CPU==AE_INTEL) && defined(AE_HAS_SSE2_INTRINSICS)
+#if AE_COMPILER==AE_GNUC
+#elif AE_COMPILER==AE_MSVC
+        {
+            int CPUInfo[4];
+            __cpuid(CPUInfo, 1);
+            if( (CPUInfo[3]&0x04000000)!=0 )
+                has_sse2 = ae_true;
+        }
+#elif AE_COMPILER==AE_SUNC
+#else
+#endif
+#endif
+#endif
+        /*
+         * set initialization flag
+         */
+        initialized = ae_true;
+    }
+    
+    /*
+     * return
+     */
+    result = 0;
+    if( has_sse2 )
+        result = result|CPU_SSE2;
+    return result;
+}
+
+/************************************************************************
+Real math functions
+************************************************************************/
+ae_bool ae_fp_eq(double v1, double v2)
+{
+    /* IEEE-strict floating point comparison */
+    volatile double x = v1;
+    volatile double y = v2;
+    return x==y;
+}
+
+ae_bool ae_fp_neq(double v1, double v2)
+{
+    /* IEEE-strict floating point comparison */
+    return !ae_fp_eq(v1,v2);
+}
+
+ae_bool ae_fp_less(double v1, double v2)
+{
+    /* IEEE-strict floating point comparison */
+    volatile double x = v1;
+    volatile double y = v2;
+    return x<y;
+}
+
+ae_bool ae_fp_less_eq(double v1, double v2)
+{
+    /* IEEE-strict floating point comparison */
+    volatile double x = v1;
+    volatile double y = v2;
+    return x<=y;
+}
+
+ae_bool ae_fp_greater(double v1, double v2)
+{
+    /* IEEE-strict floating point comparison */
+    volatile double x = v1;
+    volatile double y = v2;
+    return x>y;
+}
+
+ae_bool ae_fp_greater_eq(double v1, double v2)
+{
+    /* IEEE-strict floating point comparison */
+    volatile double x = v1;
+    volatile double y = v2;
+    return x>=y;
+}
+
+ae_bool ae_isfinite_stateless(double x, ae_int_t endianness)
+{
+    union _u
+    {
+        double a;
+        ae_int32_t p[2];
+    } u;
+    ae_int32_t high;
+    u.a = x;
+    if( endianness==AE_LITTLE_ENDIAN )
+        high = u.p[1];
+    else
+        high = u.p[0];
+    return (high & (ae_int32_t)0x7FF00000)!=(ae_int32_t)0x7FF00000;
+}
+
+ae_bool ae_isnan_stateless(double x,    ae_int_t endianness)
+{
+    union _u
+    {
+        double a;
+        ae_int32_t p[2];
+    } u;
+    ae_int32_t high, low;
+    u.a = x;
+    if( endianness==AE_LITTLE_ENDIAN )
+    {
+        high = u.p[1];
+        low =  u.p[0];
+    }
+    else
+    {
+        high = u.p[0];
+        low =  u.p[1];
+    }
+    return ((high &0x7FF00000)==0x7FF00000) && (((high &0x000FFFFF)!=0) || (low!=0));
+}
+
+ae_bool ae_isinf_stateless(double x,    ae_int_t endianness)
+{
+    union _u
+    {
+        double a;
+        ae_int32_t p[2];
+    } u;
+    ae_int32_t high, low;
+    u.a = x;
+    if( endianness==AE_LITTLE_ENDIAN )
+    {
+        high = u.p[1];
+        low  = u.p[0];
+    }
+    else
+    {
+        high = u.p[0];
+        low  = u.p[1];
+    }
+    
+    // 31 least significant bits of high are compared
+    return ((high&0x7FFFFFFF)==0x7FF00000) && (low==0); 
+}
+
+ae_bool ae_isposinf_stateless(double x, ae_int_t endianness)
+{
+    union _u
+    {
+        double a;
+        ae_int32_t p[2];
+    } u;
+    ae_int32_t high, low;
+    u.a = x;
+    if( endianness==AE_LITTLE_ENDIAN )
+    {
+        high = u.p[1];
+        low  = u.p[0];
+    }
+    else
+    {
+        high = u.p[0];
+        low  = u.p[1];
+    }
+    
+    // all 32 bits of high are compared
+    return (high==(ae_int32_t)0x7FF00000) && (low==0); 
+}
+
+ae_bool ae_isneginf_stateless(double x, ae_int_t endianness)
+{
+    union _u
+    {
+        double a;
+        ae_int32_t p[2];
+    } u;
+    ae_int32_t high, low;
+    u.a = x;
+    if( endianness==AE_LITTLE_ENDIAN )
+    {
+        high = u.p[1];
+        low  = u.p[0];
+    }
+    else
+    {
+        high = u.p[0];
+        low  = u.p[1];
+    }
+    
+    // this code is a bit tricky to avoid comparison of high with 0xFFF00000, which may be unsafe with some buggy compilers
+    return ((high&0x7FFFFFFF)==0x7FF00000) && (high!=(ae_int32_t)0x7FF00000) && (low==0);
+}
+
+ae_int_t ae_get_endianness()
+{
+    union
+    {
+        double a;
+        ae_int32_t p[2];
+    } u;
+    
+    /*
+     * determine endianness
+     * two types are supported: big-endian and little-endian.
+     * mixed-endian hardware is NOT supported.
+     *
+     * 1983 is used as magic number because its non-periodic double 
+     * representation allow us to easily distinguish between upper 
+     * and lower halfs and to detect mixed endian hardware.
+     *
+     */
+    u.a = 1.0/1983.0; 
+    if( u.p[1]==(ae_int32_t)0x3f408642 )
+        return AE_LITTLE_ENDIAN;
+    if( u.p[0]==(ae_int32_t)0x3f408642 )
+        return AE_BIG_ENDIAN;
+    return AE_MIXED_ENDIAN;
+}
+
+ae_bool ae_isfinite(double x,ae_state *state)
+{
+    return ae_isfinite_stateless(x, state->endianness);
+}
+
+ae_bool ae_isnan(double x,   ae_state *state)
+{
+    return ae_isnan_stateless(x, state->endianness);
+}
+
+ae_bool ae_isinf(double x,   ae_state *state)
+{
+    return ae_isinf_stateless(x, state->endianness);
+}
+
+ae_bool ae_isposinf(double x,ae_state *state)
+{
+    return ae_isposinf_stateless(x, state->endianness);
+}
+
+ae_bool ae_isneginf(double x,ae_state *state)
+{
+    return ae_isneginf_stateless(x, state->endianness);
+}
+
+double ae_fabs(double x,  ae_state *state)
+{
+    return fabs(x);
+}
+
+ae_int_t ae_iabs(ae_int_t x, ae_state *state)
+{
+    return x>=0 ? x : -x;
+}
+
+double ae_sqr(double x,  ae_state *state)
+{
+    return x*x;
+}
+
+double ae_sqrt(double x, ae_state *state)
+{
+    return sqrt(x);
+}
+
+ae_int_t ae_sign(double x, ae_state *state)
+{
+    if( x>0 ) return  1;
+    if( x<0 ) return -1;
+    return 0;
+}
+
+ae_int_t ae_round(double x, ae_state *state)
+{
+    return (ae_int_t)(ae_ifloor(x+0.5,state));
+}
+
+ae_int_t ae_trunc(double x, ae_state *state)
+{
+    return (ae_int_t)(x>0 ? ae_ifloor(x,state) : ae_iceil(x,state));
+}
+
+ae_int_t ae_ifloor(double x, ae_state *state)
+{
+    return (ae_int_t)(floor(x));
+}
+
+ae_int_t ae_iceil(double x,  ae_state *state)
+{
+    return (ae_int_t)(ceil(x));
+}
+
+ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state)
+{
+    return m1>m2 ? m1 : m2;
+}
+
+ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state)
+{
+    return m1>m2 ? m2 : m1;
+}
+
+double ae_maxreal(double m1, double m2, ae_state *state)
+{
+    return m1>m2 ? m1 : m2;
+}
+
+double ae_minreal(double m1, double m2, ae_state *state)
+{
+    return m1>m2 ? m2 : m1;
+}
+
+double ae_randomreal(ae_state *state)
+{
+    int i1 = rand();
+    int i2 = rand();
+    double mx;
+    while(i1==RAND_MAX)
+        i1 =rand();
+    while(i2==RAND_MAX)
+        i2 =rand();
+    mx = RAND_MAX;
+    return (i1+i2/mx)/mx;
+}
+
+ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state)
+{
+    return rand()%maxv;
+}
+
+double   ae_sin(double x, ae_state *state)
+{
+    return sin(x);
+}
+
+double   ae_cos(double x, ae_state *state)
+{
+    return cos(x);
+}
+
+double   ae_tan(double x, ae_state *state)
+{
+    return tan(x);
+}
+
+double   ae_sinh(double x, ae_state *state)
+{
+    return sinh(x);
+}
+
+double   ae_cosh(double x, ae_state *state)
+{
+    return cosh(x);
+}
+double   ae_tanh(double x, ae_state *state)
+{
+    return tanh(x);
+}
+
+double   ae_asin(double x, ae_state *state)
+{
+    return asin(x);
+}
+
+double   ae_acos(double x, ae_state *state)
+{
+    return acos(x);
+}
+
+double   ae_atan(double x, ae_state *state)
+{
+    return atan(x);
+}
+
+double   ae_atan2(double x, double y, ae_state *state)
+{
+    return atan2(x,y);
+}
+
+double   ae_log(double x, ae_state *state)
+{
+    return log(x);
+}
+
+double   ae_pow(double x, double y, ae_state *state)
+{
+    return pow(x,y);
+}
+
+double   ae_exp(double x, ae_state *state)
+{
+    return exp(x);
+}
+
+/************************************************************************
+Symmetric/Hermitian properties: check and force
+************************************************************************/
+static void x_split_length(ae_int_t n, ae_int_t nb, ae_int_t* n1, ae_int_t* n2)
+{
+    ae_int_t r;
+    if( n<=nb )
+    {
+        *n1 = n;
+        *n2 = 0;
+    }
+    else
+    {
+        if( n%nb!=0 )
+        {
+            *n2 = n%nb;
+            *n1 = n-(*n2);
+        }
+        else
+        {
+            *n2 = n/2;
+            *n1 = n-(*n2);
+            if( *n1%nb==0 )
+            {
+                return;
+            }
+            r = nb-*n1%nb;
+            *n1 = *n1+r;
+            *n2 = *n2-r;
+        }
+    }
+}
+static double x_safepythag2(double x, double y)
+{
+    double w;
+    double xabs;
+    double yabs;
+    double z;
+    xabs = fabs(x);
+    yabs = fabs(y);
+    w = xabs>yabs ? xabs : yabs;
+    z = xabs<yabs ? xabs : yabs;
+    if( z==0 )
+        return w;
+    else
+    {
+        double t;
+        t = z/w;
+        return w*sqrt(1+t*t);
+    }
+}
+/*
+ * this function checks difference between offdiagonal blocks BL and BU
+ * (see below). Block BL is specified by offsets (offset0,offset1)  and
+ * sizes (len0,len1).
+ *
+ *     [ .          ]
+ *     [   A0  BU   ]
+ * A = [   BL  A1   ]
+ *     [          . ]
+ *
+ *  this subroutine updates current values of:
+ *  a) mx       maximum value of A[i,j] found so far
+ *  b) err      componentwise difference between elements of BL and BU^T
+ *
+ */
+static void is_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
+{
+    /* try to split problem into two smaller ones */
+    if( len0>x_nb || len1>x_nb )
+    {
+        ae_int_t n1, n2;
+        if( len0>len1 )
+        {
+            x_split_length(len0, x_nb, &n1, &n2);
+            is_symmetric_rec_off_stat(a, offset0,    offset1, n1, len1, nonfinite, mx, err, _state);
+            is_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
+        }
+        else
+        {
+            x_split_length(len1, x_nb, &n1, &n2);
+            is_symmetric_rec_off_stat(a, offset0, offset1,    len0, n1, nonfinite, mx, err, _state);
+            is_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
+        }
+        return;
+    }
+    else
+    {
+        /* base case */
+        double *p1, *p2, *prow, *pcol;
+        double v;
+        ae_int_t i, j;
+
+        p1 = (double*)(a->ptr)+offset0*a->stride+offset1;
+        p2 = (double*)(a->ptr)+offset1*a->stride+offset0;
+        for(i=0; i<len0; i++)
+        {
+            pcol = p2+i;
+            prow = p1+i*a->stride;
+            for(j=0; j<len1; j++)
+            {
+                if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
+                {
+                    *nonfinite = ae_true;
+                }
+                else
+                {
+                    v = fabs(*pcol);
+                    *mx =  *mx>v ? *mx : v;
+                    v = fabs(*prow);
+                    *mx =  *mx>v ? *mx : v;
+                    v = fabs(*pcol-*prow);
+                    *err = *err>v ? *err : v;
+                }                
+                pcol += a->stride;
+                prow++;
+            }
+        }
+    }
+}
+/*
+ * this function checks that diagonal block A0 is symmetric.
+ * Block A0 is specified by its offset and size.
+ *
+ *     [ .          ]
+ *     [   A0       ]
+ * A = [       .    ]
+ *     [          . ]
+ *
+ *  this subroutine updates current values of:
+ *  a) mx       maximum value of A[i,j] found so far
+ *  b) err      componentwise difference between A0 and A0^T
+ *
+ */
+static void is_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
+{
+    double *p, *prow, *pcol;
+    double v;
+    ae_int_t i, j;
+    
+    /* try to split problem into two smaller ones */
+    if( len>x_nb )
+    {
+        ae_int_t n1, n2;
+        x_split_length(len, x_nb, &n1, &n2);
+        is_symmetric_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
+        is_symmetric_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
+        is_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
+        return;
+    }
+    
+    /* base case */
+    p = (double*)(a->ptr)+offset*a->stride+offset;
+    for(i=0; i<len; i++)
+    {
+        pcol = p+i;
+        prow = p+i*a->stride;
+        for(j=0; j<i; j++,pcol+=a->stride,prow++)
+        {
+            if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
+            {
+                *nonfinite = ae_true;
+            }
+            else
+            {
+                v = fabs(*pcol);
+                *mx =  *mx>v ? *mx : v;
+                v = fabs(*prow);
+                *mx =  *mx>v ? *mx : v;
+                v = fabs(*pcol-*prow);
+                *err = *err>v ? *err : v;
+            }
+        }
+        v = fabs(p[i+i*a->stride]);
+        *mx =  *mx>v ? *mx : v;
+    }
+}
+/*
+ * this function checks difference between offdiagonal blocks BL and BU
+ * (see below). Block BL is specified by offsets (offset0,offset1)  and
+ * sizes (len0,len1).
+ *
+ *     [ .          ]
+ *     [   A0  BU   ]
+ * A = [   BL  A1   ]
+ *     [          . ]
+ *
+ *  this subroutine updates current values of:
+ *  a) mx       maximum value of A[i,j] found so far
+ *  b) err      componentwise difference between elements of BL and BU^H
+ *
+ */
+static void is_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
+{
+    /* try to split problem into two smaller ones */
+    if( len0>x_nb || len1>x_nb )
+    {
+        ae_int_t n1, n2;
+        if( len0>len1 )
+        {
+            x_split_length(len0, x_nb, &n1, &n2);
+            is_hermitian_rec_off_stat(a, offset0,    offset1, n1, len1, nonfinite, mx, err, _state);
+            is_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
+        }
+        else
+        {
+            x_split_length(len1, x_nb, &n1, &n2);
+            is_hermitian_rec_off_stat(a, offset0, offset1,    len0, n1, nonfinite, mx, err, _state);
+            is_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
+        }
+        return;
+    }
+    else
+    {
+        /* base case */
+        ae_complex *p1, *p2, *prow, *pcol;
+        double v;
+        ae_int_t i, j;
+
+        p1 = (ae_complex*)(a->ptr)+offset0*a->stride+offset1;
+        p2 = (ae_complex*)(a->ptr)+offset1*a->stride+offset0;
+        for(i=0; i<len0; i++)
+        {
+            pcol = p2+i;
+            prow = p1+i*a->stride;
+            for(j=0; j<len1; j++)
+            {
+                if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
+                {
+                    *nonfinite = ae_true;
+                }
+                else
+                {
+                    v = x_safepythag2(pcol->x, pcol->y);
+                    *mx =  *mx>v ? *mx : v;
+                    v = x_safepythag2(prow->x, prow->y);
+                    *mx =  *mx>v ? *mx : v;
+                    v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
+                    *err = *err>v ? *err : v;
+                }
+                pcol += a->stride;
+                prow++;
+            }
+        }
+    }
+}
+/*
+ * this function checks that diagonal block A0 is Hermitian.
+ * Block A0 is specified by its offset and size.
+ *
+ *     [ .          ]
+ *     [   A0       ]
+ * A = [       .    ]
+ *     [          . ]
+ *
+ *  this subroutine updates current values of:
+ *  a) mx       maximum value of A[i,j] found so far
+ *  b) err      componentwise difference between A0 and A0^H
+ *
+ */
+static void is_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
+{
+    ae_complex *p, *prow, *pcol;
+    double v;
+    ae_int_t i, j;
+    
+    /* try to split problem into two smaller ones */
+    if( len>x_nb )
+    {
+        ae_int_t n1, n2;
+        x_split_length(len, x_nb, &n1, &n2);
+        is_hermitian_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
+        is_hermitian_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
+        is_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
+        return;
+    }
+    
+    /* base case */
+    p = (ae_complex*)(a->ptr)+offset*a->stride+offset;
+    for(i=0; i<len; i++)
+    {
+        pcol = p+i;
+        prow = p+i*a->stride;
+        for(j=0; j<i; j++,pcol+=a->stride,prow++)
+        {
+            if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
+            {
+                *nonfinite = ae_true;
+            }
+            else
+            {
+                v = x_safepythag2(pcol->x, pcol->y);
+                *mx =  *mx>v ? *mx : v;
+                v = x_safepythag2(prow->x, prow->y);
+                *mx =  *mx>v ? *mx : v;
+                v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
+                *err = *err>v ? *err : v;
+            }
+        }
+        if( !ae_isfinite(p[i+i*a->stride].x, _state) || !ae_isfinite(p[i+i*a->stride].y, _state) )
+        {
+            *nonfinite = ae_true;
+        }
+        else
+        {
+            v = fabs(p[i+i*a->stride].x);
+            *mx =  *mx>v ? *mx : v;
+            v = fabs(p[i+i*a->stride].y);
+            *err =  *err>v ? *err : v;
+        }
+    }
+}
+/*
+ * this function copies offdiagonal block BL to its symmetric counterpart
+ * BU (see below). Block BL is specified by offsets (offset0,offset1)
+ * and sizes (len0,len1).
+ *
+ *     [ .          ]
+ *     [   A0  BU   ]
+ * A = [   BL  A1   ]
+ *     [          . ]
+ *
+ */
+static void force_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
+{
+    /* try to split problem into two smaller ones */
+    if( len0>x_nb || len1>x_nb )
+    {
+        ae_int_t n1, n2;
+        if( len0>len1 )
+        {
+            x_split_length(len0, x_nb, &n1, &n2);
+            force_symmetric_rec_off_stat(a, offset0,    offset1, n1, len1);
+            force_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1);
+        }
+        else
+        {
+            x_split_length(len1, x_nb, &n1, &n2);
+            force_symmetric_rec_off_stat(a, offset0, offset1,    len0, n1);
+            force_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2);
+        }
+        return;
+    }
+    else
+    {
+        /* base case */
+        double *p1, *p2, *prow, *pcol;
+        ae_int_t i, j;
+
+        p1 = (double*)(a->ptr)+offset0*a->stride+offset1;
+        p2 = (double*)(a->ptr)+offset1*a->stride+offset0;
+        for(i=0; i<len0; i++)
+        {
+            pcol = p2+i;
+            prow = p1+i*a->stride;
+            for(j=0; j<len1; j++)
+            {
+                *pcol = *prow;
+                pcol += a->stride;
+                prow++;
+            }
+        }
+    }
+}
+/*
+ * this function copies lower part of diagonal block A0 to its upper part
+ * Block is specified by offset and size.
+ *
+ *     [ .          ]
+ *     [   A0       ]
+ * A = [       .    ]
+ *     [          . ]
+ *
+ */
+static void force_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
+{
+    double *p, *prow, *pcol;
+    ae_int_t i, j;
+    
+    /* try to split problem into two smaller ones */
+    if( len>x_nb )
+    {
+        ae_int_t n1, n2;
+        x_split_length(len, x_nb, &n1, &n2);
+        force_symmetric_rec_diag_stat(a, offset, n1);
+        force_symmetric_rec_diag_stat(a, offset+n1, n2);
+        force_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1);
+        return;
+    }
+    
+    /* base case */
+    p = (double*)(a->ptr)+offset*a->stride+offset;
+    for(i=0; i<len; i++)
+    {
+        pcol = p+i;
+        prow = p+i*a->stride;
+        for(j=0; j<i; j++,pcol+=a->stride,prow++)
+            *pcol = *prow;
+    }
+}
+/*
+ * this function copies Hermitian transpose of offdiagonal block BL to
+ * its symmetric counterpart BU (see below). Block BL is specified by
+ * offsets (offset0,offset1) and sizes (len0,len1).
+ *
+ *     [ .          ]
+ *     [   A0  BU   ]
+ * A = [   BL  A1   ]
+ *     [          . ]
+ */
+static void force_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
+{
+    /* try to split problem into two smaller ones */
+    if( len0>x_nb || len1>x_nb )
+    {
+        ae_int_t n1, n2;
+        if( len0>len1 )
+        {
+            x_split_length(len0, x_nb, &n1, &n2);
+            force_hermitian_rec_off_stat(a, offset0,    offset1, n1, len1);
+            force_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1);
+        }
+        else
+        {
+            x_split_length(len1, x_nb, &n1, &n2);
+            force_hermitian_rec_off_stat(a, offset0, offset1,    len0, n1);
+            force_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2);
+        }
+        return;
+    }
+    else
+    {
+        /* base case */
+        ae_complex *p1, *p2, *prow, *pcol;
+        ae_int_t i, j;
+
+        p1 = (ae_complex*)(a->ptr)+offset0*a->stride+offset1;
+        p2 = (ae_complex*)(a->ptr)+offset1*a->stride+offset0;
+        for(i=0; i<len0; i++)
+        {
+            pcol = p2+i;
+            prow = p1+i*a->stride;
+            for(j=0; j<len1; j++)
+            {
+                *pcol = *prow;
+                pcol += a->stride;
+                prow++;
+            }
+        }
+    }
+}
+/*
+ * this function copies Hermitian transpose of lower part of
+ * diagonal block A0 to its upper part Block is specified by offset and size.
+ *
+ *     [ .          ]
+ *     [   A0       ]
+ * A = [       .    ]
+ *     [          . ]
+ *
+ */
+static void force_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
+{
+    ae_complex *p, *prow, *pcol;
+    ae_int_t i, j;
+    
+    /* try to split problem into two smaller ones */
+    if( len>x_nb )
+    {
+        ae_int_t n1, n2;
+        x_split_length(len, x_nb, &n1, &n2);
+        force_hermitian_rec_diag_stat(a, offset, n1);
+        force_hermitian_rec_diag_stat(a, offset+n1, n2);
+        force_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1);
+        return;
+    }
+    
+    /* base case */
+    p = (ae_complex*)(a->ptr)+offset*a->stride+offset;
+    for(i=0; i<len; i++)
+    {
+        pcol = p+i;
+        prow = p+i*a->stride;
+        for(j=0; j<i; j++,pcol+=a->stride,prow++)
+            *pcol = *prow;
+    }
+}
+ae_bool x_is_symmetric(x_matrix *a)
+{
+    double mx, err;
+    ae_bool nonfinite;
+    ae_state _alglib_env_state;
+    if( a->datatype!=DT_REAL )
+        return ae_false;
+    if( a->cols!=a->rows )
+        return ae_false;
+    if( a->cols==0 || a->rows==0 )
+        return ae_true;
+    ae_state_init(&_alglib_env_state);
+    mx = 0;
+    err = 0;
+    nonfinite = ae_false;
+    is_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
+    if( nonfinite )
+        return ae_false;
+    if( mx==0 )
+        return ae_true;
+    return err/mx<=1.0E-14;
+}
+ae_bool x_is_hermitian(x_matrix *a)
+{
+    double mx, err;
+    ae_bool nonfinite;
+    ae_state _alglib_env_state;
+    if( a->datatype!=DT_COMPLEX )
+        return ae_false;
+    if( a->cols!=a->rows )
+        return ae_false;
+    if( a->cols==0 || a->rows==0 )
+        return ae_true;
+    ae_state_init(&_alglib_env_state);
+    mx = 0;
+    err = 0;
+    nonfinite = ae_false;
+    is_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
+    if( nonfinite )
+        return ae_false;
+    if( mx==0 )
+        return ae_true;
+    return err/mx<=1.0E-14;
+}
+ae_bool x_force_symmetric(x_matrix *a)
+{
+    if( a->datatype!=DT_REAL )
+        return ae_false;
+    if( a->cols!=a->rows )
+        return ae_false;
+    if( a->cols==0 || a->rows==0 )
+        return ae_true;
+    force_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows);
+    return ae_true;
+}
+ae_bool x_force_hermitian(x_matrix *a)
+{
+    if( a->datatype!=DT_COMPLEX )
+        return ae_false;
+    if( a->cols!=a->rows )
+        return ae_false;
+    if( a->cols==0 || a->rows==0 )
+        return ae_true;
+    force_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows);
+    return ae_true;
+}
+
+ae_bool ae_is_symmetric(ae_matrix *a)
+{
+    x_matrix x;
+    x.owner = OWN_CALLER;
+    ae_x_attach_to_matrix(&x, a);
+    return x_is_symmetric(&x);
+}
+
+ae_bool ae_is_hermitian(ae_matrix *a)
+{
+    x_matrix x;
+    x.owner = OWN_CALLER;
+    ae_x_attach_to_matrix(&x, a);
+    return x_is_hermitian(&x);
+}
+
+ae_bool ae_force_symmetric(ae_matrix *a)
+{
+    x_matrix x;
+    x.owner = OWN_CALLER;
+    ae_x_attach_to_matrix(&x, a);
+    return x_force_symmetric(&x);
+}
+
+ae_bool ae_force_hermitian(ae_matrix *a)
+{
+    x_matrix x;
+    x.owner = OWN_CALLER;
+    ae_x_attach_to_matrix(&x, a);
+    return x_force_hermitian(&x);
+}
+
+
+/************************************************************************
+Complex math functions
+************************************************************************/
+ae_complex ae_complex_from_d(double v)
+{
+    ae_complex r;
+    r.x = v;
+    r.y = 0.0;
+    return r;
+}
+
+ae_complex ae_c_neg(ae_complex lhs)
+{
+    ae_complex result;
+    result.x = -lhs.x;
+    result.y = -lhs.y;
+    return result;
+}
+
+ae_complex ae_c_conj(ae_complex lhs, ae_state *state)
+{
+    ae_complex result;
+    result.x = +lhs.x;
+    result.y = -lhs.y;
+    return result;
+}
+
+ae_complex ae_c_sqr(ae_complex lhs, ae_state *state)
+{
+    ae_complex result;
+    result.x = lhs.x*lhs.x-lhs.y*lhs.y;
+    result.y = 2*lhs.x*lhs.y;
+    return result;
+}
+
+double ae_c_abs(ae_complex z, ae_state *state)
+{
+    double w;
+    double xabs;
+    double yabs;
+    double v;
+
+    xabs = fabs(z.x);
+    yabs = fabs(z.y);
+    w = xabs>yabs ? xabs : yabs;
+    v = xabs<yabs ? xabs : yabs;
+    if( v==0 )
+        return w;
+    else
+    {
+        double t = v/w;
+        return w*sqrt(1+t*t);
+    }
+}
+
+ae_bool ae_c_eq(ae_complex lhs,   ae_complex rhs)
+{
+    volatile double x1 = lhs.x;
+    volatile double x2 = rhs.x;
+    volatile double y1 = lhs.y;
+    volatile double y2 = rhs.y;
+    return x1==x2 && y1==y2;
+}
+
+ae_bool ae_c_neq(ae_complex lhs,  ae_complex rhs)
+{
+    volatile double x1 = lhs.x;
+    volatile double x2 = rhs.x;
+    volatile double y1 = lhs.y;
+    volatile double y2 = rhs.y;
+    return x1!=x2 || y1!=y2;
+}
+
+ae_complex ae_c_add(ae_complex lhs,  ae_complex rhs)
+{
+    ae_complex result;
+    result.x = lhs.x+rhs.x;
+    result.y = lhs.y+rhs.y;
+    return result;
+}
+
+ae_complex ae_c_mul(ae_complex lhs,  ae_complex rhs)
+{
+    ae_complex result;
+    result.x = lhs.x*rhs.x-lhs.y*rhs.y;
+    result.y = lhs.x*rhs.y+lhs.y*rhs.x;
+    return result;
+}
+
+ae_complex ae_c_sub(ae_complex lhs,   ae_complex rhs)
+{
+    ae_complex result;
+    result.x = lhs.x-rhs.x;
+    result.y = lhs.y-rhs.y;
+    return result;
+}
+
+ae_complex ae_c_div(ae_complex lhs,   ae_complex rhs)
+{
+    ae_complex result;
+    double e;
+    double f;
+    if( fabs(rhs.y)<fabs(rhs.x) )
+    {
+        e = rhs.y/rhs.x;
+        f = rhs.x+rhs.y*e;
+        result.x = (lhs.x+lhs.y*e)/f;
+        result.y = (lhs.y-lhs.x*e)/f;
+    }
+    else
+    {
+        e = rhs.x/rhs.y;
+        f = rhs.y+rhs.x*e;
+        result.x = (lhs.y+lhs.x*e)/f;
+        result.y = (-lhs.x+lhs.y*e)/f;
+    }
+    return result;
+}
+
+ae_bool ae_c_eq_d(ae_complex lhs,  double rhs)
+{
+    volatile double x1 = lhs.x;
+    volatile double x2 = rhs;
+    volatile double y1 = lhs.y;
+    volatile double y2 = 0;
+    return x1==x2 && y1==y2;
+}
+
+ae_bool ae_c_neq_d(ae_complex lhs, double rhs)
+{
+    volatile double x1 = lhs.x;
+    volatile double x2 = rhs;
+    volatile double y1 = lhs.y;
+    volatile double y2 = 0;
+    return x1!=x2 || y1!=y2;
+}
+
+ae_complex ae_c_add_d(ae_complex lhs, double rhs)
+{
+    ae_complex result;
+    result.x = lhs.x+rhs;
+    result.y = lhs.y;
+    return result;
+}
+
+ae_complex ae_c_mul_d(ae_complex lhs, double rhs)
+{
+    ae_complex result;
+    result.x = lhs.x*rhs;
+    result.y = lhs.y*rhs;
+    return result;
+}
+
+ae_complex ae_c_sub_d(ae_complex lhs, double rhs)
+{
+    ae_complex result;
+    result.x = lhs.x-rhs;
+    result.y = lhs.y;
+    return result;
+}
+
+ae_complex ae_c_d_sub(double lhs,     ae_complex rhs)
+{
+    ae_complex result;
+    result.x = lhs-rhs.x;
+    result.y = -rhs.y;
+    return result;
+}
+
+ae_complex ae_c_div_d(ae_complex lhs, double rhs)
+{
+    ae_complex result;
+    result.x = lhs.x/rhs;
+    result.y = lhs.y/rhs;
+    return result;
+}
+
+ae_complex ae_c_d_div(double lhs,   ae_complex rhs)
+{
+    ae_complex result;
+    double e;
+    double f;
+    if( fabs(rhs.y)<fabs(rhs.x) )
+    {
+        e = rhs.y/rhs.x;
+        f = rhs.x+rhs.y*e;
+        result.x = lhs/f;
+        result.y = -lhs*e/f;
+    }
+    else
+    {
+        e = rhs.x/rhs.y;
+        f = rhs.y+rhs.x*e;
+        result.x = lhs*e/f;
+        result.y = -lhs/f;
+    }
+    return result;
+}
+
+
+/************************************************************************
+Complex BLAS operations
+************************************************************************/
+ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
+{
+    double rx = 0, ry = 0; 
+    ae_int_t i;
+    ae_bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
+    ae_bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
+    ae_complex result;
+    if( bconj0 && bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = -v0->y;
+            v1x = v1->x;
+            v1y = -v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    if( !bconj0 && bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = v0->y;
+            v1x = v1->x;
+            v1y = -v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    if( bconj0 && !bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = -v0->y;
+            v1x = v1->x;
+            v1y = v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    if( !bconj0 && !bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = v0->y;
+            v1x = v1->x;
+            v1y = v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    result.x = rx;
+    result.y = ry;
+    return result;
+}
+
+void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x =  vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+                *vdst = *vsrc;
+        }
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x =  vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+                *vdst = *vsrc;
+        }
+    }
+}
+
+void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y =  vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y =  vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+    }
+}
+
+void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x =  alpha*vsrc->x;
+                vdst->y = -alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = alpha*vsrc->x;
+                vdst->y = alpha*vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x =  alpha*vsrc->x;
+                vdst->y = -alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = alpha*vsrc->x;
+                vdst->y = alpha*vsrc->y;
+            }
+        }
+    }
+}
+
+void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x =  ax*vsrc->x+ay*vsrc->y;
+                vdst->y = -ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+        else
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = ax*vsrc->x-ay*vsrc->y;
+                vdst->y = ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * highly optimized case
+         */
+        if( bconj )
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x =  ax*vsrc->x+ay*vsrc->y;
+                vdst->y = -ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+        else
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = ax*vsrc->x-ay*vsrc->y;
+                vdst->y = ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+}
+
+void ae_v_cadd(ae_complex *vdst,     ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+    }
+}
+
+void ae_v_caddd(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y -= alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y += alpha*vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y -= alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y += alpha*vsrc->y;
+            }
+        }
+    }
+}
+
+void ae_v_caddc(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        double ax = alpha.x, ay = alpha.y;
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += ax*vsrc->x+ay*vsrc->y;
+                vdst->y -= ax*vsrc->y-ay*vsrc->x;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += ax*vsrc->x-ay*vsrc->y;
+                vdst->y += ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * highly optimized case
+         */
+        double ax = alpha.x, ay = alpha.y;
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += ax*vsrc->x+ay*vsrc->y;
+                vdst->y -= ax*vsrc->y-ay*vsrc->x;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += ax*vsrc->x-ay*vsrc->y;
+                vdst->y += ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+}
+
+void ae_v_csub(ae_complex *vdst,     ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * highly optimized case
+         */
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+    }
+}
+
+void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
+{
+    ae_v_caddd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
+}
+
+void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
+{
+    alpha.x = -alpha.x;
+    alpha.y = -alpha.y;
+    ae_v_caddc(vdst, stride_dst, vsrc, stride_src, conj_src, n, alpha);
+}
+
+void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst)
+        {
+            vdst->x *= alpha;
+            vdst->y *= alpha;
+        }
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        for(i=0; i<n; i++, vdst++)
+        {
+            vdst->x *= alpha;
+            vdst->y *= alpha;
+        }
+    }
+}
+
+void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        double ax = alpha.x, ay = alpha.y;
+        for(i=0; i<n; i++, vdst+=stride_dst)
+        {
+            double  dstx = vdst->x, dsty = vdst->y;
+            vdst->x = ax*dstx-ay*dsty;
+            vdst->y = ax*dsty+ay*dstx;
+        }
+    }
+    else
+    {
+        /*
+         * highly optimized case
+         */
+        double ax = alpha.x, ay = alpha.y;
+        for(i=0; i<n; i++, vdst++)
+        {
+            double  dstx = vdst->x, dsty = vdst->y;
+            vdst->x = ax*dstx-ay*dsty;
+            vdst->y = ax*dsty+ay*dstx;
+        }
+    }
+}
+
+/************************************************************************
+Real BLAS operations
+************************************************************************/
+double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
+{
+    double result = 0;
+    ae_int_t i;
+    if( stride0!=1 || stride1!=1 )
+    {
+        /*
+         * slow general code
+         */
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+            result += (*v0)*(*v1);
+    }
+    else
+    {
+        /*
+         * optimized code for stride=1
+         */
+        ae_int_t n4 = n/4;
+        ae_int_t nleft = n%4;
+        for(i=0; i<n4; i++, v0+=4, v1+=4)
+            result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
+        for(i=0; i<nleft; i++, v0++, v1++)
+            result += v0[0]*v1[0];
+    }
+    return result;
+}
+
+void ae_v_move(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst = *vsrc;
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] = vsrc[0];
+            vdst[1] = vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] = vsrc[0];
+    }
+}
+
+void ae_v_moveneg(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst = -*vsrc;
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] = -vsrc[0];
+            vdst[1] = -vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] = -vsrc[0];
+    }
+}
+
+void ae_v_moved(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst = alpha*(*vsrc);
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] = alpha*vsrc[0];
+            vdst[1] = alpha*vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] = alpha*vsrc[0];
+    }
+}
+
+void ae_v_add(double *vdst,     ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst += *vsrc;
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] += vsrc[0];
+            vdst[1] += vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] += vsrc[0];
+    }
+}
+
+void ae_v_addd(double *vdst,    ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst += alpha*(*vsrc);
+    }
+    else
+    {
+        /*
+         * optimized case
+         */
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] += alpha*vsrc[0];
+            vdst[1] += alpha*vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] += alpha*vsrc[0];
+    }
+}
+
+void ae_v_sub(double *vdst,     ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst -= *vsrc;
+    }
+    else
+    {
+        /*
+         * highly optimized case
+         */
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] -= vsrc[0];
+            vdst[1] -= vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] -= vsrc[0];
+    }
+}
+
+void ae_v_subd(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha)
+{
+    ae_v_addd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
+}
+
+void ae_v_muld(double *vdst,  ae_int_t stride_dst, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 )
+    {
+        /*
+         * general unoptimized case
+         */
+        for(i=0; i<n; i++, vdst+=stride_dst)
+            *vdst *= alpha;
+    }
+    else
+    {
+        /*
+         * highly optimized case
+         */
+        for(i=0; i<n; i++, vdst++)
+            *vdst *= alpha;
+    }
+}
+
+/************************************************************************
+Other functions
+************************************************************************/
+ae_int_t ae_v_len(ae_int_t a, ae_int_t b)
+{
+    return b-a+1;
+}
+
+/************************************************************************
+RComm functions
+************************************************************************/
+ae_bool _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->ba, 0, DT_BOOL,    _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ia, 0, DT_INT,     _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ra, 0, DT_REAL,    _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ca, 0, DT_COMPLEX, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+ae_bool _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init_copy(&dst->ba, &src->ba, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ia, &src->ia, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ra, &src->ra, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ca, &src->ca, _state, make_automatic) )
+        return ae_false;
+    dst->stage = src->stage;
+    return ae_true;
+}
+
+void _rcommstate_clear(rcommstate* p)
+{
+    ae_vector_clear(&p->ba);
+    ae_vector_clear(&p->ia);
+    ae_vector_clear(&p->ra);
+    ae_vector_clear(&p->ca);
+}
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS C++ RELATED FUNCTIONALITY
+//
+/////////////////////////////////////////////////////////////////////////
+/********************************************************************
+Internal forwards
+********************************************************************/
+namespace alglib
+{
+    double get_aenv_nan();
+    double get_aenv_posinf();
+    double get_aenv_neginf();
+    ae_int_t my_stricmp(const char *s1, const char *s2);
+    char* filter_spaces(const char *s);
+    void str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec);
+    void str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat);
+    
+    ae_bool parse_bool_delim(const char *s, const char *delim);
+    ae_int_t parse_int_delim(const char *s, const char *delim);
+    bool _parse_real_delim(const char *s, const char *delim, double *result, const char **new_s);
+    double parse_real_delim(const char *s, const char *delim);
+    alglib::complex parse_complex_delim(const char *s, const char *delim);
+
+    std::string arraytostring(const bool *ptr, ae_int_t n);
+    std::string arraytostring(const ae_int_t *ptr, ae_int_t n);
+    std::string arraytostring(const double *ptr, ae_int_t n, int dps);
+    std::string arraytostring(const alglib::complex *ptr, ae_int_t n, int dps);
+}
+
+/********************************************************************
+Global and local constants
+********************************************************************/
+const double alglib::machineepsilon = 5E-16;
+const double alglib::maxrealnumber  = 1E300;
+const double alglib::minrealnumber  = 1E-300;
+const alglib::ae_int_t alglib::endianness =  alglib_impl::ae_get_endianness();
+const double alglib::fp_nan         =  alglib::get_aenv_nan();
+const double alglib::fp_posinf      =  alglib::get_aenv_posinf();
+const double alglib::fp_neginf      =  alglib::get_aenv_neginf();
+
+
+/********************************************************************
+ap_error
+********************************************************************/
+alglib::ap_error::ap_error()
+{
+}
+
+alglib::ap_error::ap_error(const char *s)
+{
+    msg = s; 
+}
+
+void alglib::ap_error::make_assertion(bool bClause)
+{
+    if(!bClause) 
+        throw ap_error(); 
+}
+
+void alglib::ap_error::make_assertion(bool bClause, const char *msg)
+{ 
+    if(!bClause) 
+        throw ap_error(msg); 
+}
+
+
+/********************************************************************
+Complex number with double precision.
+********************************************************************/
+alglib::complex::complex():x(0.0),y(0.0)
+{
+}
+
+alglib::complex::complex(const double &_x):x(_x),y(0.0)
+{
+}
+
+alglib::complex::complex(const double &_x, const double &_y):x(_x),y(_y)
+{
+}
+
+alglib::complex::complex(const alglib::complex &z):x(z.x),y(z.y)
+{
+}
+
+alglib::complex& alglib::complex::operator= (const double& v)
+{
+    x = v; 
+    y = 0.0; 
+    return *this; 
+}
+
+alglib::complex& alglib::complex::operator+=(const double& v)
+{
+    x += v;
+    return *this; 
+}
+
+alglib::complex& alglib::complex::operator-=(const double& v)
+{
+    x -= v;
+    return *this;
+}
+
+alglib::complex& alglib::complex::operator*=(const double& v)
+{
+    x *= v;
+    y *= v;
+    return *this; 
+}
+
+alglib::complex& alglib::complex::operator/=(const double& v)
+{
+    x /= v;
+    y /= v;
+    return *this;
+}
+
+alglib::complex& alglib::complex::operator= (const alglib::complex& z)
+{
+    x = z.x;
+    y = z.y;
+    return *this;
+}
+
+alglib::complex& alglib::complex::operator+=(const alglib::complex& z)
+{
+    x += z.x;
+    y += z.y;
+    return *this;
+}
+
+alglib::complex& alglib::complex::operator-=(const alglib::complex& z)
+{
+    x -= z.x;
+    y -= z.y;
+    return *this;
+}
+
+alglib::complex& alglib::complex::operator*=(const alglib::complex& z)
+{
+    double t = x*z.x-y*z.y;
+    y = x*z.y+y*z.x;
+    x = t; 
+    return *this;
+}
+
+alglib::complex& alglib::complex::operator/=(const alglib::complex& z)
+{
+    alglib::complex result;
+    double e;
+    double f;
+    if( fabs(z.y)<fabs(z.x) )
+    {
+        e = z.y/z.x;
+        f = z.x+z.y*e;
+        result.x = (x+y*e)/f;
+        result.y = (y-x*e)/f;
+    }
+    else
+    {
+        e = z.x/z.y;
+        f = z.y+z.x*e;
+        result.x = (y+x*e)/f;
+        result.y = (-x+y*e)/f;
+    }
+    *this = result;
+    return *this;
+}
+
+alglib_impl::ae_complex* alglib::complex::c_ptr()
+{
+    return (alglib_impl::ae_complex*)this;
+}
+
+const alglib_impl::ae_complex* alglib::complex::c_ptr() const
+{
+    return (const alglib_impl::ae_complex*)this;
+}
+    
+std::string alglib::complex::tostring(int dps) const
+{
+    char mask[32];
+    char buf_x[32];
+    char buf_y[32];
+    char buf_zero[32];
+    if( dps<=0 || dps>=20 )
+        throw ap_error("complex::tostring(): incorrect dps");
+
+    // handle IEEE special quantities
+    if( fp_isnan(x) || fp_isnan(y) )
+        return "NAN";
+    if( fp_isinf(x) || fp_isinf(y) )
+        return "INF";
+
+    // generate mask
+    if( sprintf(mask, "%%.%df", dps)>=(int)sizeof(mask) )
+        throw ap_error("complex::tostring(): buffer overflow");
+
+    // print |x|, |y| and zero with same mask and compare
+    if( sprintf(buf_x, mask, (double)(fabs(x)))>=(int)sizeof(buf_x) )
+        throw ap_error("complex::tostring(): buffer overflow");
+    if( sprintf(buf_y, mask, (double)(fabs(y)))>=(int)sizeof(buf_y) )
+        throw ap_error("complex::tostring(): buffer overflow");
+    if( sprintf(buf_zero, mask, (double)0)>=(int)sizeof(buf_zero) )
+        throw ap_error("complex::tostring(): buffer overflow");
+
+    // different zero/nonzero patterns
+    if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)!=0 )
+        return std::string(x>0 ? "" : "-")+buf_x+(y>0 ? "+" : "-")+buf_y+"i";
+    if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)==0 )
+        return std::string(x>0 ? "" : "-")+buf_x;
+    if( strcmp(buf_x,buf_zero)==0 && strcmp(buf_y,buf_zero)!=0 )
+        return std::string(y>0 ? "" : "-")+buf_y+"i";
+    return std::string("0");
+}
+
+const bool alglib::operator==(const alglib::complex& lhs, const alglib::complex& rhs)
+{
+    volatile double x1 = lhs.x;
+    volatile double x2 = rhs.x;
+    volatile double y1 = lhs.y;
+    volatile double y2 = rhs.y;
+    return x1==x2 && y1==y2;
+}
+
+const bool alglib::operator!=(const alglib::complex& lhs, const alglib::complex& rhs)
+{ return !(lhs==rhs); }
+
+const alglib::complex alglib::operator+(const alglib::complex& lhs)
+{ return lhs; }
+
+const alglib::complex alglib::operator-(const alglib::complex& lhs)
+{ return alglib::complex(-lhs.x, -lhs.y); }
+
+const alglib::complex alglib::operator+(const alglib::complex& lhs, const alglib::complex& rhs)
+{ alglib::complex r = lhs; r += rhs; return r; }
+
+const alglib::complex alglib::operator+(const alglib::complex& lhs, const double& rhs)
+{ alglib::complex r = lhs; r += rhs; return r; }
+
+const alglib::complex alglib::operator+(const double& lhs, const alglib::complex& rhs)
+{ alglib::complex r = rhs; r += lhs; return r; }
+
+const alglib::complex alglib::operator-(const alglib::complex& lhs, const alglib::complex& rhs)
+{ alglib::complex r = lhs; r -= rhs; return r; }
+
+const alglib::complex alglib::operator-(const alglib::complex& lhs, const double& rhs)
+{ alglib::complex r = lhs; r -= rhs; return r; }
+
+const alglib::complex alglib::operator-(const double& lhs, const alglib::complex& rhs)
+{ alglib::complex r = lhs; r -= rhs; return r; }
+
+const alglib::complex alglib::operator*(const alglib::complex& lhs, const alglib::complex& rhs)
+{ return alglib::complex(lhs.x*rhs.x - lhs.y*rhs.y,  lhs.x*rhs.y + lhs.y*rhs.x); }
+
+const alglib::complex alglib::operator*(const alglib::complex& lhs, const double& rhs)
+{ return alglib::complex(lhs.x*rhs,  lhs.y*rhs); }
+
+const alglib::complex alglib::operator*(const double& lhs, const alglib::complex& rhs)
+{ return alglib::complex(lhs*rhs.x,  lhs*rhs.y); }
+
+const alglib::complex alglib::operator/(const alglib::complex& lhs, const alglib::complex& rhs)
+{
+    alglib::complex result;
+    double e;
+    double f;
+    if( fabs(rhs.y)<fabs(rhs.x) )
+    {
+        e = rhs.y/rhs.x;
+        f = rhs.x+rhs.y*e;
+        result.x = (lhs.x+lhs.y*e)/f;
+        result.y = (lhs.y-lhs.x*e)/f;
+    }
+    else
+    {
+        e = rhs.x/rhs.y;
+        f = rhs.y+rhs.x*e;
+        result.x = (lhs.y+lhs.x*e)/f;
+        result.y = (-lhs.x+lhs.y*e)/f;
+    }
+    return result;
+}
+
+const alglib::complex alglib::operator/(const double& lhs, const alglib::complex& rhs)
+{
+    alglib::complex result;
+    double e;
+    double f;
+    if( fabs(rhs.y)<fabs(rhs.x) )
+    {
+        e = rhs.y/rhs.x;
+        f = rhs.x+rhs.y*e;
+        result.x = lhs/f;
+        result.y = -lhs*e/f;
+    }
+    else
+    {
+        e = rhs.x/rhs.y;
+        f = rhs.y+rhs.x*e;
+        result.x = lhs*e/f;
+        result.y = -lhs/f;
+    }
+    return result;
+}
+
+const alglib::complex alglib::operator/(const alglib::complex& lhs, const double& rhs)
+{ return alglib::complex(lhs.x/rhs, lhs.y/rhs); }
+
+double alglib::abscomplex(const alglib::complex &z)
+{
+    double w;
+    double xabs;
+    double yabs;
+    double v;
+
+    xabs = fabs(z.x);
+    yabs = fabs(z.y);
+    w = xabs>yabs ? xabs : yabs;
+    v = xabs<yabs ? xabs : yabs; 
+    if( v==0 )
+        return w;
+    else
+    {
+        double t = v/w;
+        return w*sqrt(1+t*t);
+    }
+}
+
+alglib::complex alglib::conj(const alglib::complex &z)
+{ return alglib::complex(z.x, -z.y); }
+
+alglib::complex alglib::csqr(const alglib::complex &z)
+{ return alglib::complex(z.x*z.x-z.y*z.y, 2*z.x*z.y); }
+
+
+/********************************************************************
+Level 1 BLAS functions
+********************************************************************/
+double alglib::vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
+{
+    double result = 0;
+    ae_int_t i;
+    if( stride0!=1 || stride1!=1 )
+    {
+        //
+        // slow general code
+        //
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+            result += (*v0)*(*v1);
+    }
+    else
+    {
+        //
+        // optimized code for stride=1
+        //
+        ae_int_t n4 = n/4;
+        ae_int_t nleft = n%4;
+        for(i=0; i<n4; i++, v0+=4, v1+=4)
+            result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
+        for(i=0; i<nleft; i++, v0++, v1++)
+            result += v0[0]*v1[0];
+    }
+    return result;
+}
+
+double alglib::vdotproduct(const double *v1, const double *v2, ae_int_t N)
+{
+    return vdotproduct(v1, 1, v2, 1, N);
+}
+
+alglib::complex alglib::vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
+{
+    double rx = 0, ry = 0;
+    ae_int_t i;
+    bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
+    bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
+    if( bconj0 && bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = -v0->y;
+            v1x = v1->x;
+            v1y = -v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    if( !bconj0 && bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = v0->y;
+            v1x = v1->x;
+            v1y = -v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    if( bconj0 && !bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = -v0->y;
+            v1x = v1->x;
+            v1y = v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    if( !bconj0 && !bconj1 )
+    {
+        double v0x, v0y, v1x, v1y;
+        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
+        {
+            v0x = v0->x;
+            v0y = v0->y;
+            v1x = v1->x;
+            v1y = v1->y;
+            rx += v0x*v1x-v0y*v1y;
+            ry += v0x*v1y+v0y*v1x;
+        }
+    }
+    return alglib::complex(rx,ry);
+}
+
+alglib::complex alglib::vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N)
+{
+    return vdotproduct(v1, 1, "N", v2, 1, "N", N);
+}
+
+void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst = *vsrc;
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] = vsrc[0];
+            vdst[1] = vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] = vsrc[0];
+    }
+}
+
+void alglib::vmove(double *vdst, const double* vsrc, ae_int_t N)
+{
+    vmove(vdst, 1, vsrc, 1, N);
+}
+
+void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x =  vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+                *vdst = *vsrc;
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x =  vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+                *vdst = *vsrc;
+        }
+    }
+}
+
+void alglib::vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N)
+{
+    vmove(vdst, 1, vsrc, 1, "N", N);
+}
+
+void alglib::vmoveneg(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst = -*vsrc;
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] = -vsrc[0];
+            vdst[1] = -vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] = -vsrc[0];
+    }
+}
+
+void alglib::vmoveneg(double *vdst, const double *vsrc, ae_int_t N)
+{
+    vmoveneg(vdst, 1, vsrc, 1, N);
+}
+
+void alglib::vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y =  vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y =  vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = -vsrc->x;
+                vdst->y = -vsrc->y;
+            }
+        }
+    }
+}
+
+void alglib::vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
+{
+    vmoveneg(vdst, 1, vsrc, 1, "N", N);
+}
+
+void alglib::vmove(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst = alpha*(*vsrc);
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] = alpha*vsrc[0];
+            vdst[1] = alpha*vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] = alpha*vsrc[0];
+    }
+}
+
+void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha)
+{
+    vmove(vdst, 1, vsrc, 1, N, alpha);
+}
+
+void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x =  alpha*vsrc->x;
+                vdst->y = -alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = alpha*vsrc->x;
+                vdst->y = alpha*vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x =  alpha*vsrc->x;
+                vdst->y = -alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = alpha*vsrc->x;
+                vdst->y = alpha*vsrc->y;
+            }
+        }
+    }
+}
+
+void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
+{
+    vmove(vdst, 1, vsrc, 1, "N", N, alpha);
+}
+
+void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x =  ax*vsrc->x+ay*vsrc->y;
+                vdst->y = -ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+        else
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x = ax*vsrc->x-ay*vsrc->y;
+                vdst->y = ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x =  ax*vsrc->x+ay*vsrc->y;
+                vdst->y = -ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+        else
+        {
+            double ax = alpha.x, ay = alpha.y;
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x = ax*vsrc->x-ay*vsrc->y;
+                vdst->y = ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+}
+
+void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
+{
+    vmove(vdst, 1, vsrc, 1, "N", N, alpha);
+}
+
+void alglib::vadd(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst += *vsrc;
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] += vsrc[0];
+            vdst[1] += vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] += vsrc[0];
+    }
+}
+
+void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N)
+{
+    vadd(vdst, 1, vsrc, 1, N);
+}
+
+void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+    }
+}
+
+void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
+{
+    vadd(vdst, 1, vsrc, 1, "N", N);
+}
+
+void alglib::vadd(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst += alpha*(*vsrc);
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] += alpha*vsrc[0];
+            vdst[1] += alpha*vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] += alpha*vsrc[0];
+    }
+}
+
+void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha)
+{
+    vadd(vdst, 1, vsrc, 1, N, alpha);
+}
+
+void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y -= alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y += alpha*vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y -= alpha*vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += alpha*vsrc->x;
+                vdst->y += alpha*vsrc->y;
+            }
+        }
+    }
+}
+
+void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
+{
+    vadd(vdst, 1, vsrc, 1, "N", N, alpha);
+}
+
+void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        double ax = alpha.x, ay = alpha.y;
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += ax*vsrc->x+ay*vsrc->y;
+                vdst->y -= ax*vsrc->y-ay*vsrc->x;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x += ax*vsrc->x-ay*vsrc->y;
+                vdst->y += ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        double ax = alpha.x, ay = alpha.y;
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += ax*vsrc->x+ay*vsrc->y;
+                vdst->y -= ax*vsrc->y-ay*vsrc->x;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x += ax*vsrc->x-ay*vsrc->y;
+                vdst->y += ax*vsrc->y+ay*vsrc->x;
+            }
+        }
+    }
+}
+
+void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
+{
+    vadd(vdst, 1, vsrc, 1, "N", N, alpha);
+}
+
+void alglib::vsub(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n)
+{
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            *vdst -= *vsrc;
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        ae_int_t n2 = n/2;
+        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
+        {
+            vdst[0] -= vsrc[0];
+            vdst[1] -= vsrc[1];
+        }
+        if( n%2!=0 )
+            vdst[0] -= vsrc[0];
+    }
+}
+
+void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N)
+{
+    vsub(vdst, 1, vsrc, 1, N);
+}
+
+void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
+{
+    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
+    ae_int_t i;
+    if( stride_dst!=1 || stride_src!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        if( bconj )
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y += vsrc->y;
+            }
+        }
+        else
+        {
+            for(i=0; i<n; i++, vdst++, vsrc++)
+            {
+                vdst->x -= vsrc->x;
+                vdst->y -= vsrc->y;
+            }
+        }
+    }
+}
+
+void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
+{
+    vsub(vdst, 1, vsrc, 1, "N", N);
+}
+
+void alglib::vsub(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha)
+{
+    vadd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
+}
+
+void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha)
+{
+    vadd(vdst, 1, vsrc, 1, N, -alpha);
+}
+
+void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
+{
+    vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
+}
+
+void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, double alpha)
+{
+    vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
+}
+
+void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
+{
+    vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
+}
+
+void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, alglib::complex alpha)
+{
+    vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
+}
+void alglib::vmul(double *vdst,  ae_int_t stride_dst, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst)
+            *vdst *= alpha;
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        for(i=0; i<n; i++, vdst++)
+            *vdst *= alpha;
+    }
+}
+
+void alglib::vmul(double *vdst, ae_int_t N, double alpha)
+{
+    vmul(vdst, 1, N, alpha);
+}
+
+void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        for(i=0; i<n; i++, vdst+=stride_dst)
+        {
+            vdst->x *= alpha;
+            vdst->y *= alpha;
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        for(i=0; i<n; i++, vdst++)
+        {
+            vdst->x *= alpha;
+            vdst->y *= alpha;
+        }
+    }
+}
+
+void alglib::vmul(alglib::complex *vdst, ae_int_t N, double alpha)
+{
+    vmul(vdst, 1, N, alpha);
+}
+
+void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha)
+{
+    ae_int_t i;
+    if( stride_dst!=1 )
+    {
+        //
+        // general unoptimized case
+        //
+        double ax = alpha.x, ay = alpha.y;
+        for(i=0; i<n; i++, vdst+=stride_dst)
+        {
+            double  dstx = vdst->x, dsty = vdst->y;
+            vdst->x = ax*dstx-ay*dsty;
+            vdst->y = ax*dsty+ay*dstx;
+        }
+    }
+    else
+    {
+        //
+        // optimized case
+        //
+        double ax = alpha.x, ay = alpha.y;
+        for(i=0; i<n; i++, vdst++)
+        {
+            double  dstx = vdst->x, dsty = vdst->y;
+            vdst->x = ax*dstx-ay*dsty;
+            vdst->y = ax*dsty+ay*dstx;
+        }
+    }
+}
+
+void alglib::vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha)
+{
+    vmul(vdst, 1, N, alpha);
+}
+
+
+/********************************************************************
+Matrices and vectors
+********************************************************************/
+alglib::ae_vector_wrapper::ae_vector_wrapper()
+{
+    p_vec = NULL;
+}
+
+alglib::ae_vector_wrapper::~ae_vector_wrapper()
+{
+    if( p_vec==&vec )
+        ae_vector_clear(p_vec);
+}
+
+alglib::ae_vector_wrapper::ae_vector_wrapper(const alglib::ae_vector_wrapper &rhs)
+{
+    if( rhs.p_vec!=NULL )
+    {
+        p_vec = &vec;
+        if( !ae_vector_init_copy(p_vec, rhs.p_vec, NULL, ae_false) )
+            throw alglib::ap_error("ALGLIB: malloc error!");
+    }
+    else
+        p_vec = NULL;
+}
+
+const alglib::ae_vector_wrapper& alglib::ae_vector_wrapper::operator=(const alglib::ae_vector_wrapper &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    if( p_vec==&vec )
+        ae_vector_clear(p_vec);
+    if( rhs.p_vec!=NULL )
+    {
+        p_vec = &vec;
+        if( !ae_vector_init_copy(p_vec, rhs.p_vec, NULL, ae_false) )
+            throw alglib::ap_error("ALGLIB: malloc error!");
+    }
+    else
+        p_vec = NULL;
+    return *this;
+}
+
+void alglib::ae_vector_wrapper::setlength(ae_int_t iLen)
+{
+    if( p_vec==NULL )
+        throw alglib::ap_error("ALGLIB: setlength() error, p_vec==NULL (array was not correctly initialized)");
+    if( p_vec!=&vec )
+        throw alglib::ap_error("ALGLIB: setlength() error, p_vec!=&vec (attempt to resize frozen array)");
+    if( !ae_vector_set_length(p_vec, iLen, NULL) )
+        throw alglib::ap_error("ALGLIB: malloc error");
+}
+
+alglib::ae_int_t alglib::ae_vector_wrapper::length() const
+{
+    if( p_vec==NULL )
+        return 0;
+    return p_vec->cnt;
+}
+
+void alglib::ae_vector_wrapper::attach_to(alglib_impl::ae_vector *ptr)
+{
+    if( ptr==&vec )
+        throw alglib::ap_error("ALGLIB: attempt to attach vector to itself");
+    if( p_vec==&vec )
+        ae_vector_clear(p_vec);
+    p_vec = ptr;
+}
+
+void alglib::ae_vector_wrapper::allocate_own(ae_int_t size, alglib_impl::ae_datatype datatype)
+{
+    if( p_vec==&vec )
+        ae_vector_clear(p_vec);
+    p_vec = &vec;
+    if( !ae_vector_init(p_vec, size, datatype, NULL, false) )
+        throw alglib::ap_error("ALGLIB: malloc error");
+}
+
+const alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() const
+{
+    return p_vec;
+}
+
+alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr()
+{
+    return p_vec;
+}
+
+alglib::boolean_1d_array::boolean_1d_array()  
+{
+    allocate_own(0, alglib_impl::DT_BOOL);
+}
+
+alglib::boolean_1d_array::boolean_1d_array(const char *s)  
+{
+    std::vector<const char*> svec;
+    size_t i;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_vector_create(p, true, &svec);
+        allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_BOOL);
+        for(i=0; i<svec.size(); i++)
+            operator()((ae_int_t)i) = parse_bool_delim(svec[i],",]");
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::boolean_1d_array::boolean_1d_array(alglib_impl::ae_vector *p)  
+{
+    p_vec = NULL;
+    attach_to(p);
+}
+
+alglib::boolean_1d_array::~boolean_1d_array() 
+{
+}
+
+const ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) const
+{
+    return p_vec->ptr.p_bool[i];
+}
+
+ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i)
+{
+    return p_vec->ptr.p_bool[i];
+}
+
+const ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) const
+{
+    return p_vec->ptr.p_bool[i];
+}
+
+ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i)
+{
+    return p_vec->ptr.p_bool[i];
+}
+
+void alglib::boolean_1d_array::setcontent(ae_int_t iLen, const bool *pContent )
+{
+    ae_int_t i;
+    setlength(iLen);
+    for(i=0; i<iLen; i++)
+        p_vec->ptr.p_bool[i] = pContent[i];
+}
+
+ae_bool* alglib::boolean_1d_array::getcontent()
+{
+    return p_vec->ptr.p_bool;
+}
+
+const ae_bool* alglib::boolean_1d_array::getcontent() const
+{
+    return p_vec->ptr.p_bool;
+}
+
+std::string alglib::boolean_1d_array::tostring() const 
+{
+    if( length()==0 )
+        return "[]";
+    return arraytostring(&(operator()(0)), length());
+}
+
+alglib::integer_1d_array::integer_1d_array()  
+{
+    allocate_own(0, alglib_impl::DT_INT);
+}
+
+alglib::integer_1d_array::integer_1d_array(alglib_impl::ae_vector *p)  
+{
+    p_vec = NULL;
+    attach_to(p);
+}
+
+alglib::integer_1d_array::integer_1d_array(const char *s)  
+{
+    std::vector<const char*> svec;
+    size_t i;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_vector_create(p, true, &svec);
+        allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_INT);
+        for(i=0; i<svec.size(); i++)
+            operator()((ae_int_t)i) = parse_int_delim(svec[i],",]");
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::integer_1d_array::~integer_1d_array() 
+{
+}
+
+const alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) const
+{
+    return p_vec->ptr.p_int[i];
+}
+
+alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i)
+{
+    return p_vec->ptr.p_int[i];
+}
+
+const alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) const
+{
+    return p_vec->ptr.p_int[i];
+}
+
+alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i)
+{
+    return p_vec->ptr.p_int[i];
+}
+
+void alglib::integer_1d_array::setcontent(ae_int_t iLen, const ae_int_t *pContent )
+{
+    ae_int_t i;
+    setlength(iLen);
+    for(i=0; i<iLen; i++)
+        p_vec->ptr.p_int[i] = pContent[i];
+}
+
+alglib::ae_int_t* alglib::integer_1d_array::getcontent()
+{
+    return p_vec->ptr.p_int;
+}
+
+const alglib::ae_int_t* alglib::integer_1d_array::getcontent() const
+{
+    return p_vec->ptr.p_int;
+}
+
+std::string alglib::integer_1d_array::tostring() const 
+{
+    if( length()==0 )
+        return "[]";
+    return arraytostring(&operator()(0), length());
+}
+
+alglib::real_1d_array::real_1d_array()  
+{
+    allocate_own(0, alglib_impl::DT_REAL);
+}
+
+alglib::real_1d_array::real_1d_array(alglib_impl::ae_vector *p)  
+{
+    p_vec = NULL;
+    attach_to(p);
+}
+
+alglib::real_1d_array::real_1d_array(const char *s)  
+{
+    std::vector<const char*> svec;
+    size_t i;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_vector_create(p, true, &svec);
+        allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_REAL);
+        for(i=0; i<svec.size(); i++)
+            operator()((ae_int_t)i) = parse_real_delim(svec[i],",]");
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::real_1d_array::~real_1d_array() 
+{
+}
+
+const double& alglib::real_1d_array::operator()(ae_int_t i) const
+{
+    return p_vec->ptr.p_double[i];
+}
+
+double& alglib::real_1d_array::operator()(ae_int_t i)
+{
+    return p_vec->ptr.p_double[i];
+}
+
+const double& alglib::real_1d_array::operator[](ae_int_t i) const
+{
+    return p_vec->ptr.p_double[i];
+}
+
+double& alglib::real_1d_array::operator[](ae_int_t i)
+{
+    return p_vec->ptr.p_double[i];
+}
+
+void alglib::real_1d_array::setcontent(ae_int_t iLen, const double *pContent )
+{
+    ae_int_t i;
+    setlength(iLen);
+    for(i=0; i<iLen; i++)
+        p_vec->ptr.p_double[i] = pContent[i];
+}
+
+double* alglib::real_1d_array::getcontent()
+{
+    return p_vec->ptr.p_double;
+}
+
+const double* alglib::real_1d_array::getcontent() const
+{
+    return p_vec->ptr.p_double;
+}
+
+std::string alglib::real_1d_array::tostring(int dps) const 
+{
+    if( length()==0 )
+        return "[]";
+    return arraytostring(&operator()(0), length(), dps);
+}
+
+alglib::complex_1d_array::complex_1d_array()  
+{
+    allocate_own(0, alglib_impl::DT_COMPLEX);
+}
+
+alglib::complex_1d_array::complex_1d_array(alglib_impl::ae_vector *p)  
+{
+    p_vec = NULL;
+    attach_to(p);
+}
+
+alglib::complex_1d_array::complex_1d_array(const char *s)  
+{
+    std::vector<const char*> svec;
+    size_t i;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_vector_create(p, true, &svec);
+        allocate_own((ae_int_t)(svec.size()), alglib_impl::DT_COMPLEX);
+        for(i=0; i<svec.size(); i++)
+            operator()((ae_int_t)i) = parse_complex_delim(svec[i],",]");
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::complex_1d_array::~complex_1d_array() 
+{
+}
+
+const alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) const
+{
+    return *((const alglib::complex*)(p_vec->ptr.p_complex+i));
+}
+
+alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i)
+{
+    return *((alglib::complex*)(p_vec->ptr.p_complex+i));
+}
+
+const alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) const
+{
+    return *((const alglib::complex*)(p_vec->ptr.p_complex+i));
+}
+
+alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i)
+{
+    return *((alglib::complex*)(p_vec->ptr.p_complex+i));
+}
+
+void alglib::complex_1d_array::setcontent(ae_int_t iLen, const alglib::complex *pContent )
+{
+    ae_int_t i;
+    setlength(iLen);
+    for(i=0; i<iLen; i++)
+    {
+        p_vec->ptr.p_complex[i].x = pContent[i].x;
+        p_vec->ptr.p_complex[i].y = pContent[i].y;
+    }
+}
+
+ alglib::complex* alglib::complex_1d_array::getcontent()
+{
+    return (alglib::complex*)p_vec->ptr.p_complex;
+}
+
+const alglib::complex* alglib::complex_1d_array::getcontent() const
+{
+    return (const alglib::complex*)p_vec->ptr.p_complex;
+}
+
+std::string alglib::complex_1d_array::tostring(int dps) const 
+{
+    if( length()==0 )
+        return "[]";
+    return arraytostring(&operator()(0), length(), dps);
+}
+
+alglib::ae_matrix_wrapper::ae_matrix_wrapper()
+{
+    p_mat = NULL;
+}
+
+alglib::ae_matrix_wrapper::~ae_matrix_wrapper()
+{
+    if( p_mat==&mat )
+        ae_matrix_clear(p_mat);
+}
+
+alglib::ae_matrix_wrapper::ae_matrix_wrapper(const alglib::ae_matrix_wrapper &rhs)
+{
+    if( rhs.p_mat!=NULL )
+    {
+        p_mat = &mat;
+        if( !ae_matrix_init_copy(p_mat, rhs.p_mat, NULL, ae_false) )
+            throw alglib::ap_error("ALGLIB: malloc error!");
+    }
+    else
+        p_mat = NULL;
+}
+
+const alglib::ae_matrix_wrapper& alglib::ae_matrix_wrapper::operator=(const alglib::ae_matrix_wrapper &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    if( p_mat==&mat )
+        ae_matrix_clear(p_mat);
+    if( rhs.p_mat!=NULL )
+    {
+        p_mat = &mat;
+        if( !ae_matrix_init_copy(p_mat, rhs.p_mat, NULL, ae_false) )
+            throw alglib::ap_error("ALGLIB: malloc error!");
+    }
+    else
+        p_mat = NULL;
+    return *this;
+}
+
+void alglib::ae_matrix_wrapper::setlength(ae_int_t rows, ae_int_t cols)
+{
+    if( p_mat==NULL )
+        throw alglib::ap_error("ALGLIB: setlength() error, p_mat==NULL (array was not correctly initialized)");
+    if( p_mat!=&mat )
+        throw alglib::ap_error("ALGLIB: setlength() error, p_mat!=&mat (attempt to resize frozen array)");
+    if( !ae_matrix_set_length(p_mat, rows, cols, NULL) )
+        throw alglib::ap_error("ALGLIB: malloc error");
+}
+
+alglib::ae_int_t alglib::ae_matrix_wrapper::rows() const
+{
+    if( p_mat==NULL )
+        return 0;
+    return p_mat->rows;
+}
+
+alglib::ae_int_t alglib::ae_matrix_wrapper::cols() const
+{
+    if( p_mat==NULL )
+        return 0;
+    return p_mat->cols;
+}
+
+bool alglib::ae_matrix_wrapper::isempty() const
+{
+    return rows()==0 || cols()==0;
+}
+
+alglib::ae_int_t alglib::ae_matrix_wrapper::getstride() const
+{
+    if( p_mat==NULL )
+        return 0;
+    return p_mat->stride;
+}
+
+void alglib::ae_matrix_wrapper::attach_to(alglib_impl::ae_matrix *ptr)
+{
+    if( ptr==&mat )
+        throw alglib::ap_error("ALGLIB: attempt to attach matrix to itself");
+    if( p_mat==&mat )
+        ae_matrix_clear(p_mat);
+    p_mat = ptr;
+}
+
+void alglib::ae_matrix_wrapper::allocate_own(ae_int_t rows, ae_int_t cols, alglib_impl::ae_datatype datatype)
+{
+    if( p_mat==&mat )
+        ae_matrix_clear(p_mat);
+    p_mat = &mat;
+    if( !ae_matrix_init(p_mat, rows, cols, datatype, NULL, false) )
+        throw alglib::ap_error("ALGLIB: malloc error");
+}
+
+const alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() const
+{
+    return p_mat;
+}
+
+alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr()
+{
+    return p_mat;
+}
+
+alglib::boolean_2d_array::boolean_2d_array()  
+{
+    allocate_own(0, 0, alglib_impl::DT_BOOL);
+}
+
+alglib::boolean_2d_array::boolean_2d_array(alglib_impl::ae_matrix *p)  
+{
+    p_mat = NULL;
+    attach_to(p);
+}
+
+alglib::boolean_2d_array::boolean_2d_array(const char *s)  
+{
+    std::vector< std::vector<const char*> > smat;
+    size_t i, j;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_matrix_create(p, &smat);
+        if( smat.size()!=0 )
+        {
+            allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_BOOL);
+            for(i=0; i<smat.size(); i++)
+                for(j=0; j<smat[0].size(); j++)
+                    operator()((ae_int_t)i,(ae_int_t)j) = parse_bool_delim(smat[i][j],",]");
+        }
+        else
+            allocate_own(0, 0, alglib_impl::DT_BOOL);
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::boolean_2d_array::~boolean_2d_array() 
+{
+}
+
+const ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) const
+{
+    return p_mat->ptr.pp_bool[i][j];
+}
+
+ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j)
+{
+    return p_mat->ptr.pp_bool[i][j];
+}
+
+const ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) const
+{
+    return p_mat->ptr.pp_bool[i];
+}
+
+ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i)
+{
+    return p_mat->ptr.pp_bool[i];
+}
+
+void alglib::boolean_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent )
+{
+    ae_int_t i, j;
+    setlength(irows, icols);
+    for(i=0; i<irows; i++)
+        for(j=0; j<icols; j++)
+            p_mat->ptr.pp_bool[i][j] = pContent[i*icols+j];
+}
+
+std::string alglib::boolean_2d_array::tostring() const 
+{
+    std::string result;
+    ae_int_t i;
+    if( isempty() )
+        return "[[]]";
+    result = "[";
+    for(i=0; i<rows(); i++)
+    {
+        if( i!=0 )
+            result += ",";
+        result += arraytostring(&operator()(i,0), cols());
+    }
+    result += "]";
+    return result;
+}
+
+alglib::integer_2d_array::integer_2d_array()  
+{
+    allocate_own(0, 0, alglib_impl::DT_INT);
+}
+
+alglib::integer_2d_array::integer_2d_array(alglib_impl::ae_matrix *p)  
+{
+    p_mat = NULL;
+    attach_to(p);
+}
+
+alglib::integer_2d_array::integer_2d_array(const char *s)  
+{
+    std::vector< std::vector<const char*> > smat;
+    size_t i, j;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_matrix_create(p, &smat);
+        if( smat.size()!=0 )
+        {
+            allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_INT);
+            for(i=0; i<smat.size(); i++)
+                for(j=0; j<smat[0].size(); j++)
+                    operator()((ae_int_t)i,(ae_int_t)j) = parse_int_delim(smat[i][j],",]");
+        }
+        else
+            allocate_own(0, 0, alglib_impl::DT_INT);
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::integer_2d_array::~integer_2d_array() 
+{
+}
+
+const alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) const
+{
+    return p_mat->ptr.pp_int[i][j];
+}
+
+alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j)
+{
+    return p_mat->ptr.pp_int[i][j];
+}
+
+const alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) const
+{
+    return p_mat->ptr.pp_int[i];
+}
+
+alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i)
+{
+    return p_mat->ptr.pp_int[i];
+}
+
+void alglib::integer_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent )
+{
+    ae_int_t i, j;
+    setlength(irows, icols);
+    for(i=0; i<irows; i++)
+        for(j=0; j<icols; j++)
+            p_mat->ptr.pp_int[i][j] = pContent[i*icols+j];
+}
+
+std::string alglib::integer_2d_array::tostring() const 
+{
+    std::string result;
+    ae_int_t i;
+    if( isempty() )
+        return "[[]]";
+    result = "[";
+    for(i=0; i<rows(); i++)
+    {
+        if( i!=0 )
+            result += ",";
+        result += arraytostring(&operator()(i,0), cols());
+    }
+    result += "]";
+    return result;
+}
+
+alglib::real_2d_array::real_2d_array()  
+{
+    allocate_own(0, 0, alglib_impl::DT_REAL);
+}
+
+alglib::real_2d_array::real_2d_array(alglib_impl::ae_matrix *p)
+{
+    p_mat = NULL;
+    attach_to(p);
+}
+
+alglib::real_2d_array::real_2d_array(const char *s)  
+{
+    std::vector< std::vector<const char*> > smat;
+    size_t i, j;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_matrix_create(p, &smat);
+        if( smat.size()!=0 )
+        {
+            allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_REAL);
+            for(i=0; i<smat.size(); i++)
+                for(j=0; j<smat[0].size(); j++)
+                    operator()((ae_int_t)i,(ae_int_t)j) = parse_real_delim(smat[i][j],",]");
+        }
+        else
+            allocate_own(0, 0, alglib_impl::DT_REAL);
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::real_2d_array::~real_2d_array() 
+{
+}
+
+const double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) const
+{
+    return p_mat->ptr.pp_double[i][j];
+}
+
+double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j)
+{
+    return p_mat->ptr.pp_double[i][j];
+}
+
+const double* alglib::real_2d_array::operator[](ae_int_t i) const
+{
+    return p_mat->ptr.pp_double[i];
+}
+
+double* alglib::real_2d_array::operator[](ae_int_t i)
+{
+    return p_mat->ptr.pp_double[i];
+}
+
+void alglib::real_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const double *pContent )
+{
+    ae_int_t i, j;
+    setlength(irows, icols);
+    for(i=0; i<irows; i++)
+        for(j=0; j<icols; j++)
+            p_mat->ptr.pp_double[i][j] = pContent[i*icols+j];
+}
+
+std::string alglib::real_2d_array::tostring(int dps) const 
+{
+    std::string result;
+    ae_int_t i;
+    if( isempty() )
+        return "[[]]";
+    result = "[";
+    for(i=0; i<rows(); i++)
+    {
+        if( i!=0 )
+            result += ",";
+        result += arraytostring(&operator()(i,0), cols(), dps);
+    }
+    result += "]";
+    return result;
+}
+
+alglib::complex_2d_array::complex_2d_array()  
+{
+    allocate_own(0, 0, alglib_impl::DT_COMPLEX);
+}
+
+alglib::complex_2d_array::complex_2d_array(alglib_impl::ae_matrix *p)  
+{
+    p_mat = NULL;
+    attach_to(p);
+}
+
+alglib::complex_2d_array::complex_2d_array(const char *s)  
+{
+    std::vector< std::vector<const char*> > smat;
+    size_t i, j;
+    char *p = filter_spaces(s);
+    try
+    {
+        str_matrix_create(p, &smat);
+        if( smat.size()!=0 )
+        {
+            allocate_own((ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), alglib_impl::DT_COMPLEX);
+            for(i=0; i<smat.size(); i++)
+                for(j=0; j<smat[0].size(); j++)
+                    operator()((ae_int_t)i,(ae_int_t)j) = parse_complex_delim(smat[i][j],",]");
+        }
+        else
+            allocate_own(0, 0, alglib_impl::DT_COMPLEX);
+        alglib_impl::ae_free(p);
+    }
+    catch(...)
+    {
+        alglib_impl::ae_free(p);
+        throw;
+    }
+}
+
+alglib::complex_2d_array::~complex_2d_array() 
+{
+}
+
+const alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) const
+{
+    return *((const alglib::complex*)(p_mat->ptr.pp_complex[i]+j));
+}
+
+alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j)
+{
+    return *((alglib::complex*)(p_mat->ptr.pp_complex[i]+j));
+}
+
+const alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) const
+{
+    return (const alglib::complex*)(p_mat->ptr.pp_complex[i]);
+}
+
+alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i)
+{
+    return (alglib::complex*)(p_mat->ptr.pp_complex[i]);
+}
+
+void alglib::complex_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent )
+{
+    ae_int_t i, j;
+    setlength(irows, icols);
+    for(i=0; i<irows; i++)
+        for(j=0; j<icols; j++)
+        {
+            p_mat->ptr.pp_complex[i][j].x = pContent[i*icols+j].x;
+            p_mat->ptr.pp_complex[i][j].y = pContent[i*icols+j].y;
+        }
+}
+
+std::string alglib::complex_2d_array::tostring(int dps) const 
+{
+    std::string result;
+    ae_int_t i;
+    if( isempty() )
+        return "[[]]";
+    result = "[";
+    for(i=0; i<rows(); i++)
+    {
+        if( i!=0 )
+            result += ",";
+        result += arraytostring(&operator()(i,0), cols(), dps);
+    }
+    result += "]";
+    return result;
+}
+
+
+/********************************************************************
+Internal functions
+********************************************************************/
+double alglib::get_aenv_nan()
+{
+    double r;
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    r = _alglib_env_state.v_nan;
+    alglib_impl::ae_state_clear(&_alglib_env_state);
+    return r;
+}
+
+double alglib::get_aenv_posinf()
+{
+    double r;
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    r = _alglib_env_state.v_posinf;
+    alglib_impl::ae_state_clear(&_alglib_env_state);
+    return r;
+}
+
+double alglib::get_aenv_neginf()
+{
+    double r;
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    r = _alglib_env_state.v_neginf;
+    alglib_impl::ae_state_clear(&_alglib_env_state);
+    return r;
+}
+
+alglib::ae_int_t alglib::my_stricmp(const char *s1, const char *s2)
+{
+    int c1, c2;
+    
+    //
+    // handle special cases
+    //
+    if(s1==NULL && s2!=NULL)
+        return -1;
+    if(s1!=NULL && s2==NULL)
+        return +1;
+    if(s1==NULL && s2==NULL)
+        return 0;
+
+    //
+    // compare
+    //
+    for (;;)
+    {
+        c1 = *s1;
+        c2 = *s2;
+        s1++;
+        s2++;
+        if( c1==0 )
+            return c2==0 ? 0 : -1;
+        if( c2==0 )
+            return c1==0 ? 0 : +1;
+        c1 = tolower(c1);
+        c2 = tolower(c2);
+        if( c1<c2 )
+            return -1;
+        if( c1>c2 )
+            return +1;
+    }
+}
+
+char* alglib::filter_spaces(const char *s)
+{
+    size_t i, j, n;
+    char *r;
+    char *r0;
+    n = strlen(s);
+    r = (char*)alglib_impl::ae_malloc(n+1, NULL);
+    if( r==NULL )
+        throw ap_error("malloc error");
+    for(i=0,j=0,r0=r; i<=n; i++,s++)
+        if( !isspace(*s) )
+        {
+            *r0 = *s;
+            r0++;
+        }
+    return r;
+}
+
+void alglib::str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec)
+{
+    //
+    // parse beginning of the string.
+    // try to handle "[]" string
+    //
+    p_vec->clear();
+    if( *src!='[' )
+        throw alglib::ap_error("Incorrect initializer for vector");
+    src++;
+    if( *src==']' )
+        return;
+    p_vec->push_back(src);
+    for(;;)
+    {
+        if( *src==0 )
+            throw alglib::ap_error("Incorrect initializer for vector");
+        if( *src==']' )
+        {
+            if( src[1]==0 || !match_head_only)
+                return;
+            throw alglib::ap_error("Incorrect initializer for vector");
+        }
+        if( *src==',' )
+        {
+            p_vec->push_back(src+1);
+            src++;
+            continue;
+        }
+        src++;
+    }
+}
+
+void alglib::str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat)
+{
+    p_mat->clear();
+    
+    //
+    // Try to handle "[[]]" string
+    //
+    if( strcmp(src, "[[]]")==0 )
+        return;
+
+    //
+    // Parse non-empty string
+    //
+    if( *src!='[' )
+        throw alglib::ap_error("Incorrect initializer for matrix");
+    src++;
+    for(;;)
+    {
+        p_mat->push_back(std::vector<const char*>());
+        str_vector_create(src, false, &p_mat->back());
+        if( p_mat->back().size()==0 || p_mat->back().size()!=(*p_mat)[0].size() )
+            throw alglib::ap_error("Incorrect initializer for matrix");
+        src = strchr(src, ']');
+        if( src==NULL )
+            throw alglib::ap_error("Incorrect initializer for matrix");
+        src++;
+        if( *src==',' )
+        {
+            src++;
+            continue;
+        }
+        if( *src==']' )
+            break;
+        throw alglib::ap_error("Incorrect initializer for matrix");
+    }
+    src++;
+    if( *src!=0 )
+        throw alglib::ap_error("Incorrect initializer for matrix");
+}
+
+ae_bool alglib::parse_bool_delim(const char *s, const char *delim)
+{
+    const char *p;
+    char buf[8];
+    
+    // try to parse false
+    p = "false";
+    memset(buf, 0, sizeof(buf));
+    strncpy(buf, s, strlen(p));
+    if( my_stricmp(buf, p)==0 )
+    {
+        if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
+            throw alglib::ap_error("Cannot parse value");
+        return ae_false;
+    }
+
+    // try to parse true
+    p = "true";
+    memset(buf, 0, sizeof(buf));
+    strncpy(buf, s, strlen(p));
+    if( my_stricmp(buf, p)==0 )
+    {
+        if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
+            throw alglib::ap_error("Cannot parse value");
+        return ae_true;
+    }
+
+    // error
+    throw alglib::ap_error("Cannot parse value");
+}
+
+alglib::ae_int_t alglib::parse_int_delim(const char *s, const char *delim)
+{
+    const char *p;
+    long long_val;
+    volatile ae_int_t ae_val;
+    
+    p = s;
+
+    //
+    // check string structure:
+    // * leading sign
+    // * at least one digit
+    // * delimiter
+    //
+    if( *s=='-' || *s=='+' )
+        s++;
+    if( *s==0 || strchr("1234567890",*s)==NULL)
+        throw alglib::ap_error("Cannot parse value");
+    while( *s!=0 && strchr("1234567890",*s)!=NULL )
+        s++;
+    if( *s==0 || strchr(delim,*s)==NULL )
+        throw alglib::ap_error("Cannot parse value");
+
+    // convert and ensure that value fits into ae_int_t
+    s = p;
+    long_val = atol(s);
+    ae_val = long_val;
+    if( ae_val!=long_val )
+        throw alglib::ap_error("Cannot parse value");
+    return ae_val;
+}
+
+bool alglib::_parse_real_delim(const char *s, const char *delim, double *result, const char **new_s)
+{
+    const char *p;
+    char *t;
+    bool has_digits;
+    char buf[64];
+    int  isign;
+    lconv *loc;
+
+    p = s;
+    
+    //
+    // check string structure and decide what to do
+    //
+    isign = 1;
+    if( *s=='-' || *s=='+' )
+    {
+        isign = *s=='-' ? -1 : +1;
+        s++;
+    }
+    memset(buf, 0, sizeof(buf));
+    strncpy(buf, s, 3);
+    if( my_stricmp(buf,"nan")!=0 && my_stricmp(buf,"inf")!=0 )
+    {
+        //
+        // [sign] [ddd] [.] [ddd] [e|E[sign]ddd]
+        //
+        has_digits = false;
+        if( *s!=0 && strchr("1234567890",*s)!=NULL )
+        {
+            has_digits = true;
+            while( *s!=0 && strchr("1234567890",*s)!=NULL )
+                s++;
+        }
+        if( *s=='.' )
+            s++;
+        if( *s!=0 && strchr("1234567890",*s)!=NULL )
+        {
+            has_digits = true;
+            while( *s!=0 && strchr("1234567890",*s)!=NULL )
+                s++;
+        }
+        if (!has_digits )
+            return false;
+        if( *s=='e' || *s=='E' )
+        {
+            s++;
+            if( *s=='-' || *s=='+' )
+                s++;
+            if( *s==0 || strchr("1234567890",*s)==NULL )
+                return false;
+            while( *s!=0 && strchr("1234567890",*s)!=NULL )
+                s++;
+        }   
+        if( *s==0 || strchr(delim,*s)==NULL )
+            return false;
+        *new_s = s;
+
+        //
+        // finite value conversion
+        //
+        if( *new_s-p>=(int)sizeof(buf) )
+            return false;
+        strncpy(buf, p, (size_t)(*new_s-p));
+        buf[*new_s-p] = 0;
+        loc = localeconv();
+        t = strchr(buf,'.');
+        if( t!=NULL )
+            *t = *loc->decimal_point;
+        *result = atof(buf);
+        return true;
+    }
+    else
+    {
+        //
+        // check delimiter and update *new_s
+        //
+        s += 3;
+        if( *s==0 || strchr(delim,*s)==NULL )
+            return false;
+        *new_s = s;
+
+        //
+        // NAN, INF conversion
+        //
+        if( my_stricmp(buf,"nan")==0 )
+            *result = fp_nan;
+        if( my_stricmp(buf,"inf")==0 )
+            *result = isign>0 ? fp_posinf : fp_neginf;
+        return true;
+    }
+}
+
+double alglib::parse_real_delim(const char *s, const char *delim)
+{
+    double result;
+    const char *new_s;
+    if( !_parse_real_delim(s, delim, &result, &new_s) )
+        throw alglib::ap_error("Cannot parse value");
+    return result;
+}
+
+alglib::complex alglib::parse_complex_delim(const char *s, const char *delim)
+{
+    double d_result;
+    const char *new_s;
+    alglib::complex c_result;
+    
+    // parse as real value
+    if( _parse_real_delim(s, delim, &d_result, &new_s) )
+        return d_result;
+
+    // parse as "a+bi" or "a-bi"
+    if( _parse_real_delim(s, "+-", &c_result.x, &new_s) )
+    {
+        s = new_s;
+        if( !_parse_real_delim(s, "i", &c_result.y, &new_s) )
+            throw alglib::ap_error("Cannot parse value");
+        s = new_s+1;
+        if( *s==0 || strchr(delim,*s)==NULL )
+            throw alglib::ap_error("Cannot parse value");
+        return c_result;
+    }
+    
+    // parse as complex value "bi+a" or "bi-a"
+    if( _parse_real_delim(s, "i", &c_result.y, &new_s) )
+    {
+        s = new_s+1;
+        if( *s==0 )
+            throw alglib::ap_error("Cannot parse value");
+        if( strchr(delim,*s)!=NULL )
+        {
+            c_result.x = 0;
+            return c_result;
+        }
+        if( strchr("+-",*s)!=NULL )
+        {
+            if( !_parse_real_delim(s, delim, &c_result.x, &new_s) )
+                throw alglib::ap_error("Cannot parse value");
+            return c_result;
+        }
+        throw alglib::ap_error("Cannot parse value");
+    }
+
+    // error
+    throw alglib::ap_error("Cannot parse value");
+}
+
+std::string alglib::arraytostring(const bool *ptr, ae_int_t n)
+{
+    std::string result;
+    ae_int_t i;
+    result = "[";
+    for(i=0; i<n; i++)
+    {
+        if( i!=0 )
+            result += ",";
+        result += ptr[i] ? "true" : "false";
+    }
+    result += "]";
+    return result;
+}
+
+std::string alglib::arraytostring(const ae_int_t *ptr, ae_int_t n)
+{
+    std::string result;
+    ae_int_t i;
+    char buf[64];
+    result = "[";
+    for(i=0; i<n; i++)
+    {
+        if( sprintf(buf, i==0 ? "%ld" : ",%ld", long(ptr[i]))>=(int)sizeof(buf) )
+            throw ap_error("arraytostring(): buffer overflow");
+        result += buf;
+    }
+    result += "]";
+    return result;
+}
+
+std::string alglib::arraytostring(const double *ptr, ae_int_t n, int dps)
+{
+    std::string result;
+    ae_int_t i;
+    char buf[64];
+    char mask1[64];
+    char mask2[64];
+    result = "[";
+    if( sprintf(mask1, "%%.%df", dps)>=(int)sizeof(mask1) )
+        throw ap_error("arraytostring(): buffer overflow");
+    if( sprintf(mask2, ",%s", mask1)>=(int)sizeof(mask2) )
+        throw ap_error("arraytostring(): buffer overflow");
+    for(i=0; i<n; i++)
+    {
+        buf[0] = 0;
+        if( fp_isfinite(ptr[i]) )
+        {
+            if( sprintf(buf, i==0 ? mask1 : mask2, double(ptr[i]))>=(int)sizeof(buf) )
+                throw ap_error("arraytostring(): buffer overflow");
+        }
+        else if( fp_isnan(ptr[i]) )
+            strcpy(buf, i==0 ?  "NAN" :  ",NAN");
+        else if( fp_isposinf(ptr[i]) )
+            strcpy(buf, i==0 ? "+INF" : ",+INF");
+        else if( fp_isneginf(ptr[i]) )
+            strcpy(buf, i==0 ? "-INF" : ",-INF");
+        result += buf;
+    }
+    result += "]";
+    return result;
+}
+
+std::string alglib::arraytostring(const alglib::complex *ptr, ae_int_t n, int dps)
+{
+    std::string result;
+    ae_int_t i;
+    result = "[";
+    for(i=0; i<n; i++)
+    {
+        if( i!=0 )
+            result += ",";
+        result += ptr[i].tostring(dps);
+    }
+    result += "]";
+    return result;
+}
+
+
+/********************************************************************
+standard functions
+********************************************************************/
+int alglib::sign2(double x)
+{
+    if( x>0 ) return  1;
+    if( x<0 ) return -1;
+    return 0;
+}
+
+double alglib::randomreal()
+{
+    int i1 = rand();
+    int i2 = rand();
+    while(i1==RAND_MAX)
+        i1 =rand();
+    while(i2==RAND_MAX)
+        i2 =rand();
+    double mx = RAND_MAX;
+    return (i1+i2/mx)/mx;
+}
+
+int alglib::randominteger(int maxv)
+{  return rand()%maxv; }
+
+int alglib::round(double x)
+{ return int(floor(x+0.5)); }
+
+int alglib::trunc(double x)
+{ return int(x>0 ? floor(x) : ceil(x)); }
+
+int alglib::ifloor(double x)
+{ return int(floor(x)); }
+
+int alglib::iceil(double x)
+{ return int(ceil(x)); }
+
+double alglib::pi()
+{ return 3.14159265358979323846; }
+
+double alglib::sqr(double x)
+{ return x*x; }
+
+int alglib::maxint(int m1, int m2)
+{
+    return m1>m2 ? m1 : m2;
+}
+
+int alglib::minint(int m1, int m2)
+{
+    return m1>m2 ? m2 : m1;
+}
+
+double alglib::maxreal(double m1, double m2)
+{
+    return m1>m2 ? m1 : m2;
+}
+
+double alglib::minreal(double m1, double m2)
+{
+    return m1>m2 ? m2 : m1;
+}
+
+bool alglib::fp_eq(double v1, double v2)
+{
+    // IEEE-strict floating point comparison
+    volatile double x = v1;
+    volatile double y = v2;
+    return x==y;
+}
+
+bool alglib::fp_neq(double v1, double v2)
+{
+    // IEEE-strict floating point comparison
+    return !fp_eq(v1,v2);
+}
+
+bool alglib::fp_less(double v1, double v2)
+{
+    // IEEE-strict floating point comparison
+    volatile double x = v1;
+    volatile double y = v2;
+    return x<y;
+}
+
+bool alglib::fp_less_eq(double v1, double v2)
+{
+    // IEEE-strict floating point comparison
+    volatile double x = v1;
+    volatile double y = v2;
+    return x<=y;
+}
+
+bool alglib::fp_greater(double v1, double v2)
+{
+    // IEEE-strict floating point comparison
+    volatile double x = v1;
+    volatile double y = v2;
+    return x>y;
+}
+
+bool alglib::fp_greater_eq(double v1, double v2)
+{
+    // IEEE-strict floating point comparison
+    volatile double x = v1;
+    volatile double y = v2;
+    return x>=y;
+}
+
+bool alglib::fp_isnan(double x)
+{
+    return alglib_impl::ae_isnan_stateless(x,endianness);
+}
+
+bool alglib::fp_isposinf(double x)
+{
+    return alglib_impl::ae_isposinf_stateless(x,endianness);
+}
+
+bool alglib::fp_isneginf(double x)
+{
+    return alglib_impl::ae_isneginf_stateless(x,endianness);
+}
+
+bool alglib::fp_isinf(double x)
+{
+    return alglib_impl::ae_isinf_stateless(x,endianness);
+}
+
+bool alglib::fp_isfinite(double x)
+{
+    return alglib_impl::ae_isfinite_stateless(x,endianness);
+}
+
+/********************************************************************
+Dataset functions
+********************************************************************/
+/*bool alglib::readstrings(std::string file, std::list<std::string> *pOutput)
+{
+    return readstrings(file, pOutput, "");
+}
+
+bool alglib::readstrings(std::string file, std::list<std::string> *pOutput, std::string comment)
+{
+    std::string cmd, s;
+    FILE *f;
+    char buf[32768];
+    char *str;
+
+    f = fopen(file.c_str(), "rb");
+    if( !f )
+        return false;
+    s = "";
+    pOutput->clear();
+    while(str=fgets(buf, sizeof(buf), f))
+    {
+        // TODO: read file by small chunks, combine in one large string
+        if( strlen(str)==0 )
+            continue;
+            
+        //
+        // trim trailing newline chars
+        //
+        char *eos = str+strlen(str)-1;
+        if( *eos=='\n' )
+        {
+            *eos = 0;
+            eos--;
+        }
+        if( *eos=='\r' )
+        {
+            *eos = 0;
+            eos--;
+        }
+        s = str;
+
+        //
+        // skip comments
+        //
+        if( comment.length()>0 )
+            if( strncmp(s.c_str(), comment.c_str(), comment.length())==0 )
+            {
+                s = "";
+                continue;
+            }
+
+        //
+        // read data
+        //
+        if( s.length()<1 )
+        {
+            fclose(f);
+            throw alglib::ap_error("internal error in read_strings");
+        }
+        pOutput->push_back(s);
+    }
+    fclose(f);
+    return true;
+}
+
+void alglib::explodestring(std::string s, char sep, std::vector<std::string> *pOutput)
+{
+    std::string tmp;
+    int i;
+    tmp = "";
+    pOutput->clear();
+    for(i=0; i<s.length(); i++)
+    {
+        if( s[i]!=sep )
+        {
+            tmp += s[i];
+            continue;
+        }
+        //if( tmp.length()!=0 )
+        pOutput->push_back(tmp);
+        tmp = "";
+    }
+    if( tmp.length()!=0 )
+        pOutput->push_back(tmp);
+}
+
+std::string alglib::strtolower(const std::string &s)
+{
+    std::string r = s;
+    for(int i=0; i<r.length(); i++)
+        r[i] = tolower(r[i]);
+    return r;
+}
+
+std::string alglib::xtrim(std::string s)
+{
+    char *pstr = (char*)malloc(s.length()+1);
+    char *p2 = pstr;
+    if( pstr==NULL )
+        throw "xalloc in xtrim()";
+    try
+    {
+        bool bws;
+        int i;
+
+        //
+        // special cases:
+        // * zero length string
+        // * string includes only spaces
+        //
+        if( s.length()==0 )
+        {
+            free(pstr);
+            return "";
+        }
+        bws = true;
+        for(i=0; i<s.length(); i++)
+            if( s[i]!=' ' )
+                bws = false;
+        if( bws )
+        {
+            free(pstr);
+            return "";
+        }
+
+        //
+        // merge internal spaces
+        //
+        bws = false;
+        for(i=0; i<s.length(); i++)
+        {
+            if( s[i]==' ' && bws )
+                continue;
+            if( s[i]==' ' )
+            {
+                *p2 = ' ';
+                p2++;
+                bws = true;
+                continue;
+            }
+            *p2 = s[i];
+            bws = false;
+            p2++;
+        }
+        *p2 = 0;
+
+        //
+        // trim leading/trailing spaces.
+        // we expect at least one non-space character in the string
+        //
+        p2--;
+        while(*p2==' ')
+        {
+            *p2 = 0;
+            p2--;
+        }
+        p2 = pstr;
+        while((*p2)==' ')
+            p2++;
+
+        //
+        // result
+        //
+        std::string r = p2;
+        free(pstr);
+        return r;
+    }
+    catch(...)
+    {
+        free(pstr);
+        throw "unknown exception in xtrim()";
+    }
+}
+
+bool alglib::opendataset(std::string file, dataset *pdataset)
+{
+    std::list<std::string> Lines;
+    std::vector<std::string> Values, RowsArr, ColsArr, VarsArr, HeadArr;
+    std::list<std::string>::iterator i;
+    std::string s;
+    int TrnFirst, TrnLast, ValFirst, ValLast, TstFirst, TstLast, LinesRead, j;
+
+    //
+    // Read data
+    //
+    if( pdataset==NULL )
+        return false;
+    if( !readstrings(file, &Lines, "//") )
+        return false;
+    i = Lines.begin();
+    *pdataset = dataset();
+
+    //
+    // Read header
+    //
+    if( i==Lines.end() )
+        return false;
+    s = alglib::xtrim(*i);
+    alglib::explodestring(s, '#', &HeadArr);
+    if( HeadArr.size()!=2 )
+        return false;
+
+    //
+    // Rows info
+    //
+    alglib::explodestring(alglib::xtrim(HeadArr[0]), ' ', &RowsArr);
+    if( RowsArr.size()==0 || RowsArr.size()>3 )
+        return false;
+    if( RowsArr.size()==1 )
+    {
+        pdataset->totalsize = atol(RowsArr[0].c_str());
+        pdataset->trnsize = pdataset->totalsize;
+    }
+    if( RowsArr.size()==2 )
+    {
+        pdataset->trnsize = atol(RowsArr[0].c_str());
+        pdataset->tstsize = atol(RowsArr[1].c_str());
+        pdataset->totalsize = pdataset->trnsize + pdataset->tstsize;
+    }
+    if( RowsArr.size()==3 )
+    {
+        pdataset->trnsize = atol(RowsArr[0].c_str());
+        pdataset->valsize = atol(RowsArr[1].c_str());
+        pdataset->tstsize = atol(RowsArr[2].c_str());
+        pdataset->totalsize = pdataset->trnsize + pdataset->valsize + pdataset->tstsize;
+    }
+    if( pdataset->totalsize<=0 || pdataset->trnsize<0 || pdataset->valsize<0 || pdataset->tstsize<0 )
+        return false;
+    TrnFirst = 0;
+    TrnLast = TrnFirst + pdataset->trnsize;
+    ValFirst = TrnLast;
+    ValLast = ValFirst + pdataset->valsize;
+    TstFirst = ValLast;
+    TstLast = TstFirst + pdataset->tstsize;
+                
+    //
+    // columns
+    //
+    alglib::explodestring(alglib::xtrim(HeadArr[1]), ' ', &ColsArr);
+    if( ColsArr.size()!=1 && ColsArr.size()!=4 )
+        return false;
+    if( ColsArr.size()==1 )
+    {
+        pdataset->nin = atoi(ColsArr[0].c_str());
+        if( pdataset->nin<=0 )
+            return false;
+    }
+    if( ColsArr.size()==4 )
+    {
+        if( alglib::strtolower(ColsArr[0])!="reg" && alglib::strtolower(ColsArr[0])!="cls" )
+            return false;
+        if( ColsArr[2]!="=>" )
+            return false;
+        pdataset->nin = atol(ColsArr[1].c_str());
+        if( pdataset->nin<1 )
+            return false;
+        if( alglib::strtolower(ColsArr[0])=="reg" )
+        {
+            pdataset->nclasses = 0;
+            pdataset->nout = atol(ColsArr[3].c_str());
+            if( pdataset->nout<1 )
+                return false;
+        }
+        else
+        {
+            pdataset->nclasses = atol(ColsArr[3].c_str());
+            pdataset->nout = 1;
+            if( pdataset->nclasses<2 )
+                return false;
+        }
+    }
+
+    //
+    // initialize arrays
+    //
+    pdataset->all.setlength(pdataset->totalsize, pdataset->nin+pdataset->nout);
+    if( pdataset->trnsize>0 ) pdataset->trn.setlength(pdataset->trnsize, pdataset->nin+pdataset->nout);
+    if( pdataset->valsize>0 ) pdataset->val.setlength(pdataset->valsize, pdataset->nin+pdataset->nout);
+    if( pdataset->tstsize>0 ) pdataset->tst.setlength(pdataset->tstsize, pdataset->nin+pdataset->nout);
+
+    //
+    // read data
+    //
+    for(LinesRead=0, i++; i!=Lines.end() && LinesRead<pdataset->totalsize; i++, LinesRead++)
+    {
+        std::string sss = *i;
+        alglib::explodestring(alglib::xtrim(*i), ' ', &VarsArr);
+        if( VarsArr.size()!=pdataset->nin+pdataset->nout )
+            return false;
+        int tmpc = alglib::round(atof(VarsArr[pdataset->nin+pdataset->nout-1].c_str()));
+        if( pdataset->nclasses>0 && (tmpc<0 || tmpc>=pdataset->nclasses) )
+            return false;
+        for(j=0; j<pdataset->nin+pdataset->nout; j++)
+        {
+            pdataset->all(LinesRead,j) = atof(VarsArr[j].c_str());
+            if( LinesRead>=TrnFirst && LinesRead<TrnLast )
+                pdataset->trn(LinesRead-TrnFirst,j) = atof(VarsArr[j].c_str());
+            if( LinesRead>=ValFirst && LinesRead<ValLast )
+                pdataset->val(LinesRead-ValFirst,j) = atof(VarsArr[j].c_str());
+            if( LinesRead>=TstFirst && LinesRead<TstLast )
+                pdataset->tst(LinesRead-TstFirst,j) = atof(VarsArr[j].c_str());
+        }
+    }
+    if( LinesRead!=pdataset->totalsize )
+        return false;
+    return true;
+}*/
+
+/*
+previous variant
+bool alglib::opendataset(std::string file, dataset *pdataset)
+{
+    std::list<std::string> Lines;
+    std::vector<std::string> Values;
+    std::list<std::string>::iterator i;
+    int nCol, nRow, nSplitted;
+    int nColumns, nRows;
+
+    //
+    // Read data
+    //
+    if( pdataset==NULL )
+        return false;
+    if( !readstrings(file, &Lines, "//") )
+        return false;
+    i = Lines.begin();
+    *pdataset = dataset();
+
+    //
+    // Read columns info
+    //
+    if( i==Lines.end() )
+        return false;
+    if( sscanf(i->c_str(), " columns = %d %d ", &pdataset->nin, &pdataset->nout)!=2 )
+        return false;
+    if( pdataset->nin<=0 || pdataset->nout==0 || pdataset->nout==-1)
+        return false;
+    if( pdataset->nout<0 )
+    {
+        pdataset->nclasses = -pdataset->nout;
+        pdataset->nout = 1;
+        pdataset->iscls = true;
+    }
+    else
+    {
+        pdataset->isreg = true;
+    }
+    nColumns = pdataset->nin+pdataset->nout;
+    i++;
+
+    //
+    // Read rows info
+    //
+    if( i==Lines.end() )
+        return false;
+    if( sscanf(i->c_str(), " rows = %d %d %d ", &pdataset->trnsize, &pdataset->valsize, &pdataset->tstsize)!=3 )
+        return false;
+    if( (pdataset->trnsize<0) || (pdataset->valsize<0) || (pdataset->tstsize<0) )
+        return false;
+    if( (pdataset->trnsize==0) && (pdataset->valsize==0) && (pdataset->tstsize==0) )
+        return false;
+    nRows = pdataset->trnsize+pdataset->valsize+pdataset->tstsize;
+    pdataset->size = nRows;
+    if( Lines.size()!=nRows+2 )
+        return false;
+    i++;
+
+    //
+    // Read all cases
+    //
+    alglib::real_2d_array &arr = pdataset->all;
+    arr.setbounds(0, nRows-1, 0, nColumns-1);
+    for(nRow=0; nRow<nRows; nRow++)
+    {
+        alglib::ap_error::make_assertion(i!=Lines.end());
+        explodestring(*i, '\t', &Values);
+        if( Values.size()!=nColumns )
+            return false;
+        for(nCol=0; nCol<nColumns; nCol++)
+        {
+            double v;
+            if( sscanf(Values[nCol].c_str(), "%lg", &v)!=1 )
+                return false;
+            if( (nCol==nColumns-1) && pdataset->iscls && ((round(v)<0) || (round(v)>=pdataset->nclasses)) )
+                return false;
+            if( (nCol==nColumns-1) && pdataset->iscls )
+                arr(nRow, nCol) = round(v);
+            else
+                arr(nRow, nCol) = v;
+        }
+        i++;
+    }
+
+    //
+    // Split to training, validation and test sets
+    //
+    if( pdataset->trnsize>0 )
+        pdataset->trn.setbounds(0, pdataset->trnsize-1, 0, nColumns-1);
+    if( pdataset->valsize>0 )
+        pdataset->val.setbounds(0, pdataset->valsize-1, 0, nColumns-1);
+    if( pdataset->tstsize>0 )
+        pdataset->tst.setbounds(0, pdataset->tstsize-1, 0, nColumns-1);
+    nSplitted=0;
+    for(nRow=0; nRow<=pdataset->trnsize-1; nRow++, nSplitted++)
+        for(nCol=0; nCol<=nColumns-1; nCol++)
+            pdataset->trn(nRow,nCol) = arr(nSplitted,nCol);
+    for(nRow=0; nRow<=pdataset->valsize-1; nRow++, nSplitted++)
+        for(nCol=0; nCol<=nColumns-1; nCol++)
+            pdataset->val(nRow,nCol) = arr(nSplitted,nCol);
+    for(nRow=0; nRow<=pdataset->tstsize-1; nRow++, nSplitted++)
+        for(nCol=0; nCol<=nColumns-1; nCol++)
+            pdataset->tst(nRow,nCol) = arr(nSplitted,nCol);
+    return true;
+}*/
+
+alglib::ae_int_t alglib::vlen(ae_int_t n1, ae_int_t n2)
+{
+    return n2-n1+1;
+}
+
+
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTIONS CONTAINS OPTIMIZED LINEAR ALGEBRA CODE
+// IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+#define alglib_simd_alignment 16
+
+#define alglib_r_block        32
+#define alglib_half_r_block   16
+#define alglib_twice_r_block  64
+
+#define alglib_c_block        24
+#define alglib_half_c_block   12
+#define alglib_twice_c_block  48
+
+
+/********************************************************************
+This subroutine calculates fast 32x32 real matrix-vector product:
+
+    y := beta*y + alpha*A*x
+
+using either generic C code or native optimizations (if available)
+
+IMPORTANT:
+* A must be stored in row-major order,
+  stride is alglib_r_block,
+  aligned on alglib_simd_alignment boundary
+* X must be aligned on alglib_simd_alignment boundary
+* Y may be non-aligned
+********************************************************************/
+void _ialglib_mv_32(const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
+{
+    ae_int_t i, k;
+    const double *pa0, *pa1, *pb;
+
+    pa0 = a;
+    pa1 = a+alglib_r_block;
+    pb = x;
+    for(i=0; i<16; i++)
+    {
+        double v0 = 0, v1 = 0;
+        for(k=0; k<4; k++)
+        {
+            v0 += pa0[0]*pb[0];
+            v1 += pa1[0]*pb[0];
+            v0 += pa0[1]*pb[1];
+            v1 += pa1[1]*pb[1];
+            v0 += pa0[2]*pb[2];
+            v1 += pa1[2]*pb[2];
+            v0 += pa0[3]*pb[3];
+            v1 += pa1[3]*pb[3];
+            v0 += pa0[4]*pb[4];
+            v1 += pa1[4]*pb[4];
+            v0 += pa0[5]*pb[5];
+            v1 += pa1[5]*pb[5];
+            v0 += pa0[6]*pb[6];
+            v1 += pa1[6]*pb[6];
+            v0 += pa0[7]*pb[7];
+            v1 += pa1[7]*pb[7];
+            pa0 += 8;
+            pa1 += 8;
+            pb  += 8;
+        }
+        y[0] = beta*y[0]+alpha*v0;
+        y[stride] = beta*y[stride]+alpha*v1;
+
+        /*
+         * now we've processed rows I and I+1,
+         * pa0 and pa1 are pointing to rows I+1 and I+2.
+         * move to I+2 and I+3.
+         */
+        pa0 += alglib_r_block;
+        pa1 += alglib_r_block;
+        pb = x;
+        y+=2*stride;
+    }
+}
+
+
+/*************************************************************************
+This function calculates MxN real matrix-vector product:
+
+    y := beta*y + alpha*A*x
+
+using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
+
+If beta is zero, we do not use previous values of y (they are  overwritten
+by alpha*A*x without ever being read).  If alpha is zero, no matrix-vector
+product is calculated (only beta is updated); however, this update  is not
+efficient  and  this  function  should  NOT  be used for multiplication of 
+vector and scalar.
+
+IMPORTANT:
+* 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
+* A must be stored in row-major order with stride equal to alglib_r_block
+*************************************************************************/
+void _ialglib_rmv(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
+{
+    /*
+     * Handle special cases:
+     * - alpha is zero or n is zero
+     * - m is zero
+     */
+    if( m==0 )
+        return;
+    if( alpha==0.0 || n==0 )
+    {
+        ae_int_t i;
+        if( beta==0.0 )
+        {
+            for(i=0; i<m; i++)
+            {
+                *y = 0.0;
+                y += stride;
+            }
+        }
+        else
+        {
+            for(i=0; i<m; i++)
+            {
+                *y *= beta;
+                y += stride;
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Handle general case: nonzero alpha, n and m
+     *
+     */
+    if( m==32 && n==32 )
+    {
+        /*
+         * 32x32, may be we have something better than general implementation
+         */
+        _ialglib_mv_32(a, x, y, stride, alpha, beta);
+    }
+    else
+    {
+        ae_int_t i, k, m2, n8, n2, ntrail2;
+        const double *pa0, *pa1, *pb;
+
+        /*
+         * First M/2 rows of A are processed in pairs.
+         * optimized code is used.
+         */
+        m2 = m/2;
+        n8 = n/8;
+        ntrail2 = (n-8*n8)/2;
+        for(i=0; i<m2; i++)
+        {
+            double v0 = 0, v1 = 0;
+
+            /*
+             * 'a' points to the part of the matrix which
+             * is not processed yet
+             */
+            pb = x;
+            pa0 = a;
+            pa1 = a+alglib_r_block;
+            a += alglib_twice_r_block;
+
+            /*
+             * 8 elements per iteration
+             */
+            for(k=0; k<n8; k++)
+            {
+                v0 += pa0[0]*pb[0];
+                v1 += pa1[0]*pb[0];
+                v0 += pa0[1]*pb[1];
+                v1 += pa1[1]*pb[1];
+                v0 += pa0[2]*pb[2];
+                v1 += pa1[2]*pb[2];
+                v0 += pa0[3]*pb[3];
+                v1 += pa1[3]*pb[3];
+                v0 += pa0[4]*pb[4];
+                v1 += pa1[4]*pb[4];
+                v0 += pa0[5]*pb[5];
+                v1 += pa1[5]*pb[5];
+                v0 += pa0[6]*pb[6];
+                v1 += pa1[6]*pb[6];
+                v0 += pa0[7]*pb[7];
+                v1 += pa1[7]*pb[7];
+                pa0 += 8;
+                pa1 += 8;
+                pb  += 8;
+            }
+
+            /*
+             * 2 elements per iteration
+             */
+            for(k=0; k<ntrail2; k++)
+            {
+                v0 += pa0[0]*pb[0];
+                v1 += pa1[0]*pb[0];
+                v0 += pa0[1]*pb[1];
+                v1 += pa1[1]*pb[1];
+                pa0 += 2;
+                pa1 += 2;
+                pb  += 2;
+            }
+
+            /*
+             * last element, if needed
+             */
+            if( n%2!=0 )
+            {
+                v0 += pa0[0]*pb[0];
+                v1 += pa1[0]*pb[0];
+            }
+
+            /*
+             * final update
+             */
+            if( beta!=0 )
+            {
+                y[0] = beta*y[0]+alpha*v0;
+                y[stride] = beta*y[stride]+alpha*v1;
+            }
+            else
+            {
+                y[0] = alpha*v0;
+                y[stride] = alpha*v1;
+            }
+            
+            /*
+             * move to the next pair of elements
+             */
+            y+=2*stride;
+        }
+
+
+        /*
+         * Last (odd) row is processed with less optimized code.
+         */
+        if( m%2!=0 )
+        {
+            double v0 = 0;
+
+            /*
+             * 'a' points to the part of the matrix which
+             * is not processed yet
+             */
+            pb = x;
+            pa0 = a;
+
+            /*
+             * 2 elements per iteration
+             */
+            n2 = n/2;
+            for(k=0; k<n2; k++)
+            {
+                v0 += pa0[0]*pb[0]+pa0[1]*pb[1];
+                pa0 += 2;
+                pb  += 2;
+            }
+
+            /*
+             * last element, if needed
+             */
+            if( n%2!=0 )
+                v0 += pa0[0]*pb[0];
+
+            /*
+             * final update
+             */
+            if( beta!=0 )
+                y[0] = beta*y[0]+alpha*v0;
+            else
+                y[0] = alpha*v0;
+        }
+    }
+}
+
+
+/*************************************************************************
+This function calculates MxN real matrix-vector product:
+
+    y := beta*y + alpha*A*x
+
+using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
+
+If beta is zero, we do not use previous values of y (they are  overwritten
+by alpha*A*x without ever being read).  If alpha is zero, no matrix-vector
+product is calculated (only beta is updated); however, this update  is not
+efficient  and  this  function  should  NOT  be used for multiplication of 
+vector and scalar.
+
+IMPORTANT:
+* 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
+* A must be stored in row-major order with stride equal to alglib_r_block
+* y may be non-aligned
+* both A and x must have same offset with respect to 16-byte boundary:
+  either both are aligned, or both are aligned with offset 8. Function
+  will crash your system if you try to call it with misaligned or 
+  incorrectly aligned data.
+
+This function supports SSE2; it can be used when:
+1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
+2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
+
+If (1) is failed, this function will still be defined and callable, but it 
+will do nothing.  If (2)  is  failed , call to this function will probably 
+crash your system. 
+
+If  you  want  to  know  whether  it  is safe to call it, you should check 
+results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable 
+and will do its work.
+*************************************************************************/
+void _ialglib_rmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
+{
+#if defined(AE_HAS_SSE2_INTRINSICS)
+    ae_int_t i, k, n2;
+    ae_int_t mb3, mtail, nhead, nb8, nb2, ntail;
+    const double *pa0, *pa1, *pa2, *pb;
+    __m128d v0, v1, v2, va0, va1, va2, vx, vtmp; 
+    double buf3[3], buf6[6];
+    double d;
+    
+    /*
+     * Handle special cases:
+     * - alpha is zero or n is zero
+     * - m is zero
+     */
+    if( m==0 )
+        return;
+    if( alpha==0.0 || n==0 )
+    {
+        if( beta==0.0 )
+        {
+            for(i=0; i<m; i++)
+            {
+                *y = 0.0;
+                y += stride;
+            }
+        }
+        else
+        {
+            for(i=0; i<m; i++)
+            {
+                *y *= beta;
+                y += stride;
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Handle general case: nonzero alpha, n and m
+     *
+     * We divide problem as follows...
+     *
+     * Rows M are divided into:
+     * - mb3 blocks, each 3xN
+     * - mtail blocks, each 1xN
+     *
+     * Within a row, elements are divided  into:
+     * - nhead 1x1 blocks (used to align the rest, either 0 or 1)
+     * - nb8 1x8 blocks, aligned to 16-byte boundary
+     * - nb2 1x2 blocks, aligned to 16-byte boundary
+     * - ntail 1x1 blocks, aligned too (altough we don't rely on it)
+     *
+     */
+    n2 = n/2;    
+    mb3 = m/3;
+    mtail = m%3;
+    nhead = ae_misalignment(a,alglib_simd_alignment)==0 ? 0 : 1;
+    nb8 = (n-nhead)/8;
+    nb2 = (n-nhead-8*nb8)/2;
+    ntail = n-nhead-8*nb8-2*nb2;
+    for(i=0; i<mb3; i++)
+    {
+        double row0, row1, row2;
+        row0 = 0;
+        row1 = 0;
+        row2 = 0;
+        pb = x;
+        pa0 = a;
+        pa1 = a+alglib_r_block;
+        pa2 = a+alglib_twice_r_block;
+        a += 3*alglib_r_block;
+        if( nhead==1 )
+        {
+            vx  = _mm_load_sd(pb);
+            v0 = _mm_load_sd(pa0);
+            v1 = _mm_load_sd(pa1);
+            v2 = _mm_load_sd(pa2);
+            
+            v0 = _mm_mul_sd(v0,vx);
+            v1 = _mm_mul_sd(v1,vx);
+            v2 = _mm_mul_sd(v2,vx);
+            
+            pa0++;
+            pa1++;
+            pa2++;
+            pb++;
+        }
+        else
+        {
+            v0 = _mm_setzero_pd();
+            v1 = _mm_setzero_pd();
+            v2 = _mm_setzero_pd();
+        }
+        for(k=0; k<nb8; k++)
+        {
+            /*
+             * this code is a shuffle of simultaneous dot product.
+             * see below for commented unshuffled original version.
+             */
+            vx  = _mm_load_pd(pb);
+            va0 = _mm_load_pd(pa0);
+            va1 = _mm_load_pd(pa1);
+            va0 = _mm_mul_pd(va0,vx);
+            va2 = _mm_load_pd(pa2);
+            v0 = _mm_add_pd(va0,v0);
+            va1 = _mm_mul_pd(va1,vx);
+            va0 = _mm_load_pd(pa0+2);
+            v1 = _mm_add_pd(va1,v1);
+            va2 = _mm_mul_pd(va2,vx);
+            va1 = _mm_load_pd(pa1+2);
+            v2 = _mm_add_pd(va2,v2);
+            vx  = _mm_load_pd(pb+2);
+            va0 = _mm_mul_pd(va0,vx);
+            va2 = _mm_load_pd(pa2+2);
+            v0 = _mm_add_pd(va0,v0);
+            va1 = _mm_mul_pd(va1,vx);
+            va0 = _mm_load_pd(pa0+4);
+            v1 = _mm_add_pd(va1,v1);
+            va2 = _mm_mul_pd(va2,vx);
+            va1 = _mm_load_pd(pa1+4);
+            v2 = _mm_add_pd(va2,v2);
+            vx  = _mm_load_pd(pb+4);
+            va0 = _mm_mul_pd(va0,vx);
+            va2 = _mm_load_pd(pa2+4);
+            v0 = _mm_add_pd(va0,v0);
+            va1 = _mm_mul_pd(va1,vx);
+            va0 = _mm_load_pd(pa0+6);
+            v1 = _mm_add_pd(va1,v1);
+            va2 = _mm_mul_pd(va2,vx);
+            va1 = _mm_load_pd(pa1+6);
+            v2 = _mm_add_pd(va2,v2);
+            vx  = _mm_load_pd(pb+6);
+            va0 = _mm_mul_pd(va0,vx);
+            v0 = _mm_add_pd(va0,v0);
+            va2 = _mm_load_pd(pa2+6);
+            va1 = _mm_mul_pd(va1,vx);
+            v1 = _mm_add_pd(va1,v1);
+            va2 = _mm_mul_pd(va2,vx);
+            v2 = _mm_add_pd(va2,v2);
+            
+            pa0 += 8;
+            pa1 += 8;
+            pa2 += 8;
+            pb += 8;
+
+            /*
+            this is unshuffled version of code above
+            
+            vx  = _mm_load_pd(pb);
+            va0 = _mm_load_pd(pa0);            
+            va1 = _mm_load_pd(pa1);
+            va2 = _mm_load_pd(pa2);
+            
+            va0 = _mm_mul_pd(va0,vx);
+            va1 = _mm_mul_pd(va1,vx);
+            va2 = _mm_mul_pd(va2,vx);
+            
+            v0 = _mm_add_pd(va0,v0);
+            v1 = _mm_add_pd(va1,v1);
+            v2 = _mm_add_pd(va2,v2);
+            
+            vx  = _mm_load_pd(pb+2);
+            va0 = _mm_load_pd(pa0+2);            
+            va1 = _mm_load_pd(pa1+2);
+            va2 = _mm_load_pd(pa2+2);
+            
+            va0 = _mm_mul_pd(va0,vx);
+            va1 = _mm_mul_pd(va1,vx);
+            va2 = _mm_mul_pd(va2,vx);
+            
+            v0 = _mm_add_pd(va0,v0);
+            v1 = _mm_add_pd(va1,v1);
+            v2 = _mm_add_pd(va2,v2);
+
+            vx  = _mm_load_pd(pb+4);
+            va0 = _mm_load_pd(pa0+4);            
+            va1 = _mm_load_pd(pa1+4);
+            va2 = _mm_load_pd(pa2+4);
+            
+            va0 = _mm_mul_pd(va0,vx);
+            va1 = _mm_mul_pd(va1,vx);
+            va2 = _mm_mul_pd(va2,vx);
+            
+            v0 = _mm_add_pd(va0,v0);
+            v1 = _mm_add_pd(va1,v1);
+            v2 = _mm_add_pd(va2,v2);
+
+            vx  = _mm_load_pd(pb+6);
+            va0 = _mm_load_pd(pa0+6);
+            va1 = _mm_load_pd(pa1+6);
+            va2 = _mm_load_pd(pa2+6);
+            
+            va0 = _mm_mul_pd(va0,vx);
+            va1 = _mm_mul_pd(va1,vx);
+            va2 = _mm_mul_pd(va2,vx);
+            
+            v0 = _mm_add_pd(va0,v0);
+            v1 = _mm_add_pd(va1,v1);
+            v2 = _mm_add_pd(va2,v2);
+            */
+        }
+        for(k=0; k<nb2; k++)
+        {
+            vx  = _mm_load_pd(pb);
+            va0 = _mm_load_pd(pa0);
+            va1 = _mm_load_pd(pa1);
+            va2 = _mm_load_pd(pa2);
+            
+            va0 = _mm_mul_pd(va0,vx);
+            v0 = _mm_add_pd(va0,v0);
+            va1 = _mm_mul_pd(va1,vx);
+            v1 = _mm_add_pd(va1,v1);
+            va2 = _mm_mul_pd(va2,vx);
+            v2 = _mm_add_pd(va2,v2);
+            
+            pa0 += 2;
+            pa1 += 2;
+            pa2 += 2;
+            pb += 2;
+        }
+        for(k=0; k<ntail; k++)
+        {
+            vx  = _mm_load1_pd(pb);
+            va0 = _mm_load1_pd(pa0);
+            va1 = _mm_load1_pd(pa1);
+            va2 = _mm_load1_pd(pa2);
+            
+            va0 = _mm_mul_sd(va0,vx);
+            v0 = _mm_add_sd(v0,va0);
+            va1 = _mm_mul_sd(va1,vx);
+            v1 = _mm_add_sd(v1,va1);
+            va2 = _mm_mul_sd(va2,vx);
+            v2 = _mm_add_sd(v2,va2);
+        }        
+        vtmp = _mm_add_pd(_mm_unpacklo_pd(v0,v1),_mm_unpackhi_pd(v0,v1));
+        _mm_storel_pd(&row0, vtmp);
+        _mm_storeh_pd(&row1, vtmp);
+        v2 = _mm_add_sd(_mm_shuffle_pd(v2,v2,1),v2);
+        _mm_storel_pd(&row2, v2);
+        if( beta!=0 )
+        {
+            y[0] = beta*y[0]+alpha*row0;
+            y[stride] = beta*y[stride]+alpha*row1;
+            y[2*stride] = beta*y[2*stride]+alpha*row2;
+        }
+        else
+        {
+            y[0] = alpha*row0;
+            y[stride] = alpha*row1;
+            y[2*stride] = alpha*row2;
+        }
+        y+=3*stride;
+    }
+    for(i=0; i<mtail; i++)
+    {
+        double row0;
+        row0 = 0;
+        pb = x;
+        pa0 = a;
+        a += alglib_r_block;
+        for(k=0; k<n2; k++)
+        {
+            row0 += pb[0]*pa0[0]+pb[1]*pa0[1];            
+            pa0 += 2;
+            pb += 2;
+        }
+        if( n%2 )
+            row0 += pb[0]*pa0[0];
+        if( beta!=0 )
+            y[0] = beta*y[0]+alpha*row0;
+        else
+            y[0] = alpha*row0;
+        y+=stride;
+    }
+#endif
+}
+
+
+/*************************************************************************
+This subroutine calculates fast MxN complex matrix-vector product:
+
+    y := beta*y + alpha*A*x
+
+using generic C code, where A, x, y, alpha and beta are complex.
+
+If beta is zero, we do not use previous values of y (they are  overwritten
+by alpha*A*x without ever being read). However, when  alpha  is  zero,  we 
+still calculate A*x and  multiply  it  by  alpha  (this distinction can be
+important when A or x contain infinities/NANs).
+
+IMPORTANT:
+* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
+* A must be stored in row-major order, as sequence of double precision
+  pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
+  in doubles).
+* Y may be referenced by cy (pointer to ae_complex) or
+  dy (pointer to array of double precision pair) depending on what type of 
+  output you wish. Pass pointer to Y as one of these parameters,
+  AND SET OTHER PARAMETER TO NULL.
+* both A and x must be aligned; y may be non-aligned.
+*************************************************************************/
+void _ialglib_cmv(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
+{
+    ae_int_t i, j;
+    const double *pa, *parow, *pb;
+
+    parow = a;
+    for(i=0; i<m; i++)
+    {
+        double v0 = 0, v1 = 0;
+        pa = parow;
+        pb = x;
+        for(j=0; j<n; j++)
+        {
+            v0 += pa[0]*pb[0];
+            v1 += pa[0]*pb[1];
+            v0 -= pa[1]*pb[1];
+            v1 += pa[1]*pb[0];
+
+            pa  += 2;
+            pb  += 2;
+        }
+        if( cy!=NULL )
+        {
+            double tx = (beta.x*cy->x-beta.y*cy->y)+(alpha.x*v0-alpha.y*v1);
+            double ty = (beta.x*cy->y+beta.y*cy->x)+(alpha.x*v1+alpha.y*v0);
+            cy->x = tx;
+            cy->y = ty;
+            cy+=stride;
+        }
+        else
+        {
+            double tx = (beta.x*dy[0]-beta.y*dy[1])+(alpha.x*v0-alpha.y*v1);
+            double ty = (beta.x*dy[1]+beta.y*dy[0])+(alpha.x*v1+alpha.y*v0);
+            dy[0] = tx;
+            dy[1] = ty;
+            dy += 2*stride;
+        }
+        parow += 2*alglib_c_block;
+    }
+}
+
+
+/*************************************************************************
+This subroutine calculates fast MxN complex matrix-vector product:
+
+    y := beta*y + alpha*A*x
+
+using generic C code, where A, x, y, alpha and beta are complex.
+
+If beta is zero, we do not use previous values of y (they are  overwritten
+by alpha*A*x without ever being read). However, when  alpha  is  zero,  we 
+still calculate A*x and  multiply  it  by  alpha  (this distinction can be
+important when A or x contain infinities/NANs).
+
+IMPORTANT:
+* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
+* A must be stored in row-major order, as sequence of double precision
+  pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
+  in doubles).
+* Y may be referenced by cy (pointer to ae_complex) or
+  dy (pointer to array of double precision pair) depending on what type of 
+  output you wish. Pass pointer to Y as one of these parameters,
+  AND SET OTHER PARAMETER TO NULL.
+* both A and x must be aligned; y may be non-aligned.
+
+This function supports SSE2; it can be used when:
+1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
+2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
+
+If (1) is failed, this function will still be defined and callable, but it 
+will do nothing.  If (2)  is  failed , call to this function will probably 
+crash your system. 
+
+If  you  want  to  know  whether  it  is safe to call it, you should check 
+results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable 
+and will do its work.
+*************************************************************************/
+void _ialglib_cmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
+{
+#if defined(AE_HAS_SSE2_INTRINSICS)
+    ae_int_t i, j, m2;
+    const double *pa0, *pa1, *parow, *pb;
+    __m128d vbeta, vbetax, vbetay;
+    __m128d valpha, valphax, valphay;
+    
+    m2 = m/2;
+    parow = a;
+    if( cy!=NULL )
+    {
+        dy = (double*)cy;
+        cy = NULL;
+    }
+    vbeta = _mm_loadh_pd(_mm_load_sd(&beta.x),&beta.y);
+    vbetax = _mm_unpacklo_pd(vbeta,vbeta);
+    vbetay = _mm_unpackhi_pd(vbeta,vbeta);
+    valpha = _mm_loadh_pd(_mm_load_sd(&alpha.x),&alpha.y);
+    valphax = _mm_unpacklo_pd(valpha,valpha);
+    valphay = _mm_unpackhi_pd(valpha,valpha);
+    for(i=0; i<m2; i++)
+    {
+        double v0 = 0, v1 = 0, v2 = 0, v3 = 0;
+        double tx, ty;
+        __m128d vx, vy, vt0, vt1, vt2, vt3, vt4, vt5, vrx, vry, vtx, vty, vbeta;
+        pa0 = parow;
+        pa1 = parow+2*alglib_c_block;
+        pb = x;
+        vx = _mm_setzero_pd();
+        vy = _mm_setzero_pd();
+        for(j=0; j<n; j++)
+        {
+            vt0 = _mm_load1_pd(pb);
+            vt1 = _mm_load1_pd(pb+1);
+            vt2 = _mm_load_pd(pa0);
+            vt3 = _mm_load_pd(pa1);
+            vt5 = _mm_unpacklo_pd(vt2,vt3);
+            vt4 = _mm_unpackhi_pd(vt2,vt3);
+            vt2 = vt5;
+            vt3 = vt4;
+            
+            vt2 = _mm_mul_pd(vt2,vt0);
+            vx = _mm_add_pd(vx,vt2);
+            vt3 = _mm_mul_pd(vt3,vt1);
+            vx = _mm_sub_pd(vx,vt3);
+            vt4 = _mm_mul_pd(vt4,vt0);
+            vy = _mm_add_pd(vy,vt4);
+            vt5 = _mm_mul_pd(vt5,vt1);
+            vy = _mm_add_pd(vy,vt5);
+            
+            pa0 += 2;
+            pa1 += 2;
+            pb  += 2;
+        }
+        if( beta.x==0.0 && beta.y==0.0 )
+        {
+            vrx = _mm_setzero_pd();
+            vry = _mm_setzero_pd();
+        }
+        else
+        {
+            vtx = _mm_loadh_pd(_mm_load_sd(dy+0),dy+2*stride+0);
+            vty = _mm_loadh_pd(_mm_load_sd(dy+1),dy+2*stride+1);
+            vrx = _mm_sub_pd(_mm_mul_pd(vbetax,vtx),_mm_mul_pd(vbetay,vty));
+            vry = _mm_add_pd(_mm_mul_pd(vbetax,vty),_mm_mul_pd(vbetay,vtx));
+        }
+        vtx = _mm_sub_pd(_mm_mul_pd(valphax,vx),_mm_mul_pd(valphay,vy));
+        vty = _mm_add_pd(_mm_mul_pd(valphax,vy),_mm_mul_pd(valphay,vx));
+        vrx = _mm_add_pd(vrx,vtx);
+        vry = _mm_add_pd(vry,vty);
+        _mm_storel_pd(dy+0,          vrx);
+        _mm_storeh_pd(dy+2*stride+0, vrx);
+        _mm_storel_pd(dy+1,          vry);
+        _mm_storeh_pd(dy+2*stride+1, vry);
+        dy += 4*stride;        
+        parow += 4*alglib_c_block;
+    }
+    if( m%2 )
+    {
+        double v0 = 0, v1 = 0, v2 = 0, v3 = 0;
+        double tx, ty;
+        pa0 = parow;
+        pb = x;
+        for(j=0; j<n; j++)
+        {
+            v0 += pa0[0]*pb[0];
+            v1 += pa0[0]*pb[1];
+            v0 -= pa0[1]*pb[1];
+            v1 += pa0[1]*pb[0];
+
+            pa0 += 2;
+            pb  += 2;
+        }
+        if( beta.x==0.0 && beta.y==0.0 )
+        {
+            tx = 0.0;
+            ty = 0.0;
+        }
+        else
+        {
+            tx = beta.x*dy[0]-beta.y*dy[1];
+            ty = beta.x*dy[1]+beta.y*dy[0];
+        }
+        tx += alpha.x*v0-alpha.y*v1;
+        ty += alpha.x*v1+alpha.y*v0;
+        dy[0] = tx;
+        dy[1] = ty;
+        dy += 2*stride;
+        parow += 2*alglib_c_block;
+    }
+#endif
+}
+
+/********************************************************************
+This subroutine sets vector to zero
+********************************************************************/
+void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride)
+{
+    ae_int_t i;
+    if( stride==1 )
+    {
+        for(i=0; i<n; i++,p++)
+            *p = 0.0;
+    }
+    else
+    {
+        for(i=0; i<n; i++,p+=stride)
+            *p = 0.0;
+    }
+}
+
+/********************************************************************
+This subroutine sets vector to zero
+********************************************************************/
+void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride)
+{
+    ae_int_t i;
+    if( stride==1 )
+    {
+        for(i=0; i<n; i++,p++)
+        {
+            p->x = 0.0;
+            p->y = 0.0;
+        }
+    }
+    else
+    {
+        for(i=0; i<n; i++,p+=stride)
+        {
+            p->x = 0.0;
+            p->y = 0.0;
+        }
+    }
+}
+
+
+/********************************************************************
+This subroutine copies unaligned real vector
+********************************************************************/
+void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb)
+{
+    ae_int_t i, n2;
+    if( stridea==1 && strideb==1 )
+    {
+        n2 = n/2;
+        for(i=n2; i!=0; i--, a+=2, b+=2)
+        {
+            b[0] = a[0];
+            b[1] = a[1];
+        }
+        if( n%2!=0 )
+            b[0] = a[0];
+    }
+    else
+    {
+        for(i=0; i<n; i++,a+=stridea,b+=strideb)
+            *b = *a;
+    }
+}
+
+
+/********************************************************************
+This subroutine copies unaligned complex vector
+(passed as ae_complex*)
+
+1. strideb is stride measured in complex numbers, not doubles
+2. conj may be "N" (no conj.) or "C" (conj.)
+********************************************************************/
+void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
+{
+    ae_int_t i;
+
+    /*
+     * more general case
+     */
+    if( conj[0]=='N' || conj[0]=='n' )
+    {
+        for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
+        {
+            b[0] = a->x;
+            b[1] = a->y;
+        }
+    }
+    else
+    {
+        for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
+        {
+            b[0] = a->x;
+            b[1] = -a->y;
+        }
+    }
+}
+
+
+/********************************************************************
+This subroutine copies unaligned complex vector (passed as double*)
+
+1. strideb is stride measured in complex numbers, not doubles
+2. conj may be "N" (no conj.) or "C" (conj.)
+********************************************************************/
+void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
+{
+    ae_int_t i;
+
+    /*
+     * more general case
+     */
+    if( conj[0]=='N' || conj[0]=='n' )
+    {
+        for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
+        {
+            b[0] = a[0];
+            b[1] = a[1];
+        }
+    }
+    else
+    {
+        for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
+        {
+            b[0] = a[0];
+            b[1] = -a[1];
+        }
+    }
+}
+
+
+/********************************************************************
+This subroutine copies matrix from  non-aligned non-contigous storage
+to aligned contigous storage
+
+A:
+* MxN
+* non-aligned
+* non-contigous
+* may be transformed during copying (as prescribed by op)
+
+B:
+* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
+* aligned
+* stride is alglib_r_block
+
+Transformation types:
+* 0 - no transform
+* 1 - transposition
+********************************************************************/
+void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
+{
+    ae_int_t i, j, n2;
+    const double *psrc;
+    double *pdst;
+    if( op==0 )
+    {
+        n2 = n/2;
+        for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc=a)
+        {
+            for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
+            {
+                pdst[0] = psrc[0];
+                pdst[1] = psrc[1];
+            }
+            if( n%2!=0 )
+                pdst[0] = psrc[0];
+        }
+    }
+    else
+    {
+        n2 = n/2;
+        for(i=0,psrc=a; i<m; i++,a+=stride,b+=1,psrc=a)
+        {
+            for(j=0,pdst=b; j<n2; j++,pdst+=alglib_twice_r_block,psrc+=2)
+            {
+                pdst[0] = psrc[0];
+                pdst[alglib_r_block] = psrc[1];
+            }
+            if( n%2!=0 )
+                pdst[0] = psrc[0];
+        }
+    }
+}
+
+
+/********************************************************************
+This subroutine copies matrix from  non-aligned non-contigous storage
+to aligned contigous storage
+
+A:
+* MxN
+* non-aligned
+* non-contigous
+* may be transformed during copying (as prescribed by op)
+
+B:
+* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
+* aligned
+* stride is alglib_r_block
+
+Transformation types:
+* 0 - no transform
+* 1 - transposition
+
+This function supports SSE2; it can be used when:
+1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
+2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
+
+If (1) is failed, this function will still be defined and callable, but it 
+will do nothing.  If (2)  is  failed , call to this function will probably 
+crash your system. 
+
+If  you  want  to  know  whether  it  is safe to call it, you should check 
+results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable 
+and will do its work.
+********************************************************************/
+void _ialglib_mcopyblock_sse2(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
+{
+#if defined(AE_HAS_SSE2_INTRINSICS)
+    ae_int_t i, j, nb8, mb2, ntail;
+    const double *psrc0, *psrc1;
+    double *pdst;
+    nb8 = n/8;
+    ntail = n-8*nb8;
+    if( op==0 )
+    {
+        for(i=0,psrc0=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc0=a)
+        {
+            pdst=b;
+            for(j=0; j<nb8; j++)
+            {
+                __m128d v0, v1;
+                v0 = _mm_loadu_pd(psrc0);
+                _mm_store_pd(pdst, v0);
+                v1 = _mm_loadu_pd(psrc0+2);
+                _mm_store_pd(pdst+2, v1);
+                v1 = _mm_loadu_pd(psrc0+4);
+                _mm_store_pd(pdst+4, v1);
+                v1 = _mm_loadu_pd(psrc0+6);
+                _mm_store_pd(pdst+6, v1);
+                pdst+=8;
+                psrc0+=8;
+            }
+            for(j=0; j<ntail; j++)
+                pdst[j] = psrc0[j];
+        }
+    }
+    else
+    {
+        const double *arow0, *arow1;
+        double *bcol0, *bcol1, *pdst0, *pdst1;
+        ae_int_t nb4, ntail, n2;
+                
+        n2 = n/2;
+        mb2 = m/2;
+        nb4 = n/4;
+        ntail = n-4*nb4;
+        
+        arow0 = a;
+        arow1 = a+stride;
+        bcol0 = b;
+        bcol1 = b+1;
+        for(i=0; i<mb2; i++)
+        {
+            psrc0 = arow0;
+            psrc1 = arow1;
+            pdst0 = bcol0;
+            pdst1 = bcol1;
+            for(j=0; j<nb4; j++)
+            {
+                __m128d v0, v1, v2, v3;
+                v0 = _mm_loadu_pd(psrc0);
+                v1 = _mm_loadu_pd(psrc1);
+                v2 = _mm_loadu_pd(psrc0+2);
+                v3 = _mm_loadu_pd(psrc1+2);
+                _mm_store_pd(pdst0, _mm_unpacklo_pd(v0,v1));
+                _mm_store_pd(pdst0+alglib_r_block, _mm_unpackhi_pd(v0,v1));                
+                _mm_store_pd(pdst0+2*alglib_r_block, _mm_unpacklo_pd(v2,v3));
+                _mm_store_pd(pdst0+3*alglib_r_block, _mm_unpackhi_pd(v2,v3));
+
+                pdst0 += 4*alglib_r_block;
+                pdst1 += 4*alglib_r_block;
+                psrc0 += 4;
+                psrc1 += 4;
+            }
+            for(j=0; j<ntail; j++)
+            {
+                pdst0[0] = psrc0[0];
+                pdst1[0] = psrc1[0];
+                pdst0 += alglib_r_block;
+                pdst1 += alglib_r_block;
+                psrc0 += 1;
+                psrc1 += 1;
+            }
+            arow0 += 2*stride;
+            arow1 += 2*stride;
+            bcol0 += 2;
+            bcol1 += 2;
+        }
+        if( m%2 )
+        {
+            psrc0 = arow0;
+            pdst0 = bcol0;
+            for(j=0; j<n2; j++)
+            {
+                pdst0[0] = psrc0[0];
+                pdst0[alglib_r_block] = psrc0[1];
+                pdst0 += alglib_twice_r_block;
+                psrc0 += 2;
+            }
+            if( n%2!=0 )
+                pdst0[0] = psrc0[0];
+        }
+    }
+#endif
+}
+
+
+/********************************************************************
+This subroutine copies matrix from  aligned contigous storage to non-
+aligned non-contigous storage
+
+A:
+* MxN
+* aligned
+* contigous
+* stride is alglib_r_block
+* may be transformed during copying (as prescribed by op)
+
+B:
+* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
+* non-aligned, non-contigous
+
+Transformation types:
+* 0 - no transform
+* 1 - transposition
+********************************************************************/
+void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride)
+{
+    ae_int_t i, j, n2;
+    const double *psrc;
+    double *pdst;
+    if( op==0 )
+    {
+        n2 = n/2;
+        for(i=0,psrc=a; i<m; i++,a+=alglib_r_block,b+=stride,psrc=a)
+        {
+            for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
+            {
+                pdst[0] = psrc[0];
+                pdst[1] = psrc[1];
+            }
+            if( n%2!=0 )
+                pdst[0] = psrc[0];
+        }
+    }
+    else
+    {
+        n2 = n/2;
+        for(i=0,psrc=a; i<m; i++,a++,b+=stride,psrc=a)
+        {
+            for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=alglib_twice_r_block)
+            {
+                pdst[0] = psrc[0];
+                pdst[1] = psrc[alglib_r_block];
+            }
+            if( n%2!=0 )
+                pdst[0] = psrc[0];
+        }
+    }
+}
+
+
+/********************************************************************
+This subroutine copies matrix from  non-aligned non-contigous storage
+to aligned contigous storage
+
+A:
+* MxN
+* non-aligned
+* non-contigous
+* may be transformed during copying (as prescribed by op)
+* pointer to ae_complex is passed
+
+B:
+* 2*alglib_c_block*alglib_c_block doubles (only MxN/NxM submatrix is used)
+* aligned
+* stride is alglib_c_block
+* pointer to double is passed
+
+Transformation types:
+* 0 - no transform
+* 1 - transposition
+* 2 - conjugate transposition
+* 3 - conjugate, but no  transposition
+********************************************************************/
+void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b)
+{
+    ae_int_t i, j;
+    const ae_complex *psrc;
+    double *pdst;
+    if( op==0 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
+            {
+                pdst[0] = psrc->x;
+                pdst[1] = psrc->y;
+            }
+    }
+    if( op==1 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
+            {
+                pdst[0] = psrc->x;
+                pdst[1] = psrc->y;
+            }
+    }
+    if( op==2 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
+            {
+                pdst[0] = psrc->x;
+                pdst[1] = -psrc->y;
+            }
+    }
+    if( op==3 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
+            {
+                pdst[0] = psrc->x;
+                pdst[1] = -psrc->y;
+            }
+    }
+}
+
+
+/********************************************************************
+This subroutine copies matrix from aligned contigous storage to
+non-aligned non-contigous storage
+
+A:
+* 2*alglib_c_block*alglib_c_block doubles (only MxN submatrix is used)
+* aligned
+* stride is alglib_c_block
+* pointer to double is passed
+* may be transformed during copying (as prescribed by op)
+
+B:
+* MxN
+* non-aligned
+* non-contigous
+* pointer to ae_complex is passed
+
+Transformation types:
+* 0 - no transform
+* 1 - transposition
+* 2 - conjugate transposition
+* 3 - conjugate, but no  transposition
+********************************************************************/
+void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride)
+{
+    ae_int_t i, j;
+    const double *psrc;
+    ae_complex *pdst;
+    if( op==0 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
+            {
+                pdst->x = psrc[0];
+                pdst->y = psrc[1];
+            }
+    }
+    if( op==1 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
+            {
+                pdst->x = psrc[0];
+                pdst->y = psrc[1];
+            }
+    }
+    if( op==2 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
+            {
+                pdst->x = psrc[0];
+                pdst->y = -psrc[1];
+            }
+    }
+    if( op==3 )
+    {
+        for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
+            for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
+            {
+                pdst->x = psrc[0];
+                pdst->y = -psrc[1];
+            }
+    }
+}
+
+
+/********************************************************************
+Real GEMM kernel
+********************************************************************/
+ae_bool _ialglib_rmatrixgemm(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     double *_a,
+     ae_int_t _a_stride,
+     ae_int_t optypea,
+     double *_b,
+     ae_int_t _b_stride,
+     ae_int_t optypeb,
+     double beta,
+     double *_c,
+     ae_int_t _c_stride)
+{
+    int i;
+    double *crow;
+    double __abuf[alglib_r_block+alglib_simd_alignment];
+    double __b[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double * const abuf = (double * const) ae_align(__abuf,alglib_simd_alignment);
+    double * const b    = (double * const) ae_align(__b,   alglib_simd_alignment);
+    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
+    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
+    
+    if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block || m<=0 || n<=0 || k<=0 || alpha==0.0 )
+        return ae_false;
+
+    /*
+     * Check for SSE2 support
+     */
+    if( ae_cpuid() & CPU_SSE2 )
+    {
+        rmv = &_ialglib_rmv_sse2;
+        mcopyblock = &_ialglib_mcopyblock_sse2;
+    }    
+    
+    /*
+     * copy b
+     */
+    if( optypeb==0 )
+        mcopyblock(k, n, _b, 1, _b_stride, b);
+    else
+        mcopyblock(n, k, _b, 0, _b_stride, b);
+
+    /*
+     * multiply B by A (from the right, by rows)
+     * and store result in C
+     */
+    crow  = _c;
+    if( optypea==0 )
+    {
+        const double *arow = _a;
+        for(i=0; i<m; i++)
+        {
+            _ialglib_vcopy(k, arow, 1, abuf, 1);
+            if( beta==0 )
+                _ialglib_vzero(n, crow, 1);
+            rmv(n, k, b, abuf, crow, 1, alpha, beta);
+            crow += _c_stride;
+            arow += _a_stride;
+        }
+    }
+    else
+    {
+        const double *acol = _a;
+        for(i=0; i<m; i++)
+        {
+            _ialglib_vcopy(k, acol, _a_stride, abuf, 1);
+            if( beta==0 )
+                _ialglib_vzero(n, crow, 1);
+            rmv(n, k, b, abuf, crow, 1, alpha, beta);
+            crow += _c_stride;
+            acol++;
+        }
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+Complex GEMM kernel
+********************************************************************/
+ae_bool _ialglib_cmatrixgemm(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     ae_complex *_a,
+     ae_int_t _a_stride,
+     ae_int_t optypea,
+     ae_complex *_b,
+     ae_int_t _b_stride,
+     ae_int_t optypeb,
+     ae_complex beta,
+     ae_complex *_c,
+     ae_int_t _c_stride)
+ {
+    const ae_complex *arow;
+    ae_complex *crow;
+    ae_int_t i;
+    double _loc_abuf[2*alglib_c_block+alglib_simd_alignment];
+    double _loc_b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double * const abuf = (double * const) ae_align(_loc_abuf,alglib_simd_alignment);
+    double * const b    = (double * const) ae_align(_loc_b,   alglib_simd_alignment);
+    ae_int_t brows;
+    ae_int_t bcols;
+    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
+    
+    if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block )
+        return ae_false;
+
+    /*
+     * Check for SSE2 support
+     */
+    if( ae_cpuid() & CPU_SSE2 )
+    {
+        cmv = &_ialglib_cmv_sse2;
+    }    
+    
+    /*
+     * copy b
+     */
+    brows = optypeb==0 ? k : n;
+    bcols = optypeb==0 ? n : k;
+    if( optypeb==0 )
+        _ialglib_mcopyblock_complex(brows, bcols, _b, 1, _b_stride, b);
+    if( optypeb==1 )
+        _ialglib_mcopyblock_complex(brows, bcols, _b, 0, _b_stride, b);
+    if( optypeb==2 )
+        _ialglib_mcopyblock_complex(brows, bcols, _b, 3, _b_stride, b);
+
+    /*
+     * multiply B by A (from the right, by rows)
+     * and store result in C
+     */
+    arow  = _a;
+    crow  = _c;
+    for(i=0; i<m; i++)
+    {
+        if( optypea==0 )
+        {
+            _ialglib_vcopy_complex(k, arow, 1, abuf, 1, "No conj");
+            arow += _a_stride;
+        }
+        else if( optypea==1 )
+        {
+            _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "No conj");
+            arow++;
+        }
+        else
+        {
+            _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "Conj");
+            arow++;
+        }
+        if( beta.x==0 && beta.y==0 )
+            _ialglib_vzero_complex(n, crow, 1);
+        cmv(n, k, b, abuf, crow, NULL, 1, alpha, beta);
+        crow += _c_stride;
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+complex TRSM kernel
+********************************************************************/
+ae_bool _ialglib_cmatrixrighttrsm(ae_int_t m,
+     ae_int_t n,
+     ae_complex *_a,
+     ae_int_t _a_stride,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_complex *_x,
+     ae_int_t _x_stride)
+{
+    /*
+     * local buffers
+     */
+    double *pdiag;
+    ae_int_t i;
+    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
+    double * const abuf   = (double * const) ae_align(_loc_abuf,  alglib_simd_alignment);
+    double * const xbuf   = (double * const) ae_align(_loc_xbuf,  alglib_simd_alignment);
+    double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment);
+    ae_bool uppera;
+    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
+    
+    if( m>alglib_c_block || n>alglib_c_block )
+        return ae_false;
+
+    /*
+     * Check for SSE2 support
+     */
+    if( ae_cpuid() & CPU_SSE2 )
+    {
+        cmv = &_ialglib_cmv_sse2;
+    }    
+    
+    /*
+     * Prepare
+     */
+    _ialglib_mcopyblock_complex(n, n, _a, optype, _a_stride, abuf);
+    _ialglib_mcopyblock_complex(m, n, _x, 0, _x_stride, xbuf);
+    if( isunit )
+        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
+        {
+            pdiag[0] = 1.0;
+            pdiag[1] = 0.0;
+        }
+    if( optype==0 )
+        uppera = isupper;
+    else
+        uppera = !isupper;
+
+    /*
+     * Solve Y*A^-1=X where A is upper or lower triangular
+     */
+    if( uppera )
+    {
+        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
+        {
+            ae_complex tmp_c;
+            ae_complex beta;
+            ae_complex alpha;
+            tmp_c.x = pdiag[0];
+            tmp_c.y = pdiag[1];
+            beta = ae_c_d_div(1.0, tmp_c);
+            alpha.x = -beta.x;
+            alpha.y = -beta.y;
+            _ialglib_vcopy_dcomplex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj");
+            cmv(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
+    }
+    else
+    {
+        for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
+        {
+            ae_complex tmp_c;
+            ae_complex beta;
+            ae_complex alpha;
+            tmp_c.x = pdiag[0];
+            tmp_c.y = pdiag[1];
+            beta = ae_c_d_div(1.0, tmp_c);
+            alpha.x = -beta.x;
+            alpha.y = -beta.y;
+            _ialglib_vcopy_dcomplex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj");
+            cmv(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+real TRSM kernel
+********************************************************************/
+ae_bool _ialglib_rmatrixrighttrsm(ae_int_t m,
+     ae_int_t n,
+     double *_a,
+     ae_int_t _a_stride,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     double *_x,
+     ae_int_t _x_stride)
+{
+    /*
+     * local buffers
+     */
+    double *pdiag;
+    ae_int_t i;
+    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
+    double * const abuf   = (double * const) ae_align(_loc_abuf,  alglib_simd_alignment);
+    double * const xbuf   = (double * const) ae_align(_loc_xbuf,  alglib_simd_alignment);
+    double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment);
+    ae_bool uppera;
+    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
+    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
+    
+    if( m>alglib_r_block || n>alglib_r_block )
+        return ae_false;
+
+    /*
+     * Check for SSE2 support
+     */
+    if( ae_cpuid() & CPU_SSE2 )
+    {
+        rmv = &_ialglib_rmv_sse2;
+        mcopyblock = &_ialglib_mcopyblock_sse2;
+    }    
+    
+    /*
+     * Prepare
+     */
+    mcopyblock(n, n, _a, optype, _a_stride, abuf);
+    mcopyblock(m, n, _x, 0, _x_stride, xbuf);
+    if( isunit )
+        for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
+            *pdiag = 1.0;
+    if( optype==0 )
+        uppera = isupper;
+    else
+        uppera = !isupper;
+
+    /*
+     * Solve Y*A^-1=X where A is upper or lower triangular
+     */
+    if( uppera )
+    {
+        for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
+        {
+            double beta  = 1.0/(*pdiag);
+            double alpha = -beta;
+            _ialglib_vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1);
+            rmv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
+    }
+    else
+    {
+        for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1)
+        {
+            double beta = 1.0/(*pdiag);
+            double alpha = -beta;
+            _ialglib_vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf+i+1, 1);
+            rmv(m, n-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+complex TRSM kernel
+********************************************************************/
+ae_bool _ialglib_cmatrixlefttrsm(ae_int_t m,
+     ae_int_t n,
+     ae_complex *_a,
+     ae_int_t _a_stride,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_complex *_x,
+     ae_int_t _x_stride)
+{
+    /*
+     * local buffers
+     */
+    double *pdiag, *arow;
+    ae_int_t i;
+    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
+    double * const abuf   = (double * const) ae_align(_loc_abuf,  alglib_simd_alignment);
+    double * const xbuf   = (double * const) ae_align(_loc_xbuf,  alglib_simd_alignment);
+    double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment);
+    ae_bool uppera;
+    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
+    
+    if( m>alglib_c_block || n>alglib_c_block )
+        return ae_false;
+
+    /*
+     * Check for SSE2 support
+     */
+    if( ae_cpuid() & CPU_SSE2 )
+    {
+        cmv = &_ialglib_cmv_sse2;
+    }    
+    
+    /*
+     * Prepare
+     * Transpose X (so we may use mv, which calculates A*x, but not x*A)
+     */
+    _ialglib_mcopyblock_complex(m, m, _a, optype, _a_stride, abuf);
+    _ialglib_mcopyblock_complex(m, n, _x, 1, _x_stride, xbuf);
+    if( isunit )
+        for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1))
+        {
+            pdiag[0] = 1.0;
+            pdiag[1] = 0.0;
+        }
+    if( optype==0 )
+        uppera = isupper;
+    else
+        uppera = !isupper;
+
+    /*
+     * Solve A^-1*Y^T=X^T where A is upper or lower triangular
+     */
+    if( uppera )
+    {
+        for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
+        {
+            ae_complex tmp_c;
+            ae_complex beta;
+            ae_complex alpha;
+            tmp_c.x = pdiag[0];
+            tmp_c.y = pdiag[1];
+            beta = ae_c_d_div(1.0, tmp_c);
+            alpha.x = -beta.x;
+            alpha.y = -beta.y;
+            _ialglib_vcopy_dcomplex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj");
+            cmv(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
+    }
+    else
+    {   for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block)
+        {
+            ae_complex tmp_c;
+            ae_complex beta;
+            ae_complex alpha;
+            tmp_c.x = pdiag[0];
+            tmp_c.y = pdiag[1];
+            beta = ae_c_d_div(1.0, tmp_c);
+            alpha.x = -beta.x;
+            alpha.y = -beta.y;
+            _ialglib_vcopy_dcomplex(i, arow, 1, tmpbuf, 1, "No conj");
+            cmv(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+real TRSM kernel
+********************************************************************/
+ae_bool _ialglib_rmatrixlefttrsm(ae_int_t m,
+     ae_int_t n,
+     double *_a,
+     ae_int_t _a_stride,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     double *_x,
+     ae_int_t _x_stride)
+{
+    /*
+     * local buffers
+     */
+    double *pdiag, *arow;
+    ae_int_t i;
+    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
+    double * const abuf   = (double * const) ae_align(_loc_abuf,  alglib_simd_alignment);
+    double * const xbuf   = (double * const) ae_align(_loc_xbuf,  alglib_simd_alignment);
+    double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment);
+    ae_bool uppera;
+    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
+    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
+    
+    if( m>alglib_r_block || n>alglib_r_block )
+        return ae_false;
+
+    /*
+     * Check for SSE2 support
+     */
+    if( ae_cpuid() & CPU_SSE2 )
+    {
+        rmv = &_ialglib_rmv_sse2;
+        mcopyblock = &_ialglib_mcopyblock_sse2;
+    }    
+    
+    /*
+     * Prepare
+     * Transpose X (so we may use mv, which calculates A*x, but not x*A)
+     */
+    mcopyblock(m, m, _a, optype, _a_stride, abuf);
+    mcopyblock(m, n, _x, 1, _x_stride, xbuf);
+    if( isunit )
+        for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1)
+            *pdiag = 1.0;
+    if( optype==0 )
+        uppera = isupper;
+    else
+        uppera = !isupper;
+
+    /*
+     * Solve A^-1*Y^T=X^T where A is upper or lower triangular
+     */
+    if( uppera )
+    {
+        for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1)
+        {
+            double beta = 1.0/(*pdiag);
+            double alpha = -beta;
+            _ialglib_vcopy(m-1-i, pdiag+1, 1, tmpbuf+i+1, 1);
+            rmv(n, m-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
+    }
+    else
+    {   for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block)
+        {
+            double beta = 1.0/(*pdiag);
+            double alpha = -beta;
+            _ialglib_vcopy(i, arow, 1, tmpbuf, 1);
+            rmv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
+        }
+        _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+complex SYRK kernel
+********************************************************************/
+ae_bool _ialglib_cmatrixsyrk(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_complex *_a,
+     ae_int_t _a_stride,
+     ae_int_t optypea,
+     double beta,
+     ae_complex *_c,
+     ae_int_t _c_stride,
+     ae_bool isupper)
+{
+    /*
+     * local buffers
+     */
+    double *arow, *crow;
+    ae_complex c_alpha, c_beta;
+    ae_int_t i;
+    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double _loc_cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
+    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
+    double * const abuf   = (double * const) ae_align(_loc_abuf,  alglib_simd_alignment);
+    double * const cbuf   = (double * const) ae_align(_loc_cbuf,  alglib_simd_alignment);
+    double * const tmpbuf = (double * const) ae_align(_loc_tmpbuf,alglib_simd_alignment);
+
+    if( n>alglib_c_block || k>alglib_c_block )
+        return ae_false;
+    if( n==0 )
+        return ae_true;
+
+    /*
+     * copy A and C, task is transformed to "A*A^H"-form.
+     * if beta==0, then C is filled by zeros (and not referenced)
+     *
+     * alpha==0 or k==0 are correctly processed (A is not referenced)
+     */
+    c_alpha.x = alpha;
+    c_alpha.y = 0;
+    c_beta.x = beta;
+    c_beta.y = 0;
+    if( alpha==0 )
+        k = 0;
+    if( k>0 )
+    {
+        if( optypea==0 )
+            _ialglib_mcopyblock_complex(n, k, _a, 3, _a_stride, abuf);
+        else
+            _ialglib_mcopyblock_complex(k, n, _a, 1, _a_stride, abuf);
+    }
+    _ialglib_mcopyblock_complex(n, n, _c, 0, _c_stride, cbuf);
+    if( beta==0 )
+    {
+        for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block)
+            if( isupper )
+                _ialglib_vzero(2*(n-i), crow+2*i, 1);
+            else
+                _ialglib_vzero(2*(i+1), crow, 1);
+    }
+
+
+    /*
+     * update C
+     */
+    if( isupper )
+    {
+        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
+        {
+            _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
+            _ialglib_cmv(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, c_alpha, c_beta);
+        }
+    }
+    else
+    {
+        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
+        {
+            _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
+            _ialglib_cmv(i+1, k, abuf, tmpbuf, NULL, crow, 1, c_alpha, c_beta);
+        }
+    }
+
+    /*
+     * copy back
+     */
+    _ialglib_mcopyunblock_complex(n, n, cbuf, 0, _c, _c_stride);
+
+    return ae_true;
+}
+
+
+/********************************************************************
+real SYRK kernel
+********************************************************************/
+ae_bool _ialglib_rmatrixsyrk(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     double *_a,
+     ae_int_t _a_stride,
+     ae_int_t optypea,
+     double beta,
+     double *_c,
+     ae_int_t _c_stride,
+     ae_bool isupper)
+{
+    /*
+     * local buffers
+     */
+    double *arow, *crow;
+    ae_int_t i;
+    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double _loc_cbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
+    double * const abuf   = (double * const) ae_align(_loc_abuf,  alglib_simd_alignment);
+    double * const cbuf   = (double * const) ae_align(_loc_cbuf,  alglib_simd_alignment);
+
+    if( n>alglib_r_block || k>alglib_r_block )
+        return ae_false;
+    if( n==0 )
+        return ae_true;
+
+    /*
+     * copy A and C, task is transformed to "A*A^T"-form.
+     * if beta==0, then C is filled by zeros (and not referenced)
+     *
+     * alpha==0 or k==0 are correctly processed (A is not referenced)
+     */
+    if( alpha==0 )
+        k = 0;
+    if( k>0 )
+    {
+        if( optypea==0 )
+            _ialglib_mcopyblock(n, k, _a, 0, _a_stride, abuf);
+        else
+            _ialglib_mcopyblock(k, n, _a, 1, _a_stride, abuf);
+    }
+    _ialglib_mcopyblock(n, n, _c, 0, _c_stride, cbuf);
+    if( beta==0 )
+    {
+        for(i=0,crow=cbuf; i<n; i++,crow+=alglib_r_block)
+            if( isupper )
+                _ialglib_vzero(n-i, crow+i, 1);
+            else
+                _ialglib_vzero(i+1, crow, 1);
+    }
+
+
+    /*
+     * update C
+     */
+    if( isupper )
+    {
+        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
+        {
+            _ialglib_rmv(n-i, k, arow, arow, crow+i, 1, alpha, beta);
+        }
+    }
+    else
+    {
+        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
+        {
+            _ialglib_rmv(i+1, k, abuf, arow, crow, 1, alpha, beta);
+        }
+    }
+
+    /*
+     * copy back
+     */
+    _ialglib_mcopyunblock(n, n, cbuf, 0, _c, _c_stride);
+
+    return ae_true;
+}
+
+
+/********************************************************************
+complex rank-1 kernel
+********************************************************************/
+ae_bool _ialglib_cmatrixrank1(ae_int_t m,
+     ae_int_t n,
+     ae_complex *_a,
+     ae_int_t _a_stride,
+     ae_complex *_u,
+     ae_complex *_v)
+{
+    ae_complex *arow, *pu, *pv, *vtmp, *dst;
+    ae_int_t n2 = n/2;
+    ae_int_t i, j;
+
+    /*
+     * update pairs of rows
+     */
+    arow  = _a;
+    pu    = _u;
+    vtmp  = _v;
+    for(i=0; i<m; i++, arow+=_a_stride, pu++)
+    {
+        /*
+         * update by two
+         */
+        for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2)
+        {
+            double ux  = pu[0].x;
+            double uy  = pu[0].y;
+            double v0x = pv[0].x;
+            double v0y = pv[0].y;
+            double v1x = pv[1].x;
+            double v1y = pv[1].y;
+            dst[0].x += ux*v0x-uy*v0y;
+            dst[0].y += ux*v0y+uy*v0x;
+            dst[1].x += ux*v1x-uy*v1y;
+            dst[1].y += ux*v1y+uy*v1x;
+        }
+
+        /*
+         * final update
+         */
+        if( n%2!=0 )
+        {
+            double ux = pu[0].x;
+            double uy = pu[0].y;
+            double vx = pv[0].x;
+            double vy = pv[0].y;
+            dst[0].x += ux*vx-uy*vy;
+            dst[0].y += ux*vy+uy*vx;
+        }
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+real rank-1 kernel
+********************************************************************/
+ae_bool _ialglib_rmatrixrank1(ae_int_t m,
+     ae_int_t n,
+     double *_a,
+     ae_int_t _a_stride,
+     double *_u,
+     double *_v)
+{
+    double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
+    ae_int_t m2 = m/2;
+    ae_int_t n2 = n/2;
+    ae_int_t stride  = _a_stride;
+    ae_int_t stride2 = 2*_a_stride;
+    ae_int_t i, j;
+
+    /*
+     * update pairs of rows
+     */
+    arow0 = _a;
+    arow1 = arow0+stride;
+    pu    = _u;
+    vtmp  = _v;
+    for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
+    {
+        /*
+         * update by two
+         */
+        for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
+        {
+            dst0[0] += pu[0]*pv[0];
+            dst0[1] += pu[0]*pv[1];
+            dst1[0] += pu[1]*pv[0];
+            dst1[1] += pu[1]*pv[1];
+        }
+
+        /*
+         * final update
+         */
+        if( n%2!=0 )
+        {
+            dst0[0] += pu[0]*pv[0];
+            dst1[0] += pu[1]*pv[0];
+        }
+    }
+
+    /*
+     * update last row
+     */
+    if( m%2!=0 )
+    {
+        /*
+         * update by two
+         */
+        for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
+        {
+            dst0[0] += pu[0]*pv[0];
+            dst0[1] += pu[0]*pv[1];
+        }
+
+        /*
+         * final update
+         */
+        if( n%2!=0 )
+            dst0[0] += pu[0]*pv[0];
+    }
+    return ae_true;
+}
+
+
+/********************************************************************
+Interface functions for efficient kernels
+********************************************************************/
+ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_matrix *_a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     ae_matrix *_b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     ae_matrix *_c,
+     ae_int_t ic,
+     ae_int_t jc)
+{
+    return _ialglib_rmatrixgemm(m, n, k, alpha, _a->ptr.pp_double[ia]+ja, _a->stride, optypea, _b->ptr.pp_double[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_double[ic]+jc, _c->stride);
+}
+
+ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     ae_matrix *_a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     ae_matrix *_b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     ae_matrix *_c,
+     ae_int_t ic,
+     ae_int_t jc)
+{
+    return _ialglib_cmatrixgemm(m, n, k, alpha, _a->ptr.pp_complex[ia]+ja, _a->stride, optypea, _b->ptr.pp_complex[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_complex[ic]+jc, _c->stride);
+}
+
+ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2)
+{
+    return _ialglib_cmatrixrighttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
+}
+
+ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2)
+{
+    return _ialglib_rmatrixrighttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
+}
+
+ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2)
+{
+    return _ialglib_cmatrixlefttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
+}
+
+ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2)
+{
+    return _ialglib_rmatrixlefttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
+}
+
+ae_bool _ialglib_i_cmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     ae_matrix *c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper)
+{
+    return _ialglib_cmatrixsyrk(n, k, alpha, &a->ptr.pp_complex[ia][ja], a->stride, optypea, beta, &c->ptr.pp_complex[ic][jc], c->stride, isupper);
+}
+
+ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     ae_matrix *c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper)
+{
+    return _ialglib_rmatrixsyrk(n, k, alpha, &a->ptr.pp_double[ia][ja], a->stride, optypea, beta, &c->ptr.pp_double[ic][jc], c->stride, isupper);
+}
+
+ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_vector *u,
+     ae_int_t uoffs,
+     ae_vector *v,
+     ae_int_t voffs)
+{
+    return _ialglib_cmatrixrank1(m, n, &a->ptr.pp_complex[ia][ja], a->stride, &u->ptr.p_complex[uoffs], &v->ptr.p_complex[voffs]);
+}
+
+ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_vector *u,
+     ae_int_t uoffs,
+     ae_vector *v,
+     ae_int_t voffs)
+{
+    return _ialglib_rmatrixrank1(m, n, &a->ptr.pp_double[ia][ja], a->stride, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
+}
+
+
+
+
+/********************************************************************
+This function reads rectangular matrix A given by two column pointers
+col0 and col1 and stride src_stride and moves it into contiguous row-
+by-row storage given by dst.
+
+It can handle following special cases:
+* col1==NULL    in this case second column of A is filled by zeros
+********************************************************************/
+void _ialglib_pack_n2(
+    double *col0,
+    double *col1,
+    ae_int_t n,
+    ae_int_t src_stride,
+    double *dst)
+{
+    ae_int_t n2, j, stride2;
+    
+    /*
+     * handle special case
+     */
+    if( col1==NULL )
+    {
+        for(j=0; j<n; j++)
+        {
+            dst[0] = *col0;
+            dst[1] = 0.0;
+            col0 += src_stride;
+            dst  += 2;
+        }
+        return;
+    }
+
+    /*
+     * handle general case
+     */
+    n2 = n/2;
+    stride2 = src_stride*2;
+    for(j=0; j<n2; j++)
+    {
+        dst[0] = *col0;
+        dst[1] = *col1;
+        dst[2] = col0[src_stride];
+        dst[3] = col1[src_stride];
+        col0 += stride2;
+        col1 += stride2;
+        dst  += 4;
+    }
+    if( n%2 )
+    {
+        dst[0] = *col0;
+        dst[1] = *col1;
+    }
+}
+
+/*************************************************************************
+This function reads rectangular matrix A given by two column pointers col0 
+and  col1  and  stride src_stride and moves it into  contiguous row-by-row 
+storage given by dst.
+
+dst must be aligned, col0 and col1 may be non-aligned.
+
+It can handle following special cases:
+* col1==NULL        in this case second column of A is filled by zeros
+* src_stride==1     efficient SSE-based code is used
+* col1-col0==1      efficient SSE-based code is used
+
+This function supports SSE2; it can be used when:
+1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
+2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
+
+If (1) is failed, this function will still be defined and callable, but it 
+will do nothing.  If (2)  is  failed , call to this function will probably 
+crash your system. 
+
+If  you  want  to  know  whether  it  is safe to call it, you should check 
+results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable 
+and will do its work.
+*************************************************************************/
+void _ialglib_pack_n2_sse2(
+    double *col0,
+    double *col1,
+    ae_int_t n,
+    ae_int_t src_stride,
+    double *dst)
+{
+#if defined(AE_HAS_SSE2_INTRINSICS)
+    ae_int_t n2, j, stride2;
+    
+    /*
+     * handle special case: col1==NULL
+     */
+    if( col1==NULL )
+    {
+        for(j=0; j<n; j++)
+        {
+            dst[0] = *col0;
+            dst[1] = 0.0;
+            col0 += src_stride;
+            dst  += 2;
+        }
+        return;
+    }
+
+    /*
+     * handle unit stride
+     */
+    if( src_stride==1 )
+    {
+        __m128d v0, v1, r0, r1;
+        n2 = n/2;
+        for(j=0; j<n2; j++)
+        {
+            v0 = _mm_loadu_pd(col0);
+            col0 += 2;
+            v1 = _mm_loadu_pd(col1);
+            col1 += 2;
+            _mm_store_pd(dst,  _mm_unpacklo_pd(v0,v1));
+            _mm_store_pd(dst+2,_mm_unpackhi_pd(v0,v1));
+            dst  += 4;
+        }
+        if( n%2 )
+        {
+            dst[0] = *col0;
+            dst[1] = *col1;
+        }
+        return;
+    }
+
+    /*
+     * handle col1-col0==1
+     */
+    if( col1-col0==1 )
+    {
+        __m128d v0, v1;
+        n2 = n/2;
+        stride2 = 2*src_stride;
+        for(j=0; j<n2; j++)
+        {
+            v0 = _mm_loadu_pd(col0);
+            v1 = _mm_loadu_pd(col0+src_stride);
+            _mm_store_pd(dst,  v0);
+            _mm_store_pd(dst+2,v1);
+            col0 += stride2;
+            dst  += 4;
+        }
+        if( n%2 )
+        {
+            dst[0] = col0[0];
+            dst[1] = col0[1];
+        }
+        return;
+    }
+    
+    /*
+     * handle general case
+     */
+    n2 = n/2;
+    stride2 = src_stride*2;
+    for(j=0; j<n2; j++)
+    {
+        dst[0] = *col0;
+        dst[1] = *col1;
+        dst[2] = col0[src_stride];
+        dst[3] = col1[src_stride];
+        col0 += stride2;
+        col1 += stride2;
+        dst  += 4;
+    }
+    if( n%2 )
+    {
+        dst[0] = *col0;
+        dst[1] = *col1;
+    }
+#endif
+}
+
+
+/********************************************************************
+This function calculates R := alpha*A'*B+beta*R where A and B are Kx2 
+matrices stored in contiguous row-by-row storage,  R  is  2x2  matrix
+stored in non-contiguous row-by-row storage.
+
+A and B must be aligned; R may be non-aligned.
+
+If beta is zero, contents of R is ignored (not  multiplied  by zero -
+just ignored).
+
+However, when alpha is zero, we still calculate A'*B, which is 
+multiplied by zero afterwards.
+
+Function accepts additional parameter store_mode:
+* if 0, full R is stored
+* if 1, only first row of R is stored
+* if 2, only first column of R is stored
+* if 3, only top left element of R is stored
+********************************************************************/
+void _ialglib_mm22(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
+{
+    double v00, v01, v10, v11;
+    ae_int_t t;
+    v00 = 0.0;
+    v01 = 0.0;
+    v10 = 0.0;
+    v11 = 0.0;
+    for(t=0; t<k; t++)
+    {
+        v00 += a[0]*b[0];
+        v01 += a[0]*b[1];
+        v10 += a[1]*b[0];
+        v11 += a[1]*b[1];
+        a+=2;
+        b+=2;
+    }
+    if( store_mode==0 )
+    {
+        if( beta==0 )
+        {
+            r[0] = alpha*v00;
+            r[1] = alpha*v01;
+            r[stride+0] = alpha*v10;
+            r[stride+1] = alpha*v11;
+        }
+        else
+        {
+            r[0] = beta*r[0] + alpha*v00;
+            r[1] = beta*r[1] + alpha*v01;
+            r[stride+0] = beta*r[stride+0] + alpha*v10;
+            r[stride+1] = beta*r[stride+1] + alpha*v11;
+        }
+        return;
+    }
+    if( store_mode==1 )
+    {
+        if( beta==0 )
+        {
+            r[0] = alpha*v00;
+            r[1] = alpha*v01;
+        }
+        else
+        {
+            r[0] = beta*r[0] + alpha*v00;
+            r[1] = beta*r[1] + alpha*v01;
+        }
+        return;
+    }
+    if( store_mode==2 )
+    {
+        if( beta==0 )
+        {
+            r[0] =alpha*v00;
+            r[stride+0] = alpha*v10;
+        }
+        else
+        {
+            r[0] = beta*r[0] + alpha*v00; 
+            r[stride+0] = beta*r[stride+0] + alpha*v10;
+        }
+        return;
+    }
+    if( store_mode==3 )
+    {
+        if( beta==0 )
+        {
+            r[0] = alpha*v00;
+        }
+        else
+        {
+            r[0] = beta*r[0] + alpha*v00;
+        }
+        return;
+    }
+}
+
+
+/********************************************************************
+This function calculates R := alpha*A'*B+beta*R where A and B are Kx2 
+matrices stored in contiguous row-by-row storage,  R  is  2x2  matrix
+stored in non-contiguous row-by-row storage.
+
+A and B must be aligned; R may be non-aligned.
+
+If beta is zero, contents of R is ignored (not  multiplied  by zero -
+just ignored).
+
+However, when alpha is zero, we still calculate A'*B, which is 
+multiplied by zero afterwards.
+
+Function accepts additional parameter store_mode:
+* if 0, full R is stored
+* if 1, only first row of R is stored
+* if 2, only first column of R is stored
+* if 3, only top left element of R is stored
+
+This function supports SSE2; it can be used when:
+1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
+2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
+
+If (1) is failed, this function will still be defined and callable, but it 
+will do nothing.  If (2)  is  failed , call to this function will probably 
+crash your system. 
+
+If  you  want  to  know  whether  it  is safe to call it, you should check 
+results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable 
+and will do its work.
+********************************************************************/
+void _ialglib_mm22_sse2(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
+{
+#if defined(AE_HAS_SSE2_INTRINSICS)
+    /*
+     * We calculate product of two Kx2 matrices (result is 2x2). 
+     * VA and VB store result as follows:
+     *
+     *        [ VD[0]  VE[0] ]
+     * A'*B = [              ]
+     *        [ VE[1]  VD[1] ]
+     *
+     */
+    __m128d va, vb, vd, ve, vt, vt0, vt1, r0, r1, valpha, vbeta; 
+    ae_int_t t, k2, k3;
+    
+    /*
+     * calculate product
+     */
+    k2 = k/2;
+    vd = _mm_setzero_pd();
+    ve = _mm_setzero_pd();
+    for(t=0; t<k2; t++)
+    {
+        vb = _mm_load_pd(b);
+        va = _mm_load_pd(a);
+        vt = vb;
+        vb = _mm_mul_pd(va,vb);
+        vt = _mm_shuffle_pd(vt, vt, 1);
+        vd = _mm_add_pd(vb,vd);        
+        vt = _mm_mul_pd(va,vt);
+        vb = _mm_load_pd(b+2);
+        ve = _mm_add_pd(vt,ve);
+        va = _mm_load_pd(a+2);
+        vt = vb;
+        vb = _mm_mul_pd(va,vb);
+        vt = _mm_shuffle_pd(vt, vt, 1);
+        vd = _mm_add_pd(vb,vd);
+        vt = _mm_mul_pd(va,vt);
+        ve = _mm_add_pd(vt,ve);
+        a+=4;
+        b+=4;
+    }
+    if( k%2 )
+    {
+        va = _mm_load_pd(a);
+        vb = _mm_load_pd(b);
+        vt = _mm_shuffle_pd(vb, vb, 1);
+        vd = _mm_add_pd(_mm_mul_pd(va,vb),vd);
+        ve = _mm_add_pd(_mm_mul_pd(va,vt),ve);
+    }    
+    
+    /*
+     * r0 is first row of alpha*A'*B, r1 is second row
+     */
+    valpha = _mm_load1_pd(&alpha);
+    r0 = _mm_mul_pd(_mm_unpacklo_pd(vd,ve),valpha);
+    r1 = _mm_mul_pd(_mm_unpackhi_pd(ve,vd),valpha);
+    
+    /*
+     * store
+     */
+    if( store_mode==0 )
+    {
+        if( beta==0 )
+        {
+            _mm_storeu_pd(r,r0);
+            _mm_storeu_pd(r+stride,r1);
+        }
+        else
+        {
+            vbeta = _mm_load1_pd(&beta);
+            _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r0));
+            _mm_storeu_pd(r+stride,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r1));
+        }
+        return;
+    }
+    if( store_mode==1 )
+    {
+        if( beta==0 )
+            _mm_storeu_pd(r,r0);
+        else
+            _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),_mm_load1_pd(&beta)),r0));
+        return;
+    }
+    if( store_mode==2 )
+    {
+        double buf[4];
+        _mm_storeu_pd(buf,r0);
+        _mm_storeu_pd(buf+2,r1);
+        if( beta==0 )
+        {
+            r[0] =buf[0];
+            r[stride+0] = buf[2];
+        }
+        else
+        {
+            r[0] = beta*r[0] + buf[0]; 
+            r[stride+0] = beta*r[stride+0] + buf[2];
+        }
+        return;
+    }
+    if( store_mode==3 )
+    {
+        double buf[2];
+        _mm_storeu_pd(buf,r0);
+        if( beta==0 )
+            r[0] = buf[0];
+        else
+            r[0] = beta*r[0] + buf[0];
+        return;
+    }
+#endif
+}
+
+
+/*************************************************************************
+This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0  and  B1 
+are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4  matrix 
+stored in non-contiguous row-by-row storage.
+
+A, B0 and B1 must be aligned; R may be non-aligned.
+
+Note  that  B0  and  B1  are  two  separate  matrices  stored in different 
+locations.
+
+If beta is zero, contents of R is ignored (not  multiplied  by zero - just 
+ignored).
+
+However,  when  alpha  is  zero , we still calculate MM product,  which is 
+multiplied by zero afterwards.
+
+Unlike mm22 functions, this function does NOT support partial  output of R 
+- we always store full 2x4 matrix.
+*************************************************************************/
+void _ialglib_mm22x2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
+{
+    _ialglib_mm22(alpha, a, b0, k, beta, r, stride, 0);
+    _ialglib_mm22(alpha, a, b1, k, beta, r+2, stride, 0);
+}
+
+/*************************************************************************
+This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0  and  B1 
+are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4  matrix 
+stored in non-contiguous row-by-row storage.
+
+A, B0 and B1 must be aligned; R may be non-aligned.
+
+Note  that  B0  and  B1  are  two  separate  matrices  stored in different 
+locations.
+
+If beta is zero, contents of R is ignored (not  multiplied  by zero - just 
+ignored).
+
+However,  when  alpha  is  zero , we still calculate MM product,  which is 
+multiplied by zero afterwards.
+
+Unlike mm22 functions, this function does NOT support partial  output of R 
+- we always store full 2x4 matrix.
+
+This function supports SSE2; it can be used when:
+1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
+2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
+
+If (1) is failed, this function will still be defined and callable, but it 
+will do nothing.  If (2)  is  failed , call to this function will probably 
+crash your system. 
+
+If  you  want  to  know  whether  it  is safe to call it, you should check 
+results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable 
+and will do its work.
+*************************************************************************/
+void _ialglib_mm22x2_sse2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
+{
+#if defined(AE_HAS_SSE2_INTRINSICS)
+    /*
+     * We calculate product of two Kx2 matrices (result is 2x2). 
+     * V0, V1, V2, V3 store result as follows:
+     *
+     *     [ V0[0]  V1[1] V2[0]  V3[1] ]
+     * R = [                           ]
+     *     [ V1[0]  V0[1] V3[0]  V2[1] ]
+     *
+     * VA0 stores current 1x2 block of A, VA1 stores shuffle of VA0,
+     * VB0 and VB1 are used to store two copies of 1x2 block of B0 or B1
+     * (both vars store same data - either B0 or B1). Results from multiplication
+     * by VA0/VA1 are stored in VB0/VB1 too.
+     * 
+     */
+    __m128d v0, v1, v2, v3, va0, va1, vb0, vb1; 
+    __m128d r00, r01, r10, r11, valpha, vbeta; 
+    ae_int_t t, k2;
+    
+    k2 = k/2;
+    v0 = _mm_setzero_pd();
+    v1 = _mm_setzero_pd();
+    v2 = _mm_setzero_pd();
+    v3 = _mm_setzero_pd();
+    for(t=0; t<k; t++)
+    {
+        va0 = _mm_load_pd(a);
+        vb0 = _mm_load_pd(b0);
+        va1 = _mm_load_pd(a);
+        
+        vb0 = _mm_mul_pd(va0,vb0);
+        vb1 = _mm_load_pd(b0);
+        v0 = _mm_add_pd(v0,vb0);        
+        vb1 = _mm_mul_pd(va1,vb1);
+        vb0 = _mm_load_pd(b1);
+        v1 = _mm_add_pd(v1,vb1);        
+        
+        vb0 = _mm_mul_pd(va0,vb0);
+        vb1 = _mm_load_pd(b1);
+        v2 = _mm_add_pd(v2,vb0);        
+        vb1 = _mm_mul_pd(va1,vb1);
+        v3 = _mm_add_pd(v3,vb1);        
+
+        a+=2;
+        b0+=2;
+        b1+=2;
+    }
+
+    /*
+     * shuffle V1 and V3 (conversion to more convenient storage format):
+     *
+     *     [ V0[0]  V1[0] V2[0]  V3[0] ]
+     * R = [                           ]
+     *     [ V1[1]  V0[1] V3[1]  V2[1] ]
+     *
+     * unpack results to
+     *
+     * [ r00 r01 ]
+     * [ r10 r11 ]
+     *
+     */
+    valpha = _mm_load1_pd(&alpha);
+    v1 = _mm_shuffle_pd(v1, v1, 1);
+    v3 = _mm_shuffle_pd(v3, v3, 1);
+    r00 = _mm_mul_pd(_mm_unpacklo_pd(v0,v1),valpha);
+    r10 = _mm_mul_pd(_mm_unpackhi_pd(v1,v0),valpha);
+    r01 = _mm_mul_pd(_mm_unpacklo_pd(v2,v3),valpha);
+    r11 = _mm_mul_pd(_mm_unpackhi_pd(v3,v2),valpha);
+    
+    /*
+     * store
+     */
+    if( beta==0 )
+    {
+        _mm_storeu_pd(r,r00);
+        _mm_storeu_pd(r+2,r01);
+        _mm_storeu_pd(r+stride,r10);
+        _mm_storeu_pd(r+stride+2,r11);
+    }
+    else
+    {
+        vbeta = _mm_load1_pd(&beta);
+        _mm_storeu_pd(r,          _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r00));
+        _mm_storeu_pd(r+2,        _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+2),vbeta),r01));
+        _mm_storeu_pd(r+stride,   _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r10));
+        _mm_storeu_pd(r+stride+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride+2),vbeta),r11));
+    }    
+#endif
+}
+
+}
diff --git a/contrib/lbfgs/ap.h b/contrib/lbfgs/ap.h
new file mode 100755
index 0000000000..eef1d97acc
--- /dev/null
+++ b/contrib/lbfgs/ap.h
@@ -0,0 +1,1203 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#ifndef _ap_h
+#define _ap_h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string>
+#include <cstring>
+#include <math.h>
+
+#ifdef __BORLANDC__
+#include <list.h>
+#include <vector.h>
+#else
+#include <list>
+#include <vector>
+#endif
+
+#define AE_USE_CPP
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS DECLARATIONS FOR BASIC FUNCTIONALITY 
+// LIKE MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS SHARED 
+// BETWEEN C++ AND PURE C LIBRARIES
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+/*
+ * definitions
+ */
+#define AE_UNKNOWN 0
+#define AE_MSVC 1
+#define AE_GNUC 2
+#define AE_SUNC 3
+#define AE_INTEL 1
+#define AE_SPARC 2
+ 
+/*
+ * automatically determine compiler
+ */
+#define AE_COMPILER AE_UNKNOWN
+#ifdef __GNUC__
+#undef AE_COMPILER
+#define AE_COMPILER AE_GNUC
+#endif
+#ifdef __SUNPRO_C
+#undef AE_COMPILER
+#define AE_COMPILER AE_SUNC
+#endif
+#ifdef _MSC_VER
+#undef AE_COMPILER
+#define AE_COMPILER AE_MSVC
+#endif 
+
+/*
+ * if we work under C++ environment, define several conditions
+ */
+#ifdef AE_USE_CPP
+#define AE_USE_CPP_BOOL
+#define AE_USE_CPP_ERROR_HANDLING
+#endif
+
+
+/*
+ * define ae_int32_t, ae_int64_t, ae_int_t, ae_bool, ae_complex, ae_error_type and ae_datatype
+ */
+#if defined(AE_HAVE_STDINT)
+#include <stdint.h>
+#endif
+
+#if defined(AE_INT32_T)
+typedef AE_INT32_T ae_int32_t;
+#endif
+#if defined(AE_HAVE_STDINT) && !defined(AE_INT32_T)
+typedef int32_t ae_int32_t;
+#endif
+#if !defined(AE_HAVE_STDINT) && !defined(AE_INT32_T)
+#if AE_COMPILER==AE_MSVC
+typedef _int32 ae_int32_t;
+#endif
+#if (AE_COMPILER==AE_GNUC) || (AE_COMPILER==AE_SUNC) || (AE_COMPILER==AE_UNKNOWN)
+typedef int ae_int32_t;
+#endif
+#endif
+
+#if defined(AE_INT64_T)
+typedef AE_INT64_T ae_int64_t;
+#endif
+#if defined(AE_HAVE_STDINT) && !defined(AE_INT64_T)
+typedef int64_t ae_int64_t;
+#endif
+#if !defined(AE_HAVE_STDINT) && !defined(AE_INT64_T)
+#if AE_COMPILER==AE_MSVC
+typedef _int64 ae_int64_t;
+#endif
+#if (AE_COMPILER==AE_GNUC) || (AE_COMPILER==AE_SUNC) || (AE_COMPILER==AE_UNKNOWN)
+typedef signed long long ae_int64_t;
+#endif
+#endif
+
+#if !defined(AE_INT_T)
+typedef ptrdiff_t ae_int_t;
+#endif
+
+#if !defined(AE_USE_CPP_BOOL)
+#define ae_bool char
+#define ae_true 1
+#define ae_false 0
+#else
+#define ae_bool bool
+#define ae_true true
+#define ae_false false
+#endif
+
+
+/*
+ * SSE2 intrinsics
+ *
+ * Preprocessor directives below:
+ * - include headers for SSE2 intrinsics
+ * - define AE_HAS_SSE2_INTRINSICS definition
+ *
+ * These actions are performed when we have:
+ * - x86 architecture definition (AE_CPU==AE_INTEL)
+ * - compiler which supports intrinsics
+ *
+ * Presence of AE_HAS_SSE2_INTRINSICS does NOT mean that our CPU
+ * actually supports SSE2 - such things should be determined at runtime
+ * with ae_cpuid() call. It means that we are working under Intel and
+ * out compiler can issue SSE2-capable code.
+ *
+ */
+#if defined(AE_CPU)
+#if AE_CPU==AE_INTEL
+
+#ifdef AE_USE_CPP
+} // end of namespace declaration, subsequent includes must be out of namespace
+#endif
+
+#if AE_COMPILER==AE_MSVC
+#include <emmintrin.h>
+#define AE_HAS_SSE2_INTRINSICS
+#endif
+
+#if (AE_COMPILER==AE_GNUC)||(AE_COMPILER==AE_SUNC)
+#include <xmmintrin.h>
+#define AE_HAS_SSE2_INTRINSICS
+#endif
+
+#ifdef AE_USE_CPP
+namespace alglib_impl { // namespace declaration continued
+#endif
+
+#endif
+#endif
+
+
+typedef struct { double x, y; } ae_complex;
+
+typedef enum
+{
+    ERR_OK = 0,
+    ERR_OUT_OF_MEMORY = 1,
+    ERR_XARRAY_TOO_LARGE = 2,
+    ERR_ASSERTION_FAILED = 3
+} ae_error_type;
+
+typedef ae_int_t ae_datatype;
+
+/*
+ * other definitions
+ */
+enum { OWN_CALLER=1, OWN_AE=2 };
+enum { ACT_UNCHANGED=1, ACT_SAME_LOCATION=2, ACT_NEW_LOCATION=3 };
+enum { DT_BOOL=1, DT_INT=2, DT_REAL=3, DT_COMPLEX=4 };
+enum { CPU_SSE2=1 };
+
+
+/************************************************************************
+x-string (zero-terminated):
+    owner       OWN_CALLER or OWN_AE. Determines what to do on realloc().
+                If vector is owned by caller, X-interface  will  just set
+                ptr to NULL before realloc(). If it is  owned  by  X,  it
+                will call ae_free/x_free/aligned_free family functions.
+
+    last_action ACT_UNCHANGED, ACT_SAME_LOCATION, ACT_NEW_LOCATION
+                contents is either: unchanged, stored at the same location,
+                stored at the new location.
+                this field is set on return from X.
+
+    ptr         pointer to the actual data
+
+Members of this structure are ae_int64_t to avoid alignment problems.
+************************************************************************/
+typedef struct
+{
+    ae_int64_t     owner;
+    ae_int64_t     last_action;
+    char *ptr;
+} x_string;
+
+/************************************************************************
+x-vector:
+    cnt         number of elements
+
+    datatype    one of the DT_XXXX values
+
+    owner       OWN_CALLER or OWN_AE. Determines what to do on realloc().
+                If vector is owned by caller, X-interface  will  just set
+                ptr to NULL before realloc(). If it is  owned  by  X,  it
+                will call ae_free/x_free/aligned_free family functions.
+
+    last_action ACT_UNCHANGED, ACT_SAME_LOCATION, ACT_NEW_LOCATION
+                contents is either: unchanged, stored at the same location,
+                stored at the new location.
+                this field is set on return from X interface and may be
+                used by caller as hint when deciding what to do with data
+                (if it was ACT_UNCHANGED or ACT_SAME_LOCATION, no array
+                reallocation or copying is required).
+
+    ptr         pointer to the actual data
+
+Members of this structure are ae_int64_t to avoid alignment problems.
+************************************************************************/
+typedef struct
+{
+    ae_int64_t     cnt;
+    ae_int64_t     datatype;
+    ae_int64_t     owner;
+    ae_int64_t     last_action;
+    void *ptr;
+} x_vector;
+
+
+/************************************************************************
+x-matrix:
+    rows        number of rows. may be zero only when cols is zero too.
+
+    cols        number of columns. may be zero only when rows is zero too.
+
+    stride      stride, i.e. distance between first elements of rows (in bytes)
+
+    datatype    one of the DT_XXXX values
+
+    owner       OWN_CALLER or OWN_AE. Determines what to do on realloc().
+                If vector is owned by caller, X-interface  will  just set
+                ptr to NULL before realloc(). If it is  owned  by  X,  it
+                will call ae_free/x_free/aligned_free family functions.
+
+    last_action ACT_UNCHANGED, ACT_SAME_LOCATION, ACT_NEW_LOCATION
+                contents is either: unchanged, stored at the same location,
+                stored at the new location.
+                this field is set on return from X interface and may be
+                used by caller as hint when deciding what to do with data
+                (if it was ACT_UNCHANGED or ACT_SAME_LOCATION, no array
+                reallocation or copying is required).
+
+    ptr         pointer to the actual data, stored rowwise
+
+Members of this structure are ae_int64_t to avoid alignment problems.
+************************************************************************/
+typedef struct
+{
+    ae_int64_t     rows;
+    ae_int64_t     cols;
+    ae_int64_t     stride;
+    ae_int64_t     datatype;
+    ae_int64_t     owner;
+    ae_int64_t     last_action;
+    void *ptr;
+} x_matrix;
+
+
+/************************************************************************
+dynamic block which may be automatically deallocated during stack unwinding
+
+p_next          next block in the stack unwinding list.
+                NULL means that this block is not in the list
+deallocator     deallocator function which should be used to deallocate block.
+                NULL for "special" blocks (frame/stack boundaries)
+ptr             pointer which should be passed to the deallocator.
+                may be null (for zero-size block), DYN_BOTTOM or DYN_FRAME
+                for "special" blocks (frame/stack boundaries).
+
+************************************************************************/
+typedef struct ae_dyn_block
+{
+    struct ae_dyn_block * volatile p_next;
+    /* void *deallocator; */
+    void (*deallocator)(void*);
+    void * volatile ptr;
+} ae_dyn_block;
+
+/************************************************************************
+frame marker
+************************************************************************/
+typedef struct ae_frame
+{
+    ae_dyn_block db_marker;
+} ae_frame;
+
+typedef struct
+{
+    ae_int_t endianness;
+    double v_nan;
+    double v_posinf;
+    double v_neginf;
+    
+    ae_dyn_block * volatile p_top_block;
+    ae_dyn_block last_block;
+#ifndef AE_USE_CPP_ERROR_HANDLING
+    jmp_buf * volatile break_jump;
+#endif
+    ae_error_type volatile last_error;
+    const char* volatile error_msg;
+} ae_state;
+
+typedef void(*ae_deallocator)(void*);
+
+typedef struct ae_vector
+{
+    ae_int_t cnt;
+    ae_datatype datatype;
+    ae_dyn_block data;
+    union
+    {
+        void *p_ptr;
+        ae_bool *p_bool;
+        ae_int_t *p_int;
+        double *p_double;
+        ae_complex *p_complex;
+    } ptr;
+} ae_vector;
+
+typedef struct ae_matrix
+{
+    ae_int_t rows;
+    ae_int_t cols;
+    ae_int_t stride;
+    ae_datatype datatype;
+    ae_dyn_block data;
+    union
+    {
+        void *p_ptr;
+        void **pp_void;
+        ae_bool **pp_bool;
+        ae_int_t **pp_int;
+        double **pp_double;
+        ae_complex **pp_complex;
+    } ptr;
+} ae_matrix;
+ 
+ae_int_t ae_misalignment(const void *ptr, size_t alignment);
+void* ae_align(void *ptr, size_t alignment);
+void* aligned_malloc(size_t size, size_t alignment);
+void  aligned_free(void *block);
+
+void* ae_malloc(size_t size, ae_state *state);
+void  ae_free(void *p);
+ae_int_t ae_sizeof(ae_datatype datatype);
+
+void ae_state_init(ae_state *state);
+void ae_state_clear(ae_state *state);
+#ifndef AE_USE_CPP_ERROR_HANDLING
+void ae_state_set_break_jump(ae_state *state, jmp_buf *buf);
+#endif
+void ae_break(ae_state *state, ae_error_type error_type, const char *msg);
+
+void ae_frame_make(ae_state *state, ae_frame *tmp);
+void ae_frame_leave(ae_state *state);
+
+void ae_db_attach(ae_dyn_block *block, ae_state *state);
+ae_bool ae_db_malloc(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic);
+ae_bool ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state);
+void ae_db_free(ae_dyn_block *block);
+void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2);
+
+ae_bool ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic);
+ae_bool ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic);
+void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic);
+ae_bool ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state);
+void ae_vector_clear(ae_vector *dst);
+void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2);
+
+ae_bool ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic);
+ae_bool ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic);
+void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic);
+ae_bool ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state);
+void ae_matrix_clear(ae_matrix *dst);
+void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2);
+
+void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state);
+void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state);
+void ae_x_attach_to_vector(x_vector *dst, ae_vector *src);
+void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src);
+
+void x_vector_clear(x_vector *dst);
+
+ae_bool x_is_symmetric(x_matrix *a);
+ae_bool x_is_hermitian(x_matrix *a);
+ae_bool x_force_symmetric(x_matrix *a);
+ae_bool x_force_hermitian(x_matrix *a);
+ae_bool ae_is_symmetric(ae_matrix *a);
+ae_bool ae_is_hermitian(ae_matrix *a);
+ae_bool ae_force_symmetric(ae_matrix *a);
+ae_bool ae_force_hermitian(ae_matrix *a);
+
+/************************************************************************
+Service functions
+************************************************************************/
+void ae_assert(ae_bool cond, const char *msg, ae_state *state);
+ae_int_t ae_cpuid();
+
+/************************************************************************
+Real math functions:
+* IEEE-compliant floating point comparisons
+* standard functions
+************************************************************************/
+ae_bool ae_fp_eq(double v1, double v2);
+ae_bool ae_fp_neq(double v1, double v2);
+ae_bool ae_fp_less(double v1, double v2);
+ae_bool ae_fp_less_eq(double v1, double v2);
+ae_bool ae_fp_greater(double v1, double v2);
+ae_bool ae_fp_greater_eq(double v1, double v2);
+
+ae_bool ae_isfinite_stateless(double x, ae_int_t endianness);
+ae_bool ae_isnan_stateless(double x,    ae_int_t endianness);
+ae_bool ae_isinf_stateless(double x,    ae_int_t endianness);
+ae_bool ae_isposinf_stateless(double x, ae_int_t endianness);
+ae_bool ae_isneginf_stateless(double x, ae_int_t endianness);
+
+ae_int_t ae_get_endianness();
+
+ae_bool ae_isfinite(double x,ae_state *state);
+ae_bool ae_isnan(double x,   ae_state *state);
+ae_bool ae_isinf(double x,   ae_state *state);
+ae_bool ae_isposinf(double x,ae_state *state);
+ae_bool ae_isneginf(double x,ae_state *state);
+
+double   ae_fabs(double x,   ae_state *state);
+ae_int_t ae_iabs(ae_int_t x, ae_state *state);
+double   ae_sqr(double x,    ae_state *state);
+double   ae_sqrt(double x,   ae_state *state);
+
+ae_int_t ae_sign(double x,   ae_state *state);
+ae_int_t ae_round(double x,  ae_state *state);
+ae_int_t ae_trunc(double x,  ae_state *state);
+ae_int_t ae_ifloor(double x, ae_state *state);
+ae_int_t ae_iceil(double x,  ae_state *state);
+
+ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state);
+ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state);
+double   ae_maxreal(double m1, double m2, ae_state *state);
+double   ae_minreal(double m1, double m2, ae_state *state);
+double   ae_randomreal(ae_state *state);
+ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state);
+
+double   ae_sin(double x, ae_state *state);
+double   ae_cos(double x, ae_state *state);
+double   ae_tan(double x, ae_state *state);
+double   ae_sinh(double x, ae_state *state);
+double   ae_cosh(double x, ae_state *state);
+double   ae_tanh(double x, ae_state *state);
+double   ae_asin(double x, ae_state *state);
+double   ae_acos(double x, ae_state *state);
+double   ae_atan(double x, ae_state *state);
+double   ae_atan2(double x, double y, ae_state *state);
+
+double   ae_log(double x, ae_state *state);
+double   ae_pow(double x, double y, ae_state *state);
+double   ae_exp(double x, ae_state *state);
+
+/************************************************************************
+Complex math functions:
+* basic arithmetic operations
+* standard functions
+************************************************************************/
+ae_complex ae_complex_from_d(double v);
+
+ae_complex ae_c_neg(ae_complex lhs);
+ae_bool ae_c_eq(ae_complex lhs,       ae_complex rhs);
+ae_bool ae_c_neq(ae_complex lhs,      ae_complex rhs);
+ae_complex ae_c_add(ae_complex lhs,   ae_complex rhs);
+ae_complex ae_c_mul(ae_complex lhs,   ae_complex rhs);
+ae_complex ae_c_sub(ae_complex lhs,   ae_complex rhs);
+ae_complex ae_c_div(ae_complex lhs,   ae_complex rhs);
+ae_bool ae_c_eq_d(ae_complex lhs,     double rhs);
+ae_bool ae_c_neq_d(ae_complex lhs,    double rhs);
+ae_complex ae_c_add_d(ae_complex lhs, double rhs);
+ae_complex ae_c_mul_d(ae_complex lhs, double rhs);
+ae_complex ae_c_sub_d(ae_complex lhs, double rhs);
+ae_complex ae_c_d_sub(double lhs,     ae_complex rhs);
+ae_complex ae_c_div_d(ae_complex lhs, double rhs);
+ae_complex ae_c_d_div(double lhs,   ae_complex rhs);
+
+ae_complex ae_c_conj(ae_complex lhs, ae_state *state);
+ae_complex ae_c_sqr(ae_complex lhs, ae_state *state);
+double     ae_c_abs(ae_complex z, ae_state *state);
+
+/************************************************************************
+Complex BLAS operations
+************************************************************************/
+ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n);
+void ae_v_cmove(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void ae_v_cmoved(ae_complex *vdst,   ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha);
+void ae_v_cmovec(ae_complex *vdst,   ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha);
+void ae_v_cadd(ae_complex *vdst,     ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void ae_v_caddd(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha);
+void ae_v_caddc(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha);
+void ae_v_csub(ae_complex *vdst,     ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha);
+void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha);
+void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha);
+void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha);
+
+/************************************************************************
+Real BLAS operations
+************************************************************************/
+double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n);
+void ae_v_move(double *vdst,    ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n);
+void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n);
+void ae_v_moved(double *vdst,   ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n, double alpha);
+void ae_v_add(double *vdst,     ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n);
+void ae_v_addd(double *vdst,    ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha);
+void ae_v_sub(double *vdst,     ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n);
+void ae_v_subd(double *vdst,    ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha);
+void ae_v_muld(double *vdst,  ae_int_t stride_dst, ae_int_t n, double alpha);
+
+/************************************************************************
+Other functions
+************************************************************************/
+ae_int_t ae_v_len(ae_int_t a, ae_int_t b);
+
+/*
+extern const double ae_machineepsilon;
+extern const double ae_maxrealnumber;
+extern const double ae_minrealnumber;
+extern const double ae_pi;
+*/
+#define ae_machineepsilon 5E-16
+#define ae_maxrealnumber  1E300
+#define ae_minrealnumber  1E-300
+#define ae_pi 3.1415926535897932384626433832795
+
+
+/************************************************************************
+RComm functions
+************************************************************************/
+typedef struct rcommstate
+{
+    int stage;
+    ae_vector ia;
+    ae_vector ba;
+    ae_vector ra;
+    ae_vector ca;
+} rcommstate;
+ae_bool _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic);
+void _rcommstate_clear(rcommstate* p);
+
+#ifdef AE_USE_ALLOC_COUNTER
+extern ae_int64_t _alloc_counter;
+#endif
+
+
+/************************************************************************
+debug functions (must be turned on by preprocessor definitions:
+* tickcount(), which is wrapper around GetTickCount()
+************************************************************************/
+#ifdef AE_DEBUG4WINDOWS
+#include <windows.h>
+#include <stdio.h>
+#define tickcount(s) GetTickCount()
+#define flushconsole(s) fflush(stdout)
+#endif
+
+
+
+}
+
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS DECLARATIONS FOR C++ RELATED FUNCTIONALITY
+//
+/////////////////////////////////////////////////////////////////////////
+
+namespace alglib
+{
+
+typedef alglib_impl::ae_int_t ae_int_t;
+
+/********************************************************************
+Class forwards
+********************************************************************/
+class complex;
+
+ae_int_t vlen(ae_int_t n1, ae_int_t n2);
+
+/********************************************************************
+Exception class.
+********************************************************************/
+class ap_error
+{
+public:
+    std::string msg;
+    
+    ap_error();
+    ap_error(const char *s);
+    static void make_assertion(bool bClause);
+    static void make_assertion(bool bClause, const char *msg);
+private:
+};
+
+/********************************************************************
+Complex number with double precision.
+********************************************************************/
+class complex
+{
+public:
+    complex();
+    complex(const double &_x);
+    complex(const double &_x, const double &_y);
+    complex(const complex &z);
+
+    complex& operator= (const double& v);
+    complex& operator+=(const double& v);
+    complex& operator-=(const double& v);
+    complex& operator*=(const double& v);
+    complex& operator/=(const double& v);
+
+    complex& operator= (const complex& z);
+    complex& operator+=(const complex& z);
+    complex& operator-=(const complex& z);
+    complex& operator*=(const complex& z);
+    complex& operator/=(const complex& z);
+
+    alglib_impl::ae_complex*       c_ptr();
+    const alglib_impl::ae_complex* c_ptr() const;
+    
+    std::string tostring(int dps) const;
+
+    double x, y;
+};
+
+const alglib::complex operator/(const alglib::complex& lhs, const alglib::complex& rhs);
+const bool operator==(const alglib::complex& lhs, const alglib::complex& rhs);
+const bool operator!=(const alglib::complex& lhs, const alglib::complex& rhs);
+const alglib::complex operator+(const alglib::complex& lhs);
+const alglib::complex operator-(const alglib::complex& lhs);
+const alglib::complex operator+(const alglib::complex& lhs, const alglib::complex& rhs);
+const alglib::complex operator+(const alglib::complex& lhs, const double& rhs);
+const alglib::complex operator+(const double& lhs, const alglib::complex& rhs);
+const alglib::complex operator-(const alglib::complex& lhs, const alglib::complex& rhs);
+const alglib::complex operator-(const alglib::complex& lhs, const double& rhs);
+const alglib::complex operator-(const double& lhs, const alglib::complex& rhs);
+const alglib::complex operator*(const alglib::complex& lhs, const alglib::complex& rhs);
+const alglib::complex operator*(const alglib::complex& lhs, const double& rhs);
+const alglib::complex operator*(const double& lhs, const alglib::complex& rhs);
+const alglib::complex operator/(const alglib::complex& lhs, const alglib::complex& rhs);
+const alglib::complex operator/(const double& lhs, const alglib::complex& rhs);
+const alglib::complex operator/(const alglib::complex& lhs, const double& rhs);
+double abscomplex(const alglib::complex &z);
+alglib::complex conj(const alglib::complex &z);
+alglib::complex csqr(const alglib::complex &z);
+
+/********************************************************************
+Level 1 BLAS functions
+
+NOTES:
+* destination and source should NOT overlap
+* stride is assumed to be positive, but it is not 
+  assert'ed within function
+* conj_src parameter specifies whether complex source is conjugated 
+  before processing or not. Pass string which starts with 'N' or 'n'
+  ("No conj", for example) to use unmodified parameter. All other
+  values will result in conjugation of input, but it is recommended
+  to use "Conj" in such cases.
+********************************************************************/
+double vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n);
+double vdotproduct(const double *v1, const double *v2, ae_int_t N);
+
+alglib::complex vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n);
+alglib::complex vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N);
+
+void vmove(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n);
+void vmove(double *vdst, const double* vsrc, ae_int_t N);
+
+void vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N);
+
+void vmoveneg(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n);
+void vmoveneg(double *vdst, const double *vsrc, ae_int_t N);
+
+void vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N);
+
+void vmove(double *vdst,  ae_int_t stride_dst, const double* vsrc,  ae_int_t stride_src, ae_int_t n, double alpha);
+void vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha);
+
+void vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha);
+void vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha);
+
+void vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha);
+void vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha);
+
+void vadd(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n);
+void vadd(double *vdst, const double *vsrc, ae_int_t N);
+
+void vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N);
+
+void vadd(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha);
+void vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha);
+
+void vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha);
+void vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha);
+
+void vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha);
+void vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha);
+
+void vsub(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n);
+void vsub(double *vdst, const double *vsrc, ae_int_t N);
+
+void vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n);
+void vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N);
+
+void vsub(double *vdst,  ae_int_t stride_dst, const double *vsrc,  ae_int_t stride_src, ae_int_t n, double alpha);
+void vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha);
+
+void vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha);
+void vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha);
+
+void vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha);
+void vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha);
+
+void vmul(double *vdst,  ae_int_t stride_dst, ae_int_t n, double alpha);
+void vmul(double *vdst, ae_int_t N, double alpha);
+
+void vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha);
+void vmul(alglib::complex *vdst, ae_int_t N, double alpha);
+
+void vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha);
+void vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha);
+
+
+
+/********************************************************************
+string conversion functions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+********************************************************************/
+
+/********************************************************************
+1- and 2-dimensional arrays
+********************************************************************/
+class ae_vector_wrapper
+{
+public:
+    ae_vector_wrapper();
+    virtual ~ae_vector_wrapper();
+    ae_vector_wrapper(const ae_vector_wrapper &rhs);
+    const ae_vector_wrapper& operator=(const ae_vector_wrapper &rhs);
+
+    void setlength(ae_int_t iLen);
+    ae_int_t length() const;
+
+    void attach_to(alglib_impl::ae_vector *ptr);
+    void allocate_own(ae_int_t size, alglib_impl::ae_datatype datatype);
+    const alglib_impl::ae_vector* c_ptr() const;
+    alglib_impl::ae_vector* c_ptr();
+protected:
+    alglib_impl::ae_vector *p_vec;
+    alglib_impl::ae_vector vec;
+};
+
+class boolean_1d_array : public ae_vector_wrapper
+{
+public:
+    boolean_1d_array();
+    boolean_1d_array(const char *s);
+    boolean_1d_array(alglib_impl::ae_vector *p);
+    virtual ~boolean_1d_array() ;
+
+    const ae_bool& operator()(ae_int_t i) const;
+    ae_bool& operator()(ae_int_t i);
+
+    const ae_bool& operator[](ae_int_t i) const;
+    ae_bool& operator[](ae_int_t i);
+
+    void setcontent(ae_int_t iLen, const bool *pContent );
+    ae_bool* getcontent();
+    const ae_bool* getcontent() const;
+
+    std::string tostring() const;
+};
+
+class integer_1d_array : public ae_vector_wrapper
+{
+public:
+    integer_1d_array();
+    integer_1d_array(alglib_impl::ae_vector *p);
+    integer_1d_array(const char *s);
+    virtual ~integer_1d_array();
+
+    const ae_int_t& operator()(ae_int_t i) const;
+    ae_int_t& operator()(ae_int_t i);
+
+    const ae_int_t& operator[](ae_int_t i) const;
+    ae_int_t& operator[](ae_int_t i);
+
+    void setcontent(ae_int_t iLen, const ae_int_t *pContent );
+
+    ae_int_t* getcontent();
+    const ae_int_t* getcontent() const;
+
+    std::string tostring() const;
+};
+
+class real_1d_array : public ae_vector_wrapper
+{
+public:
+    real_1d_array();
+    real_1d_array(alglib_impl::ae_vector *p);
+    real_1d_array(const char *s);
+    virtual ~real_1d_array();
+
+    const double& operator()(ae_int_t i) const;
+    double& operator()(ae_int_t i);
+
+    const double& operator[](ae_int_t i) const;
+    double& operator[](ae_int_t i);
+
+    void setcontent(ae_int_t iLen, const double *pContent );
+    double* getcontent();
+    const double* getcontent() const;
+
+    std::string tostring(int dps) const;
+};
+
+class complex_1d_array : public ae_vector_wrapper
+{
+public:
+    complex_1d_array();
+    complex_1d_array(alglib_impl::ae_vector *p);
+    complex_1d_array(const char *s);
+    virtual ~complex_1d_array();
+
+    const alglib::complex& operator()(ae_int_t i) const;
+    alglib::complex& operator()(ae_int_t i);
+
+    const alglib::complex& operator[](ae_int_t i) const;
+    alglib::complex& operator[](ae_int_t i);
+
+    void setcontent(ae_int_t iLen, const alglib::complex *pContent );
+    alglib::complex* getcontent();
+    const alglib::complex* getcontent() const;
+
+    std::string tostring(int dps) const;
+};
+
+class ae_matrix_wrapper
+{
+public:
+    ae_matrix_wrapper();
+    virtual ~ae_matrix_wrapper();
+    ae_matrix_wrapper(const ae_matrix_wrapper &rhs);
+    const ae_matrix_wrapper& operator=(const ae_matrix_wrapper &rhs);
+
+    void setlength(ae_int_t rows, ae_int_t cols);
+    ae_int_t rows() const;
+    ae_int_t cols() const;
+    bool isempty() const;
+	ae_int_t getstride() const;
+
+    void attach_to(alglib_impl::ae_matrix *ptr);
+    void allocate_own(ae_int_t rows, ae_int_t cols, alglib_impl::ae_datatype datatype);
+    const alglib_impl::ae_matrix* c_ptr() const;
+    alglib_impl::ae_matrix* c_ptr();
+protected:
+    alglib_impl::ae_matrix *p_mat;
+    alglib_impl::ae_matrix mat;
+};
+
+class boolean_2d_array : public ae_matrix_wrapper
+{
+public:
+    boolean_2d_array();
+    boolean_2d_array(alglib_impl::ae_matrix *p);
+    boolean_2d_array(const char *s);
+    virtual ~boolean_2d_array();
+
+    const ae_bool& operator()(ae_int_t i, ae_int_t j) const;
+    ae_bool& operator()(ae_int_t i, ae_int_t j);
+
+    const ae_bool* operator[](ae_int_t i) const;
+    ae_bool* operator[](ae_int_t i);
+    
+    void setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent );
+    
+    std::string tostring() const ;
+};
+
+class integer_2d_array : public ae_matrix_wrapper
+{
+public:
+    integer_2d_array();
+    integer_2d_array(alglib_impl::ae_matrix *p);
+    integer_2d_array(const char *s);
+    virtual ~integer_2d_array();
+
+    const ae_int_t& operator()(ae_int_t i, ae_int_t j) const;
+    ae_int_t& operator()(ae_int_t i, ae_int_t j);
+
+    const ae_int_t* operator[](ae_int_t i) const;
+    ae_int_t* operator[](ae_int_t i);
+
+    void setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent );
+    
+    std::string tostring() const;
+};
+
+class real_2d_array : public ae_matrix_wrapper
+{
+public:
+    real_2d_array();
+    real_2d_array(alglib_impl::ae_matrix *p);
+    real_2d_array(const char *s);
+    virtual ~real_2d_array();
+
+    const double& operator()(ae_int_t i, ae_int_t j) const;
+    double& operator()(ae_int_t i, ae_int_t j);
+
+    const double* operator[](ae_int_t i) const;
+    double* operator[](ae_int_t i);
+
+    void setcontent(ae_int_t irows, ae_int_t icols, const double *pContent );
+
+    std::string tostring(int dps) const;
+};
+
+class complex_2d_array : public ae_matrix_wrapper
+{
+public:
+    complex_2d_array();
+    complex_2d_array(alglib_impl::ae_matrix *p);
+    complex_2d_array(const char *s);
+    virtual ~complex_2d_array();
+
+    const alglib::complex& operator()(ae_int_t i, ae_int_t j) const;
+    alglib::complex& operator()(ae_int_t i, ae_int_t j);
+
+    const alglib::complex* operator[](ae_int_t i) const;
+    alglib::complex* operator[](ae_int_t i);
+
+    void setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent );
+
+    std::string tostring(int dps) const;
+};
+
+
+/********************************************************************
+dataset information.
+
+can store regression dataset, classification dataset, or non-labeled
+task:
+* nout==0 means non-labeled task (clustering, for example)
+* nout>0 && nclasses==0 means regression task
+* nout>0 && nclasses>0 means classification task
+********************************************************************/
+/*class dataset
+{
+public:
+    dataset():nin(0), nout(0), nclasses(0), trnsize(0), valsize(0), tstsize(0), totalsize(0){};
+
+    int nin, nout, nclasses;
+
+    int trnsize;
+    int valsize;
+    int tstsize;
+    int totalsize;
+
+    alglib::real_2d_array trn;
+    alglib::real_2d_array val;
+    alglib::real_2d_array tst;
+    alglib::real_2d_array all;
+};
+
+bool opendataset(std::string file, dataset *pdataset);
+
+//
+// internal functions
+//
+std::string strtolower(const std::string &s);
+bool readstrings(std::string file, std::list<std::string> *pOutput);
+bool readstrings(std::string file, std::list<std::string> *pOutput, std::string comment);
+void explodestring(std::string s, char sep, std::vector<std::string> *pOutput);
+std::string xtrim(std::string s);*/
+
+/********************************************************************
+Constants and functions introduced for compatibility with AlgoPascal
+********************************************************************/
+extern const double machineepsilon;
+extern const double maxrealnumber;
+extern const double minrealnumber;
+extern const double fp_nan;
+extern const double fp_posinf;
+extern const double fp_neginf;
+extern const ae_int_t endianness;
+
+int sign2(double x);
+double randomreal();
+int randominteger(int maxv);
+int round(double x);
+int trunc(double x);
+int ifloor(double x);
+int iceil(double x);
+double pi();
+double sqr(double x);
+int maxint(int m1, int m2);
+int minint(int m1, int m2);
+double maxreal(double m1, double m2);
+double minreal(double m1, double m2);
+
+bool fp_eq(double v1, double v2);
+bool fp_neq(double v1, double v2);
+bool fp_less(double v1, double v2);
+bool fp_less_eq(double v1, double v2);
+bool fp_greater(double v1, double v2);
+bool fp_greater_eq(double v1, double v2);
+
+bool fp_isnan(double x);
+bool fp_isposinf(double x);
+bool fp_isneginf(double x);
+bool fp_isinf(double x);
+bool fp_isfinite(double x);
+
+
+}//namespace alglib
+
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTIONS CONTAINS DECLARATIONS FOR OPTIMIZED LINEAR ALGEBRA CODES
+// IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+#define ALGLIB_INTERCEPTS_ABLAS
+
+void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride);
+void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride);
+void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb);
+void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj);
+void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj);
+void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b);
+void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride);
+void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b);
+void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride);
+
+ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     ae_matrix *b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     ae_matrix *c,
+     ae_int_t ic,
+     ae_int_t jc);
+ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     ae_matrix *b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     ae_matrix *c,
+     ae_int_t ic,
+     ae_int_t jc);
+ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2);
+ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2);
+ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2);
+ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     ae_matrix *x,
+     ae_int_t i2,
+     ae_int_t j2);
+ae_bool _ialglib_i_cmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     ae_matrix *c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper);
+ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     ae_matrix *c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper);
+ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_vector *u,
+     ae_int_t uoffs,
+     ae_vector *v,
+     ae_int_t voffs);
+ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m,
+     ae_int_t n,
+     ae_matrix *a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_vector *u,
+     ae_int_t uoffs,
+     ae_vector *v,
+     ae_int_t voffs);
+
+}
+
+#endif
+
diff --git a/contrib/lbfgs/linalg.cpp b/contrib/lbfgs/linalg.cpp
new file mode 100755
index 0000000000..11998ea124
--- /dev/null
+++ b/contrib/lbfgs/linalg.cpp
@@ -0,0 +1,30199 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#include "stdafx.h"
+#include "linalg.h"
+
+// disable some irrelevant warnings
+#if (AE_COMPILER==AE_MSVC)
+#pragma warning(disable:4100)
+#pragma warning(disable:4127)
+#pragma warning(disable:4702)
+#pragma warning(disable:4996)
+#endif
+using namespace std;
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+
+/*************************************************************************
+Cache-oblivous complex "copy-and-transpose"
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    A   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void cmatrixtranspose(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Cache-oblivous real "copy-and-transpose"
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    A   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void rmatrixtranspose(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Copy
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    B   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void cmatrixcopy(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Copy
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    B   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void rmatrixcopy(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Rank-1 correction: A := A + u*v'
+
+INPUT PARAMETERS:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   target matrix, MxN submatrix is updated
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    U   -   vector #1
+    IU  -   subvector offset
+    V   -   vector #2
+    IV  -   subvector offset
+*************************************************************************/
+void cmatrixrank1(const ae_int_t m, const ae_int_t n, complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_1d_array &u, const ae_int_t iu, complex_1d_array &v, const ae_int_t iv)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Rank-1 correction: A := A + u*v'
+
+INPUT PARAMETERS:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   target matrix, MxN submatrix is updated
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    U   -   vector #1
+    IU  -   subvector offset
+    V   -   vector #2
+    IV  -   subvector offset
+*************************************************************************/
+void rmatrixrank1(const ae_int_t m, const ae_int_t n, real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_1d_array &u, const ae_int_t iu, real_1d_array &v, const ae_int_t iv)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Matrix-vector product: y := op(A)*x
+
+INPUT PARAMETERS:
+    M   -   number of rows of op(A)
+            M>=0
+    N   -   number of columns of op(A)
+            N>=0
+    A   -   target matrix
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    OpA -   operation type:
+            * OpA=0     =>  op(A) = A
+            * OpA=1     =>  op(A) = A^T
+            * OpA=2     =>  op(A) = A^H
+    X   -   input vector
+    IX  -   subvector offset
+    IY  -   subvector offset
+
+OUTPUT PARAMETERS:
+    Y   -   vector which stores result
+
+if M=0, then subroutine does nothing.
+if N=0, Y is filled by zeros.
+
+
+  -- ALGLIB routine --
+
+     28.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixmv(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const complex_1d_array &x, const ae_int_t ix, complex_1d_array &y, const ae_int_t iy)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Matrix-vector product: y := op(A)*x
+
+INPUT PARAMETERS:
+    M   -   number of rows of op(A)
+    N   -   number of columns of op(A)
+    A   -   target matrix
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    OpA -   operation type:
+            * OpA=0     =>  op(A) = A
+            * OpA=1     =>  op(A) = A^T
+    X   -   input vector
+    IX  -   subvector offset
+    IY  -   subvector offset
+
+OUTPUT PARAMETERS:
+    Y   -   vector which stores result
+
+if M=0, then subroutine does nothing.
+if N=0, Y is filled by zeros.
+
+
+  -- ALGLIB routine --
+
+     28.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixmv(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, real_1d_array &y, const ae_int_t iy)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This subroutine calculates X*op(A^-1) where:
+* X is MxN general matrix
+* A is NxN upper/lower triangular/unitriangular matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Multiplication result replaces X.
+Cache-oblivious algorithm is used.
+
+INPUT PARAMETERS
+    N   -   matrix size, N>=0
+    M   -   matrix size, N>=0
+    A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
+    I1      -   submatrix offset
+    J1      -   submatrix offset
+    IsUpper -   whether matrix is upper triangular
+    IsUnit  -   whether matrix is unitriangular
+    OpType  -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    C   -   matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1]
+    I2  -   submatrix offset
+    J2  -   submatrix offset
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This subroutine calculates op(A^-1)*X where:
+* X is MxN general matrix
+* A is MxM upper/lower triangular/unitriangular matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Multiplication result replaces X.
+Cache-oblivious algorithm is used.
+
+INPUT PARAMETERS
+    N   -   matrix size, N>=0
+    M   -   matrix size, N>=0
+    A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
+    I1      -   submatrix offset
+    J1      -   submatrix offset
+    IsUpper -   whether matrix is upper triangular
+    IsUnit  -   whether matrix is unitriangular
+    OpType  -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    C   -   matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1]
+    I2  -   submatrix offset
+    J2  -   submatrix offset
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Same as CMatrixRightTRSM, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Same as CMatrixLeftTRSM, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
+where:
+* C is NxN Hermitian matrix given by its upper/lower triangle
+* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
+
+Additional info:
+* cache-oblivious algorithm is used.
+* multiplication result replaces C. If Beta=0, C elements are not used in
+  calculations (not multiplied by zero - just not referenced)
+* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
+* if both Beta and Alpha are zero, C is filled by zeros.
+
+INPUT PARAMETERS
+    N       -   matrix size, N>=0
+    K       -   matrix size, K>=0
+    Alpha   -   coefficient
+    A       -   matrix
+    IA      -   submatrix offset
+    JA      -   submatrix offset
+    OpTypeA -   multiplication type:
+                * 0 - A*A^H is calculated
+                * 2 - A^H*A is calculated
+    Beta    -   coefficient
+    C       -   matrix
+    IC      -   submatrix offset
+    JC      -   submatrix offset
+    IsUpper -   whether C is upper triangular or lower triangular
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Same as CMatrixSYRK, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
+* C is MxN general matrix
+* op1(A) is MxK matrix
+* op2(B) is KxN matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Additional info:
+* cache-oblivious algorithm is used.
+* multiplication result replaces C. If Beta=0, C elements are not used in
+  calculations (not multiplied by zero - just not referenced)
+* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
+* if both Beta and Alpha are zero, C is filled by zeros.
+
+INPUT PARAMETERS
+    N       -   matrix size, N>0
+    M       -   matrix size, N>0
+    K       -   matrix size, K>0
+    Alpha   -   coefficient
+    A       -   matrix
+    IA      -   submatrix offset
+    JA      -   submatrix offset
+    OpTypeA -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    B       -   matrix
+    IB      -   submatrix offset
+    JB      -   submatrix offset
+    OpTypeB -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    Beta    -   coefficient
+    C       -   matrix
+    IC      -   submatrix offset
+    JC      -   submatrix offset
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const alglib::complex alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const alglib::complex beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixgemm(m, n, k, *alpha.c_ptr(), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, *beta.c_ptr(), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Same as CMatrixGEMM, but for real numbers.
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixgemm(m, n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+QR decomposition of a rectangular matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and R in compact form (see below).
+    Tau -   array of scalar factors which are used to form
+            matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
+
+Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
+MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
+
+The elements of matrix R are located on and above the main diagonal of
+matrix A. The elements which are located in Tau array and below the main
+diagonal of matrix A are used to form matrix Q as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(0)*H(2)*...*H(k-1),
+
+where k = min(m,n), and each H(i) is in the form
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - real vector,
+so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqr(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+LQ decomposition of a rectangular matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices L and Q in compact form (see below)
+    Tau -   array of scalar factors which are used to form
+            matrix Q. Array whose index ranges within [0..Min(M,N)-1].
+
+Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
+MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.
+
+The elements of matrix L are located on and below  the  main  diagonal  of
+matrix A. The elements which are located in Tau array and above  the  main
+diagonal of matrix A are used to form matrix Q as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(k-1)*H(k-2)*...*H(1)*H(0),
+
+where k = min(m,n), and each H(i) is of the form
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
+v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlq(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+QR decomposition of a rectangular complex matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and R in compact form
+    Tau -   array of scalar factors which are used to form matrix Q. Array
+            whose indexes range within [0.. Min(M,N)-1]
+
+Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
+MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void cmatrixqr(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+LQ decomposition of a rectangular complex matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and L in compact form
+    Tau -   array of scalar factors which are used to form matrix Q. Array
+            whose indexes range within [0.. Min(M,N)-1]
+
+Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
+MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void cmatrixlq(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Partial unpacking of matrix Q from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of RMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+    Tau     -   scalar factors which are used to form Q.
+                Output of the RMatrixQR subroutine.
+    QColumns -  required number of columns of matrix Q. M>=QColumns>=0.
+
+Output parameters:
+    Q       -   first QColumns columns of matrix Q.
+                Array whose indexes range within [0..M-1, 0..QColumns-1].
+                If QColumns=0, the array remains unchanged.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqrunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qcolumns, real_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking of matrix R from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of RMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    R       -   matrix R, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqrunpackr(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &r)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Partial unpacking of matrix Q from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices L and Q in compact form.
+                Output of RMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+    Tau     -   scalar factors which are used to form Q.
+                Output of the RMatrixLQ subroutine.
+    QRows   -   required number of rows in matrix Q. N>=QRows>=0.
+
+Output parameters:
+    Q       -   first QRows rows of matrix Q. Array whose indexes range
+                within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
+                unchanged.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlqunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qrows, real_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking of matrix L from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and L in compact form.
+                Output of RMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    L       -   matrix L, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlqunpackl(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &l)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
+
+Input parameters:
+    A           -   matrices Q and R in compact form.
+                    Output of CMatrixQR subroutine .
+    M           -   number of rows in matrix A. M>=0.
+    N           -   number of columns in matrix A. N>=0.
+    Tau         -   scalar factors which are used to form Q.
+                    Output of CMatrixQR subroutine .
+    QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QColumns columns of matrix Q.
+                    Array whose index ranges within [0..M-1, 0..QColumns-1].
+                    If QColumns=0, array isn't changed.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixqrunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qcolumns, complex_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking of matrix R from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of CMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    R       -   matrix R, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixqrunpackr(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &r)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
+
+Input parameters:
+    A           -   matrices Q and R in compact form.
+                    Output of CMatrixLQ subroutine .
+    M           -   number of rows in matrix A. M>=0.
+    N           -   number of columns in matrix A. N>=0.
+    Tau         -   scalar factors which are used to form Q.
+                    Output of CMatrixLQ subroutine .
+    QRows       -   required number of rows in matrix Q. N>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QRows rows of matrix Q.
+                    Array whose index ranges within [0..QRows-1, 0..N-1].
+                    If QRows=0, array isn't changed.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlqunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qrows, complex_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking of matrix L from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and L in compact form.
+                Output of CMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    L       -   matrix L, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlqunpackl(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &l)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Reduction of a rectangular matrix to  bidiagonal form
+
+The algorithm reduces the rectangular matrix A to  bidiagonal form by
+orthogonal transformations P and Q: A = Q*B*P.
+
+Input parameters:
+    A       -   source matrix. array[0..M-1, 0..N-1]
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+Output parameters:
+    A       -   matrices Q, B, P in compact form (see below).
+    TauQ    -   scalar factors which are used to form matrix Q.
+    TauP    -   scalar factors which are used to form matrix P.
+
+The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
+replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
+reflections which form MxM matrix Q and NxN matrix P, respectively.
+
+If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
+corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
+product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
+H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
+vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
+stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
+G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
+u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
+
+If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
+corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
+H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
+is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
+G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
+is stored in A(i,i+1:n-1).
+
+EXAMPLE:
+
+m=6, n=5 (m > n):               m=5, n=6 (m < n):
+
+(  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
+(  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
+(  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
+(  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
+(  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
+(  v1  v2  v3  v4  v5 )
+
+Here vi and ui are vectors which form H(i) and G(i), and d and e -
+are the diagonal and off-diagonal elements of matrix B.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+void rmatrixbd(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tauq, real_1d_array &taup)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixbd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking matrix Q which reduces a matrix to bidiagonal form.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of ToBidiagonal subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUQ        -   scalar factors which are used to form Q.
+                    Output of ToBidiagonal subroutine.
+    QColumns    -   required number of columns in matrix Q.
+                    M>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QColumns columns of matrix Q.
+                    Array[0..M-1, 0..QColumns-1]
+                    If QColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, const ae_int_t qcolumns, real_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixbdunpackq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
+
+The algorithm allows pre- or post-multiply by Q or Q'.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of ToBidiagonal subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUQ        -   scalar factors which are used to form Q.
+                    Output of ToBidiagonal subroutine.
+    Z           -   multiplied matrix.
+                    array[0..ZRows-1,0..ZColumns-1]
+    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
+                    ZRows=M, otherwise ZRows can be arbitrary.
+    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
+                    ZColumns=M, otherwise ZColumns can be arbitrary.
+    FromTheRight -  pre- or post-multiply.
+    DoTranspose -   multiply by Q or Q'.
+
+Output parameters:
+    Z           -   product of Z and Q.
+                    Array[0..ZRows-1,0..ZColumns-1]
+                    If ZRows=0 or ZColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdmultiplybyq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixbdmultiplybyq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking matrix P which reduces matrix A to bidiagonal form.
+The subroutine returns transposed matrix P.
+
+Input parameters:
+    QP      -   matrices Q and P in compact form.
+                Output of ToBidiagonal subroutine.
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+    TAUP    -   scalar factors which are used to form P.
+                Output of ToBidiagonal subroutine.
+    PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
+
+Output parameters:
+    PT      -   first PTRows columns of matrix P^T
+                Array[0..PTRows-1, 0..N-1]
+                If PTRows=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackpt(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, const ae_int_t ptrows, real_2d_array &pt)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixbdunpackpt(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), ptrows, const_cast<alglib_impl::ae_matrix*>(pt.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Multiplication by matrix P which reduces matrix A to  bidiagonal form.
+
+The algorithm allows pre- or post-multiply by P or P'.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of RMatrixBD subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUP        -   scalar factors which are used to form P.
+                    Output of RMatrixBD subroutine.
+    Z           -   multiplied matrix.
+                    Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
+    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
+                    ZRows=N, otherwise ZRows can be arbitrary.
+    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
+                    ZColumns=N, otherwise ZColumns can be arbitrary.
+    FromTheRight -  pre- or post-multiply.
+    DoTranspose -   multiply by P or P'.
+
+Output parameters:
+    Z - product of Z and P.
+                Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
+                If ZRows=0 or ZColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdmultiplybyp(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixbdmultiplybyp(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking of the main and secondary diagonals of bidiagonal decomposition
+of matrix A.
+
+Input parameters:
+    B   -   output of RMatrixBD subroutine.
+    M   -   number of rows in matrix B.
+    N   -   number of columns in matrix B.
+
+Output parameters:
+    IsUpper -   True, if the matrix is upper bidiagonal.
+                otherwise IsUpper is False.
+    D       -   the main diagonal.
+                Array whose index ranges within [0..Min(M,N)-1].
+    E       -   the secondary diagonal (upper or lower, depending on
+                the value of IsUpper).
+                Array index ranges within [0..Min(M,N)-1], the last
+                element is not used.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackdiagonals(const real_2d_array &b, const ae_int_t m, const ae_int_t n, bool &isupper, real_1d_array &d, real_1d_array &e)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixbdunpackdiagonals(const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), m, n, &isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
+where Q is an orthogonal matrix, H - Hessenberg matrix.
+
+Input parameters:
+    A       -   matrix A with elements [0..N-1, 0..N-1]
+    N       -   size of matrix A.
+
+Output parameters:
+    A       -   matrices Q and P in  compact form (see below).
+    Tau     -   array of scalar factors which are used to form matrix Q.
+                Array whose index ranges within [0..N-2]
+
+Matrix H is located on the main diagonal, on the lower secondary  diagonal
+and above the main diagonal of matrix A. The elements which are used to
+form matrix Q are situated in array Tau and below the lower secondary
+diagonal of matrix A as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(0)*H(2)*...*H(n-2),
+
+where each H(i) is given by
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - is a real vector,
+so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void rmatrixhessenberg(real_2d_array &a, const ae_int_t n, real_1d_array &tau)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixhessenberg(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking matrix Q which reduces matrix A to upper Hessenberg form
+
+Input parameters:
+    A   -   output of RMatrixHessenberg subroutine.
+    N   -   size of matrix A.
+    Tau -   scalar factors which are used to form Q.
+            Output of RMatrixHessenberg subroutine.
+
+Output parameters:
+    Q   -   matrix Q.
+            Array whose indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixhessenbergunpackq(const real_2d_array &a, const ae_int_t n, const real_1d_array &tau, real_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixhessenbergunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
+
+Input parameters:
+    A   -   output of RMatrixHessenberg subroutine.
+    N   -   size of matrix A.
+
+Output parameters:
+    H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixhessenbergunpackh(const real_2d_array &a, const ae_int_t n, real_2d_array &h)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixhessenbergunpackh(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Reduction of a symmetric matrix which is given by its higher or lower
+triangular part to a tridiagonal matrix using orthogonal similarity
+transformation: Q'*A*Q=T.
+
+Input parameters:
+    A       -   matrix to be transformed
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format. If IsUpper = True, then matrix A is given
+                by its upper triangle, and the lower triangle is not used
+                and not modified by the algorithm, and vice versa
+                if IsUpper = False.
+
+Output parameters:
+    A       -   matrices T and Q in  compact form (see lower)
+    Tau     -   array of factors which are forming matrices H(i)
+                array with elements [0..N-2].
+    D       -   main diagonal of symmetric matrix T.
+                array with elements [0..N-1].
+    E       -   secondary diagonal of symmetric matrix T.
+                array with elements [0..N-2].
+
+
+  If IsUpper=True, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(n-2) . . . H(2) H(0).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a real scalar, and v is a real vector with
+  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
+  A(0:i-1,i+1), and tau in TAU(i).
+
+  If IsUpper=False, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(0) H(2) . . . H(n-2).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a real scalar, and v is a real vector with
+  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
+  and tau in TAU(i).
+
+  The contents of A on exit are illustrated by the following examples
+  with n = 5:
+
+  if UPLO = 'U':                       if UPLO = 'L':
+
+    (  d   e   v1  v2  v3 )              (  d                  )
+    (      d   e   v2  v3 )              (  e   d              )
+    (          d   e   v3 )              (  v0  e   d          )
+    (              d   e  )              (  v0  v1  e   d      )
+    (                  d  )              (  v0  v1  v2  e   d  )
+
+  where d and e denote diagonal and off-diagonal elements of T, and vi
+  denotes an element of the vector defining H(i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void smatrixtd(real_2d_array &a, const ae_int_t n, const bool isupper, real_1d_array &tau, real_1d_array &d, real_1d_array &e)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::smatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
+form.
+
+Input parameters:
+    A       -   the result of a SMatrixTD subroutine
+    N       -   size of matrix A.
+    IsUpper -   storage format (a parameter of SMatrixTD subroutine)
+    Tau     -   the result of a SMatrixTD subroutine
+
+Output parameters:
+    Q       -   transformation matrix.
+                array with elements [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void smatrixtdunpackq(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &tau, real_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::smatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
+triangular part to a real  tridiagonal  matrix  using  unitary  similarity
+transformation: Q'*A*Q = T.
+
+Input parameters:
+    A       -   matrix to be transformed
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format. If IsUpper = True, then matrix A is  given
+                by its upper triangle, and the lower triangle is not  used
+                and not modified by the algorithm, and vice versa
+                if IsUpper = False.
+
+Output parameters:
+    A       -   matrices T and Q in  compact form (see lower)
+    Tau     -   array of factors which are forming matrices H(i)
+                array with elements [0..N-2].
+    D       -   main diagonal of real symmetric matrix T.
+                array with elements [0..N-1].
+    E       -   secondary diagonal of real symmetric matrix T.
+                array with elements [0..N-2].
+
+
+  If IsUpper=True, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(n-2) . . . H(2) H(0).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a complex scalar, and v is a complex vector with
+  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
+  A(0:i-1,i+1), and tau in TAU(i).
+
+  If IsUpper=False, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(0) H(2) . . . H(n-2).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a complex scalar, and v is a complex vector with
+  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
+  and tau in TAU(i).
+
+  The contents of A on exit are illustrated by the following examples
+  with n = 5:
+
+  if UPLO = 'U':                       if UPLO = 'L':
+
+    (  d   e   v1  v2  v3 )              (  d                  )
+    (      d   e   v2  v3 )              (  e   d              )
+    (          d   e   v3 )              (  v0  e   d          )
+    (              d   e  )              (  v0  v1  e   d      )
+    (                  d  )              (  v0  v1  v2  e   d  )
+
+where d and e denote diagonal and off-diagonal elements of T, and vi
+denotes an element of the vector defining H(i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void hmatrixtd(complex_2d_array &a, const ae_int_t n, const bool isupper, complex_1d_array &tau, real_1d_array &d, real_1d_array &e)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hmatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
+form.
+
+Input parameters:
+    A       -   the result of a HMatrixTD subroutine
+    N       -   size of matrix A.
+    IsUpper -   storage format (a parameter of HMatrixTD subroutine)
+    Tau     -   the result of a HMatrixTD subroutine
+
+Output parameters:
+    Q       -   transformation matrix.
+                array with elements [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void hmatrixtdunpackq(const complex_2d_array &a, const ae_int_t n, const bool isupper, const complex_1d_array &tau, complex_2d_array &q)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hmatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a symmetric matrix
+
+The algorithm finds eigen pairs of a symmetric matrix by reducing it to
+tridiagonal form and using the QL/QR algorithm.
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpper -   storage format.
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the eigenvectors.
+                Array whose indexes range within [0..N-1, 0..N-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
+matrix  in  a  given half open interval (A, B] by using  a  bisection  and
+inverse iteration
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part. Array [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
+
+Output parameters:
+    M       -   number of eigenvalues found in a given half-interval (M>=0).
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..M-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. M contains the number of eigenvalues in the given
+    half-interval (could be equal to 0), W contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned,
+    M is equal to 0.
+
+  -- ALGLIB --
+     Copyright 07.01.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixevdr(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
+matrix with given indexes by using bisection and inverse iteration methods.
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    I1, I2 -    index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+
+Output parameters:
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..I2-I1].
+                In that case, the eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. W contains the eigenvalues, Z contains the
+    eigenvectors (if needed).
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned.
+
+  -- ALGLIB --
+     Copyright 07.01.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixevdi(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a Hermitian matrix
+
+The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
+real tridiagonal form and using the QL/QR algorithm.
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the eigenvectors.
+                Array whose indexes range within [0..N-1, 0..N-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+Note:
+    eigenvectors of Hermitian matrix are defined up to  multiplication  by
+    a complex number L, such that |L|=1.
+
+  -- ALGLIB --
+     Copyright 2005, 23 March 2007 by Bochkanov Sergey
+*************************************************************************/
+bool hmatrixevd(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, complex_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::hmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
+matrix  in  a  given half-interval (A, B] by using a bisection and inverse
+iteration
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular  part.  Array  whose   indexes   range   within
+                [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
+
+Output parameters:
+    M       -   number of eigenvalues found in a given half-interval, M>=0
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..M-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. M contains the number of eigenvalues in the given
+    half-interval (could be equal to 0), W contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+
+    False, if the bisection method subroutine  wasn't  able  to  find  the
+    eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
+    subroutine  wasn't  able  to  find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned, M  is
+    equal to 0.
+
+Note:
+    eigen vectors of Hermitian matrix are defined up to multiplication  by
+    a complex number L, such as |L|=1.
+
+  -- ALGLIB --
+     Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
+*************************************************************************/
+bool hmatrixevdr(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, complex_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::hmatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
+matrix with given indexes by using bisection and inverse iteration methods
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    I1, I2 -    index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+
+Output parameters:
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..I2-I1].
+                In  that  case,  the eigenvectors are stored in the matrix
+                columns.
+
+Result:
+    True, if successful. W contains the eigenvalues, Z contains the
+    eigenvectors (if needed).
+
+    False, if the bisection method subroutine  wasn't  able  to  find  the
+    eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
+    subroutine wasn't able to find  all  the  corresponding  eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned.
+
+Note:
+    eigen vectors of Hermitian matrix are defined up to multiplication  by
+    a complex number L, such as |L|=1.
+
+  -- ALGLIB --
+     Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
+*************************************************************************/
+bool hmatrixevdi(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, complex_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::hmatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
+
+The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
+using an QL/QR algorithm with implicit shifts.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix
+                   are multiplied by the square matrix Z. It is used if the
+                   tridiagonal matrix is obtained by the similarity
+                   transformation of a symmetric matrix;
+                 * 2, the eigenvectors of a tridiagonal matrix replace the
+                   square matrix Z;
+                 * 3, matrix Z contains the first row of the eigenvectors
+                   matrix.
+    Z       -   if ZNeeded=1, Z contains the square matrix by which the
+                eigenvectors are multiplied.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the product of a given matrix (from the left)
+                   and the eigenvectors matrix (from the right);
+                 * 2, Z contains the eigenvectors.
+                 * 3, Z contains the first row of the eigenvectors matrix.
+                If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
+                In that case, the eigenvectors are stored in the matrix columns.
+                If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+bool smatrixtdevd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixtdevd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
+given half-interval (A, B] by using bisection and inverse iteration.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix, N>=0.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix are multiplied
+                   by the square matrix Z. It is used if the tridiagonal
+                   matrix is obtained by the similarity transformation
+                   of a symmetric matrix.
+                 * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
+    A, B    -   half-interval (A, B] to search eigenvalues in.
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z isn't used and remains unchanged;
+                 * 1, Z contains the square matrix (array whose indexes range
+                   within [0..N-1, 0..N-1]) which reduces the given symmetric
+                   matrix to tridiagonal form;
+                 * 2, Z isn't used (but changed on the exit).
+
+Output parameters:
+    D       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    M       -   number of eigenvalues found in the given half-interval (M>=0).
+    Z       -   if ZNeeded is equal to:
+                 * 0, doesn't contain any information;
+                 * 1, contains the product of a given NxN matrix Z (from the
+                   left) and NxM matrix of the eigenvectors found (from the
+                   right). Array whose indexes range within [0..N-1, 0..M-1].
+                 * 2, contains the matrix of the eigenvectors found.
+                   Array whose indexes range within [0..N-1, 0..M-1].
+
+Result:
+
+    True, if successful. In that case, M contains the number of eigenvalues
+    in the given half-interval (could be equal to 0), D contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+    It should be noted that the subroutine changes the size of arrays D and Z.
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors. In that case,
+    the eigenvalues and eigenvectors are not returned, M is equal to 0.
+
+  -- ALGLIB --
+     Copyright 31.03.2008 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixtdevdr(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const double a, const double b, ae_int_t &m, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixtdevdr(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, a, b, &m, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
+indexes (in ascending order) by using the bisection and inverse iteraion.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix. N>=0.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix are multiplied
+                   by the square matrix Z. It is used if the
+                   tridiagonal matrix is obtained by the similarity transformation
+                   of a symmetric matrix.
+                 * 2, the eigenvectors of a tridiagonal matrix replace
+                   matrix Z.
+    I1, I2  -   index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z isn't used and remains unchanged;
+                 * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
+                   which reduces the given symmetric matrix to  tridiagonal form;
+                 * 2, Z isn't used (but changed on the exit).
+
+Output parameters:
+    D       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, doesn't contain any information;
+                 * 1, contains the product of a given NxN matrix Z (from the left) and
+                   Nx(I2-I1) matrix of the eigenvectors found (from the right).
+                   Array whose indexes range within [0..N-1, 0..I2-I1].
+                 * 2, contains the matrix of the eigenvalues found.
+                   Array whose indexes range within [0..N-1, 0..I2-I1].
+
+
+Result:
+
+    True, if successful. In that case, D contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+    It should be noted that the subroutine changes the size of arrays D and Z.
+
+    False, if the bisection method subroutine wasn't able to find the eigenvalues
+    in the given interval or if the inverse iteration subroutine wasn't able
+    to find all the corresponding eigenvectors. In that case, the eigenvalues
+    and eigenvectors are not returned.
+
+  -- ALGLIB --
+     Copyright 25.12.2005 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixtdevdi(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const ae_int_t i1, const ae_int_t i2, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixtdevdi(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, i1, i2, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Finding eigenvalues and eigenvectors of a general matrix
+
+The algorithm finds eigenvalues and eigenvectors of a general matrix by
+using the QR algorithm with multiple shifts. The algorithm can find
+eigenvalues and both left and right eigenvectors.
+
+The right eigenvector is a vector x such that A*x = w*x, and the left
+eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
+conjugate transposition of vector y).
+
+Input parameters:
+    A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    VNeeded -   flag controlling whether eigenvectors are needed or not.
+                If VNeeded is equal to:
+                 * 0, eigenvectors are not returned;
+                 * 1, right eigenvectors are returned;
+                 * 2, left eigenvectors are returned;
+                 * 3, both left and right eigenvectors are returned.
+
+Output parameters:
+    WR      -   real parts of eigenvalues.
+                Array whose index ranges within [0..N-1].
+    WR      -   imaginary parts of eigenvalues.
+                Array whose index ranges within [0..N-1].
+    VL, VR  -   arrays of left and right eigenvectors (if they are needed).
+                If WI[i]=0, the respective eigenvalue is a real number,
+                and it corresponds to the column number I of matrices VL/VR.
+                If WI[i]>0, we have a pair of complex conjugate numbers with
+                positive and negative imaginary parts:
+                    the first eigenvalue WR[i] + sqrt(-1)*WI[i];
+                    the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
+                    WI[i]>0
+                    WI[i+1] = -WI[i] < 0
+                In that case, the eigenvector  corresponding to the first
+                eigenvalue is located in i and i+1 columns of matrices
+                VL/VR (the column number i contains the real part, and the
+                column number i+1 contains the imaginary part), and the vector
+                corresponding to the second eigenvalue is a complex conjugate to
+                the first vector.
+                Arrays whose indexes range within [0..N-1, 0..N-1].
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm has not converged.
+
+Note 1:
+    Some users may ask the following question: what if WI[N-1]>0?
+    WI[N] must contain an eigenvalue which is complex conjugate to the
+    N-th eigenvalue, but the array has only size N?
+    The answer is as follows: such a situation cannot occur because the
+    algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is
+    strictly less than N-1.
+
+Note 2:
+    The algorithm performance depends on the value of the internal parameter
+    NS of the InternalSchurDecomposition subroutine which defines the number
+    of shifts in the QR algorithm (similarly to the block width in block-matrix
+    algorithms of linear algebra). If you require maximum performance
+    on your machine, it is recommended to adjust this parameter manually.
+
+
+See also the InternalTREVC subroutine.
+
+The algorithm is based on the LAPACK 3.0 library.
+*************************************************************************/
+bool rmatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t vneeded, real_1d_array &wr, real_1d_array &wi, real_2d_array &vl, real_2d_array &vr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::rmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, vneeded, const_cast<alglib_impl::ae_vector*>(wr.c_ptr()), const_cast<alglib_impl::ae_vector*>(wi.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vl.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vr.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of a random uniformly distributed (Haar) orthogonal matrix
+
+INPUT PARAMETERS:
+    N   -   matrix size, N>=1
+
+OUTPUT PARAMETERS:
+    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonal(const ae_int_t n, real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of random NxN matrix with given condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of a random Haar distributed orthogonal complex matrix
+
+INPUT PARAMETERS:
+    N   -   matrix size, N>=1
+
+OUTPUT PARAMETERS:
+    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonal(const ae_int_t n, complex_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of random NxN complex matrix with given condition number C and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of random NxN symmetric matrix with given condition number  and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void smatrixrndcond(const ae_int_t n, const double c, real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::smatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of random NxN symmetric positive definite matrix with given
+condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::spdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of random NxN Hermitian matrix with given condition number  and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Generation of random NxN Hermitian positive definite matrix with given
+condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hpdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonalfromtheright(real_2d_array &a, const ae_int_t m, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q*A, where Q is random MxM orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonalfromtheleft(real_2d_array &a, const ae_int_t m, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Multiplication of MxN complex matrix by NxN random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonalfromtheright(complex_2d_array &a, const ae_int_t m, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Multiplication of MxN complex matrix by MxM random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q*A, where Q is random MxM orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonalfromtheleft(complex_2d_array &a, const ae_int_t m, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Symmetric multiplication of NxN matrix by random Haar distributed
+orthogonal  matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..N-1, 0..N-1]
+    N   -   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void smatrixrndmultiply(real_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::smatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Hermitian multiplication of NxN matrix by random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..N-1, 0..N-1]
+    N   -   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hmatrixrndmultiply(complex_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hmatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+LU decomposition of a general real matrix with row pivoting
+
+A is represented as A = P*L*U, where:
+* L is lower unitriangular matrix
+* U is upper triangular matrix
+* P = P0*P1*...*PK, K=min(M,N)-1,
+  Pi - permutation matrix for I and Pivots[I]
+
+This is cache-oblivous implementation of LU decomposition.
+It is optimized for square matrices. As for rectangular matrices:
+* best case - M>>N
+* worst case - N>>M, small M, large N, matrix does not fit in CPU cache
+
+INPUT PARAMETERS:
+    A       -   array[0..M-1, 0..N-1].
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+
+OUTPUT PARAMETERS:
+    A       -   matrices L and U in compact form:
+                * L is stored under main diagonal
+                * U is stored on and above main diagonal
+    Pivots  -   permutation matrix in compact form.
+                array[0..Min(M-1,N-1)].
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlu(real_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+LU decomposition of a general complex matrix with row pivoting
+
+A is represented as A = P*L*U, where:
+* L is lower unitriangular matrix
+* U is upper triangular matrix
+* P = P0*P1*...*PK, K=min(M,N)-1,
+  Pi - permutation matrix for I and Pivots[I]
+
+This is cache-oblivous implementation of LU decomposition. It is optimized
+for square matrices. As for rectangular matrices:
+* best case - M>>N
+* worst case - N>>M, small M, large N, matrix does not fit in CPU cache
+
+INPUT PARAMETERS:
+    A       -   array[0..M-1, 0..N-1].
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+
+OUTPUT PARAMETERS:
+    A       -   matrices L and U in compact form:
+                * L is stored under main diagonal
+                * U is stored on and above main diagonal
+    Pivots  -   permutation matrix in compact form.
+                array[0..Min(M-1,N-1)].
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlu(complex_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Cache-oblivious Cholesky decomposition
+
+The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
+definite matrix. The result of an algorithm is a representation  of  A  as
+A=U'*U  or A=L*L' (here X' detones conj(X^T)).
+
+INPUT PARAMETERS:
+    A       -   upper or lower triangle of a factorized matrix.
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   if IsUpper=True, then A contains an upper triangle of
+                a symmetric matrix, otherwise A contains a lower one.
+
+OUTPUT PARAMETERS:
+    A       -   the result of factorization. If IsUpper=True, then
+                the upper triangle contains matrix U, so that A = U'*U,
+                and the elements below the main diagonal are not modified.
+                Similarly, if IsUpper = False.
+
+RESULT:
+    If  the  matrix  is  positive-definite,  the  function  returns  True.
+    Otherwise, the function returns False. Contents of A is not determined
+    in such case.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+bool hpdmatrixcholesky(complex_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::hpdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Cache-oblivious Cholesky decomposition
+
+The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
+definite matrix. The result of an algorithm is a representation  of  A  as
+A=U^T*U  or A=L*L^T
+
+INPUT PARAMETERS:
+    A       -   upper or lower triangle of a factorized matrix.
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   if IsUpper=True, then A contains an upper triangle of
+                a symmetric matrix, otherwise A contains a lower one.
+
+OUTPUT PARAMETERS:
+    A       -   the result of factorization. If IsUpper=True, then
+                the upper triangle contains matrix U, so that A = U^T*U,
+                and the elements below the main diagonal are not modified.
+                Similarly, if IsUpper = False.
+
+RESULT:
+    If  the  matrix  is  positive-definite,  the  function  returns  True.
+    Otherwise, the function returns False. Contents of A is not determined
+    in such case.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+bool spdmatrixcholesky(real_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::spdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of a matrix condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixrcond1(const real_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixrcondinf(const real_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Condition number estimate of a symmetric positive definite matrix.
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm of condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    A       -   symmetric positive definite matrix which is given by its
+                upper or lower triangle depending on the value of
+                IsUpper. Array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+
+Result:
+    1/LowerBound(cond(A)), if matrix A is positive definite,
+   -1, if matrix A is not positive definite, and its condition number
+    could not be found by this algorithm.
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double spdmatrixrcond(const real_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::spdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix: estimate of a condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A       -   matrix. Array[0..N-1, 0..N-1].
+    N       -   size of A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixtrrcond1(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix: estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixtrrcondinf(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Condition number estimate of a Hermitian positive definite matrix.
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm of condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    A       -   Hermitian positive definite matrix which is given by its
+                upper or lower triangle depending on the value of
+                IsUpper. Array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+
+Result:
+    1/LowerBound(cond(A)), if matrix A is positive definite,
+   -1, if matrix A is not positive definite, and its condition number
+    could not be found by this algorithm.
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double hpdmatrixrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::hpdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of a matrix condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixrcond1(const complex_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::cmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixrcondinf(const complex_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::cmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA         -   LU decomposition of a matrix in compact form. Output of
+                    the RMatrixLU subroutine.
+    N           -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixlurcond1(const real_2d_array &lua, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition
+(infinity norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA     -   LU decomposition of a matrix in compact form. Output of
+                the RMatrixLU subroutine.
+    N       -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixlurcondinf(const real_2d_array &lua, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Condition number estimate of a symmetric positive definite matrix given by
+Cholesky decomposition.
+
+The algorithm calculates a lower bound of the condition number. In this
+case, the algorithm does not return a lower bound of the condition number,
+but an inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    CD  - Cholesky decomposition of matrix A,
+          output of SMatrixCholesky subroutine.
+    N   - size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double spdmatrixcholeskyrcond(const real_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::spdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Condition number estimate of a Hermitian positive definite matrix given by
+Cholesky decomposition.
+
+The algorithm calculates a lower bound of the condition number. In this
+case, the algorithm does not return a lower bound of the condition number,
+but an inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    CD  - Cholesky decomposition of matrix A,
+          output of SMatrixCholesky subroutine.
+    N   - size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double hpdmatrixcholeskyrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::hpdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA         -   LU decomposition of a matrix in compact form. Output of
+                    the CMatrixLU subroutine.
+    N           -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixlurcond1(const complex_2d_array &lua, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::cmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition
+(infinity norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA     -   LU decomposition of a matrix in compact form. Output of
+                the CMatrixLU subroutine.
+    N       -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixlurcondinf(const complex_2d_array &lua, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::cmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix: estimate of a condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A       -   matrix. Array[0..N-1, 0..N-1].
+    N       -   size of A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixtrrcond1(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::cmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix: estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixtrrcondinf(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::cmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Matrix inverse report:
+* R1    reciprocal of condition number in 1-norm
+* RInf  reciprocal of condition number in inf-norm
+*************************************************************************/
+_matinvreport_owner::_matinvreport_owner()
+{
+    p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_matinvreport_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_matinvreport_owner::_matinvreport_owner(const _matinvreport_owner &rhs)
+{
+    p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_matinvreport_owner& _matinvreport_owner::operator=(const _matinvreport_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_matinvreport_clear(p_struct);
+    if( !alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_matinvreport_owner::~_matinvreport_owner()
+{
+    alglib_impl::_matinvreport_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::matinvreport* _matinvreport_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::matinvreport* _matinvreport_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::matinvreport*>(p_struct);
+}
+matinvreport::matinvreport() : _matinvreport_owner() ,r1(p_struct->r1),rinf(p_struct->rinf)
+{
+}
+
+matinvreport::matinvreport(const matinvreport &rhs):_matinvreport_owner(rhs) ,r1(p_struct->r1),rinf(p_struct->rinf)
+{
+}
+
+matinvreport& matinvreport::operator=(const matinvreport &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _matinvreport_owner::operator=(rhs);
+    return *this;
+}
+
+matinvreport::~matinvreport()
+{
+}
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of RMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of RMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code:
+                * -3    A is singular, or VERY close to singular.
+                        it is filled by zeros in such cases.
+                *  1    task is solved (but matrix A may be ill-conditioned,
+                        check R1/RInf parameters for condition numbers).
+    Rep     -   solver report, see below for more info
+    A       -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+SOLVER REPORT
+
+Subroutine sets following fields of the Rep structure:
+* R1        reciprocal of condition number: 1/cond(A), 1-norm.
+* RInf      reciprocal of condition number: 1/cond(A), inf-norm.
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of RMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of RMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code:
+                * -3    A is singular, or VERY close to singular.
+                        it is filled by zeros in such cases.
+                *  1    task is solved (but matrix A may be ill-conditioned,
+                        check R1/RInf parameters for condition numbers).
+    Rep     -   solver report, see below for more info
+    A       -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+SOLVER REPORT
+
+Subroutine sets following fields of the Rep structure:
+* R1        reciprocal of condition number: 1/cond(A), 1-norm.
+* RInf      reciprocal of condition number: 1/cond(A), inf-norm.
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length()))
+        throw ap_error("Error while calling 'rmatrixluinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+Result:
+    True, if the matrix is not singular.
+    False, if the matrix is singular.
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinverse(real_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+Result:
+    True, if the matrix is not singular.
+    False, if the matrix is singular.
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'rmatrixinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of CMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of CMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of CMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of CMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length()))
+        throw ap_error("Error while calling 'cmatrixluinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixinverse(complex_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'cmatrixinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  SPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixcholeskyinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  SPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixcholeskyinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isupper;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'spdmatrixcholeskyinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    isupper = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix.
+
+Given an upper or lower triangle of a symmetric positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix.
+
+Given an upper or lower triangle of a symmetric positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isupper;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'spdmatrixinverse': looks like one of arguments has wrong size");
+    if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
+        throw ap_error("'a' parameter is not symmetric matrix");
+    n = a.cols();
+    isupper = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        if( !alglib_impl::ae_force_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
+            throw ap_error("Internal error while forcing symmetricity of 'a' parameter");
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  HPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixcholeskyinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  HPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixcholeskyinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isupper;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'hpdmatrixcholeskyinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    isupper = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix.
+
+Given an upper or lower triangle of a Hermitian positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix.
+
+Given an upper or lower triangle of a Hermitian positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isupper;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'hpdmatrixinverse': looks like one of arguments has wrong size");
+    if( !alglib_impl::ae_is_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
+        throw ap_error("'a' parameter is not Hermitian matrix");
+    n = a.cols();
+    isupper = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        if( !alglib_impl::ae_force_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
+            throw ap_error("Internal error while forcing Hermitian properties of 'a' parameter");
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix inverse (real)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixtrinverse(real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix inverse (real)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixtrinverse(real_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isunit;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'rmatrixtrinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    isunit = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix inverse (complex)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixtrinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Triangular matrix inverse (complex)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixtrinverse(complex_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isunit;
+    if( (a.cols()!=a.rows()))
+        throw ap_error("Error while calling 'cmatrixtrinverse': looks like one of arguments has wrong size");
+    n = a.cols();
+    isunit = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Singular value decomposition of a bidiagonal matrix (extended algorithm)
+
+The algorithm performs the singular value decomposition  of  a  bidiagonal
+matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
+orthogonal matrices, S - diagonal matrix with non-negative elements on the
+main diagonal, in descending order.
+
+The  algorithm  finds  singular  values.  In  addition,  the algorithm can
+calculate  matrices  Q  and P (more precisely, not the matrices, but their
+product  with  given  matrices U and VT - U*Q and (P^T)*VT)).  Of  course,
+matrices U and VT can be of any type, including identity. Furthermore, the
+algorithm can calculate Q'*C (this product is calculated more  effectively
+than U*Q,  because  this calculation operates with rows instead  of matrix
+columns).
+
+The feature of the algorithm is its ability to find  all  singular  values
+including those which are arbitrarily close to 0  with  relative  accuracy
+close to  machine precision. If the parameter IsFractionalAccuracyRequired
+is set to True, all singular values will have high relative accuracy close
+to machine precision. If the parameter is set to False, only  the  biggest
+singular value will have relative accuracy  close  to  machine  precision.
+The absolute error of other singular values is equal to the absolute error
+of the biggest singular value.
+
+Input parameters:
+    D       -   main diagonal of matrix B.
+                Array whose index ranges within [0..N-1].
+    E       -   superdiagonal (or subdiagonal) of matrix B.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix B.
+    IsUpper -   True, if the matrix is upper bidiagonal.
+    IsFractionalAccuracyRequired -
+                accuracy to search singular values with.
+    U       -   matrix to be multiplied by Q.
+                Array whose indexes range within [0..NRU-1, 0..N-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..NRU-1, 0..N-1] will be multiplied by Q.
+    NRU     -   number of rows in matrix U.
+    C       -   matrix to be multiplied by Q'.
+                Array whose indexes range within [0..N-1, 0..NCC-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..N-1, 0..NCC-1] will be multiplied by Q'.
+    NCC     -   number of columns in matrix C.
+    VT      -   matrix to be multiplied by P^T.
+                Array whose indexes range within [0..N-1, 0..NCVT-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..N-1, 0..NCVT-1] will be multiplied by P^T.
+    NCVT    -   number of columns in matrix VT.
+
+Output parameters:
+    D       -   singular values of matrix B in descending order.
+    U       -   if NRU>0, contains matrix U*Q.
+    VT      -   if NCVT>0, contains matrix (P^T)*VT.
+    C       -   if NCC>0, contains matrix Q'*C.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+Additional information:
+    The type of convergence is controlled by the internal  parameter  TOL.
+    If the parameter is greater than 0, the singular values will have
+    relative accuracy TOL. If TOL<0, the singular values will have
+    absolute accuracy ABS(TOL)*norm(B).
+    By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
+    where Epsilon is the machine precision. It is not  recommended  to  use
+    TOL less than 10*Epsilon since this will  considerably  slow  down  the
+    algorithm and may not lead to error decreasing.
+History:
+    * 31 March, 2007.
+        changed MAXITR from 6 to 12.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1999.
+*************************************************************************/
+bool rmatrixbdsvd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const bool isupper, const bool isfractionalaccuracyrequired, real_2d_array &u, const ae_int_t nru, real_2d_array &c, const ae_int_t ncc, real_2d_array &vt, const ae_int_t ncvt)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::rmatrixbdsvd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, isupper, isfractionalaccuracyrequired, const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), nru, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ncc, const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), ncvt, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Singular value decomposition of a rectangular matrix.
+
+The algorithm calculates the singular value decomposition of a matrix of
+size MxN: A = U * S * V^T
+
+The algorithm finds the singular values and, optionally, matrices U and V^T.
+The algorithm can find both first min(M,N) columns of matrix U and rows of
+matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
+and NxN respectively).
+
+Take into account that the subroutine does not return matrix V but V^T.
+
+Input parameters:
+    A           -   matrix to be decomposed.
+                    Array whose indexes range within [0..M-1, 0..N-1].
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    UNeeded     -   0, 1 or 2. See the description of the parameter U.
+    VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
+    AdditionalMemory -
+                    If the parameter:
+                     * equals 0, the algorithm doesn’t use additional
+                       memory (lower requirements, lower performance).
+                     * equals 1, the algorithm uses additional
+                       memory of size min(M,N)*min(M,N) of real numbers.
+                       It often speeds up the algorithm.
+                     * equals 2, the algorithm uses additional
+                       memory of size M*min(M,N) of real numbers.
+                       It allows to get a maximum performance.
+                    The recommended value of the parameter is 2.
+
+Output parameters:
+    W           -   contains singular values in descending order.
+    U           -   if UNeeded=0, U isn't changed, the left singular vectors
+                    are not calculated.
+                    if Uneeded=1, U contains left singular vectors (first
+                    min(M,N) columns of matrix U). Array whose indexes range
+                    within [0..M-1, 0..Min(M,N)-1].
+                    if UNeeded=2, U contains matrix U wholly. Array whose
+                    indexes range within [0..M-1, 0..M-1].
+    VT          -   if VTNeeded=0, VT isn’t changed, the right singular vectors
+                    are not calculated.
+                    if VTNeeded=1, VT contains right singular vectors (first
+                    min(M,N) rows of matrix V^T). Array whose indexes range
+                    within [0..min(M,N)-1, 0..N-1].
+                    if VTNeeded=2, VT contains matrix V^T wholly. Array whose
+                    indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+bool rmatrixsvd(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const ae_int_t uneeded, const ae_int_t vtneeded, const ae_int_t additionalmemory, real_1d_array &w, real_2d_array &u, real_2d_array &vt)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::rmatrixsvd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, uneeded, vtneeded, additionalmemory, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length()))
+        throw ap_error("Error while calling 'rmatrixludet': looks like one of arguments has wrong size");
+    n = a.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixdet(const real_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixdet(const real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.rows()!=a.cols()))
+        throw ap_error("Error while calling 'rmatrixdet': looks like one of arguments has wrong size");
+    n = a.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<alglib::complex*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length()))
+        throw ap_error("Error while calling 'cmatrixludet': looks like one of arguments has wrong size");
+    n = a.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<alglib::complex*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+alglib::complex cmatrixdet(const complex_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<alglib::complex*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+alglib::complex cmatrixdet(const complex_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.rows()!=a.cols()))
+        throw ap_error("Error while calling 'cmatrixdet': looks like one of arguments has wrong size");
+    n = a.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<alglib::complex*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the matrix given by the Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition,
+                output of SMatrixCholesky subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+As the determinant is equal to the product of squares of diagonal elements,
+it’s not necessary to specify which triangle - lower or upper - the matrix
+is stored in.
+
+Result:
+    matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixcholeskydet(const real_2d_array &a, const ae_int_t n)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the matrix given by the Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition,
+                output of SMatrixCholesky subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+As the determinant is equal to the product of squares of diagonal elements,
+it’s not necessary to specify which triangle - lower or upper - the matrix
+is stored in.
+
+Result:
+    matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixcholeskydet(const real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (a.rows()!=a.cols()))
+        throw ap_error("Error while calling 'spdmatrixcholeskydet': looks like one of arguments has wrong size");
+    n = a.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the symmetric positive definite matrix.
+
+Input parameters:
+    A       -   matrix. Array with elements [0..N-1, 0..N-1].
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+    IsUpper -   (optional) storage type:
+                * if True, symmetric matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t used/changed  by
+                  function
+                * if False, symmetric matrix  A  is  given  by  its lower
+                  triangle, and the upper triangle isn’t used/changed  by
+                  function
+                * if not given, both lower and upper  triangles  must  be
+                  filled.
+
+Result:
+    determinant of matrix A.
+    If matrix A is not positive definite, exception is thrown.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixdet(const real_2d_array &a, const ae_int_t n, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Determinant calculation of the symmetric positive definite matrix.
+
+Input parameters:
+    A       -   matrix. Array with elements [0..N-1, 0..N-1].
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+    IsUpper -   (optional) storage type:
+                * if True, symmetric matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t used/changed  by
+                  function
+                * if False, symmetric matrix  A  is  given  by  its lower
+                  triangle, and the upper triangle isn’t used/changed  by
+                  function
+                * if not given, both lower and upper  triangles  must  be
+                  filled.
+
+Result:
+    determinant of matrix A.
+    If matrix A is not positive definite, exception is thrown.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixdet(const real_2d_array &a)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    bool isupper;
+    if( (a.rows()!=a.cols()))
+        throw ap_error("Error while calling 'spdmatrixdet': looks like one of arguments has wrong size");
+    if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
+        throw ap_error("'a' parameter is not symmetric matrix");
+    n = a.rows();
+    isupper = false;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<double*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Algorithm for solving the following generalized symmetric positive-definite
+eigenproblem:
+    A*x = lambda*B*x (1) or
+    A*B*x = lambda*x (2) or
+    B*A*x = lambda*x (3).
+where A is a symmetric matrix, B - symmetric positive-definite matrix.
+The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
+problem.
+
+Input parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrices A and B.
+    IsUpperA    -   storage format of matrix A.
+    B           -   symmetric positive-definite matrix which is given by
+                    its upper or lower triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperB    -   storage format of matrix B.
+    ZNeeded     -   if ZNeeded is equal to:
+                     * 0, the eigenvectors are not returned;
+                     * 1, the eigenvectors are returned.
+    ProblemType -   if ProblemType is equal to:
+                     * 1, the following problem is solved: A*x = lambda*B*x;
+                     * 2, the following problem is solved: A*B*x = lambda*x;
+                     * 3, the following problem is solved: B*A*x = lambda*x.
+
+Output parameters:
+    D           -   eigenvalues in ascending order.
+                    Array whose index ranges within [0..N-1].
+    Z           -   if ZNeeded is equal to:
+                     * 0, Z hasn’t changed;
+                     * 1, Z contains eigenvectors.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+                    The eigenvectors are stored in matrix columns. It should
+                    be noted that the eigenvectors in such problems do not
+                    form an orthogonal system.
+
+Result:
+    True, if the problem was solved successfully.
+    False, if the error occurred during the Cholesky decomposition of matrix
+    B (the matrix isn’t positive-definite) or during the work of the iterative
+    algorithm for solving the symmetric eigenproblem.
+
+See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
+
+  -- ALGLIB --
+     Copyright 1.28.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixgevd(const real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t zneeded, const ae_int_t problemtype, real_1d_array &d, real_2d_array &z)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixgevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, zneeded, problemtype, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Algorithm for reduction of the following generalized symmetric positive-
+definite eigenvalue problem:
+    A*x = lambda*B*x (1) or
+    A*B*x = lambda*x (2) or
+    B*A*x = lambda*x (3)
+to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
+the given problems are the same, and the eigenvectors of the given problem
+could be obtained by multiplying the obtained eigenvectors by the
+transformation matrix x = R*y).
+
+Here A is a symmetric matrix, B - symmetric positive-definite matrix.
+
+Input parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrices A and B.
+    IsUpperA    -   storage format of matrix A.
+    B           -   symmetric positive-definite matrix which is given by
+                    its upper or lower triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperB    -   storage format of matrix B.
+    ProblemType -   if ProblemType is equal to:
+                     * 1, the following problem is solved: A*x = lambda*B*x;
+                     * 2, the following problem is solved: A*B*x = lambda*x;
+                     * 3, the following problem is solved: B*A*x = lambda*x.
+
+Output parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangle depending on IsUpperA. Contains matrix C.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    R           -   upper triangular or low triangular transformation matrix
+                    which is used to obtain the eigenvectors of a given problem
+                    as the product of eigenvectors of C (from the right) and
+                    matrix R (from the left). If the matrix is upper
+                    triangular, the elements below the main diagonal
+                    are equal to 0 (and vice versa). Thus, we can perform
+                    the multiplication without taking into account the
+                    internal structure (which is an easier though less
+                    effective way).
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperR    -   type of matrix R (upper or lower triangular).
+
+Result:
+    True, if the problem was reduced successfully.
+    False, if the error occurred during the Cholesky decomposition of
+        matrix B (the matrix is not positive-definite).
+
+  -- ALGLIB --
+     Copyright 1.28.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixgevdreduce(real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t problemtype, real_2d_array &r, bool &isupperr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::smatrixgevdreduce(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, problemtype, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &isupperr, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a number to an element
+of matrix A.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    UpdRow  -   row where the element to be updated is stored.
+    UpdColumn - column where the element to be updated is stored.
+    UpdVal  -   a number to be added to the element.
+
+
+Output parameters:
+    InvA    -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdatesimple(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const ae_int_t updcolumn, const double updval)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixinvupdatesimple(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, updcolumn, updval, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a vector to a row
+of matrix A.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    UpdRow  -   the row of A whose vector V was added.
+                0 <= Row <= N-1
+    V       -   the vector to be added to a row.
+                Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA    -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdaterow(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const real_1d_array &v)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixinvupdaterow(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a vector to a column
+of matrix A.
+
+Input parameters:
+    InvA        -   inverse of matrix A.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrix A.
+    UpdColumn   -   the column of A whose vector U was added.
+                    0 <= UpdColumn <= N-1
+    U           -   the vector to be added to a column.
+                    Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA        -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdatecolumn(real_2d_array &inva, const ae_int_t n, const ae_int_t updcolumn, const real_1d_array &u)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixinvupdatecolumn(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updcolumn, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm computes the inverse of matrix A+u*v’ by using the given matrix
+A^-1 and the vectors u and v.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    U       -   the vector modifying the matrix.
+                Array whose index ranges within [0..N-1].
+    V       -   the vector modifying the matrix.
+                Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA - inverse of matrix A + u*v'.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdateuv(real_2d_array &inva, const ae_int_t n, const real_1d_array &u, const real_1d_array &v)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::rmatrixinvupdateuv(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Subroutine performing the Schur decomposition of a general matrix by using
+the QR algorithm with multiple shifts.
+
+The source matrix A is represented as S'*A*S = T, where S is an orthogonal
+matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
+sizes 1x1 and 2x2 on the main diagonal).
+
+Input parameters:
+    A   -   matrix to be decomposed.
+            Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of A, N>=0.
+
+
+Output parameters:
+    A   -   contains matrix T.
+            Array whose indexes range within [0..N-1, 0..N-1].
+    S   -   contains Schur vectors.
+            Array whose indexes range within [0..N-1, 0..N-1].
+
+Note 1:
+    The block structure of matrix T can be easily recognized: since all
+    the elements below the blocks are zeros, the elements a[i+1,i] which
+    are equal to 0 show the block border.
+
+Note 2:
+    The algorithm performance depends on the value of the internal parameter
+    NS of the InternalSchurDecomposition subroutine which defines the number
+    of shifts in the QR algorithm (similarly to the block width in block-matrix
+    algorithms in linear algebra). If you require maximum performance on
+    your machine, it is recommended to adjust this parameter manually.
+
+Result:
+    True,
+        if the algorithm has converged and parameters A and S contain the result.
+    False,
+        if the algorithm has not converged.
+
+Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
+*************************************************************************/
+bool rmatrixschur(real_2d_array &a, const ae_int_t n, real_2d_array &s)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::rmatrixschur(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(s.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+static void ablas_ablasinternalsplitlength(ae_int_t n,
+     ae_int_t nb,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state);
+static void ablas_cmatrixrighttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+static void ablas_cmatrixlefttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+static void ablas_rmatrixrighttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+static void ablas_rmatrixlefttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+static void ablas_cmatrixsyrk2(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state);
+static void ablas_rmatrixsyrk2(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state);
+static void ablas_cmatrixgemmk(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state);
+static void ablas_rmatrixgemmk(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state);
+
+
+static void ortfac_rmatrixqrbasecase(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* work,
+     /* Real    */ ae_vector* t,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state);
+static void ortfac_rmatrixlqbasecase(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* work,
+     /* Real    */ ae_vector* t,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state);
+static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* work,
+     /* Complex */ ae_vector* t,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state);
+static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* work,
+     /* Complex */ ae_vector* t,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state);
+static void ortfac_rmatrixblockreflector(/* Real    */ ae_matrix* a,
+     /* Real    */ ae_vector* tau,
+     ae_bool columnwisea,
+     ae_int_t lengtha,
+     ae_int_t blocksize,
+     /* Real    */ ae_matrix* t,
+     /* Real    */ ae_vector* work,
+     ae_state *_state);
+static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a,
+     /* Complex */ ae_vector* tau,
+     ae_bool columnwisea,
+     ae_int_t lengtha,
+     ae_int_t blocksize,
+     /* Complex */ ae_matrix* t,
+     /* Complex */ ae_vector* work,
+     ae_state *_state);
+
+
+static ae_bool evd_tridiagonalevd(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+static void evd_tdevde2(double a,
+     double b,
+     double c,
+     double* rt1,
+     double* rt2,
+     ae_state *_state);
+static void evd_tdevdev2(double a,
+     double b,
+     double c,
+     double* rt1,
+     double* rt2,
+     double* cs1,
+     double* sn1,
+     ae_state *_state);
+static double evd_tdevdpythag(double a, double b, ae_state *_state);
+static double evd_tdevdextsign(double a, double b, ae_state *_state);
+static ae_bool evd_internalbisectioneigenvalues(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t irange,
+     ae_int_t iorder,
+     double vl,
+     double vu,
+     ae_int_t il,
+     ae_int_t iu,
+     double abstol,
+     /* Real    */ ae_vector* w,
+     ae_int_t* m,
+     ae_int_t* nsplit,
+     /* Integer */ ae_vector* iblock,
+     /* Integer */ ae_vector* isplit,
+     ae_int_t* errorcode,
+     ae_state *_state);
+static void evd_internaldstein(ae_int_t n,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t m,
+     /* Real    */ ae_vector* w,
+     /* Integer */ ae_vector* iblock,
+     /* Integer */ ae_vector* isplit,
+     /* Real    */ ae_matrix* z,
+     /* Integer */ ae_vector* ifail,
+     ae_int_t* info,
+     ae_state *_state);
+static void evd_tdininternaldlagtf(ae_int_t n,
+     /* Real    */ ae_vector* a,
+     double lambdav,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* c,
+     double tol,
+     /* Real    */ ae_vector* d,
+     /* Integer */ ae_vector* iin,
+     ae_int_t* info,
+     ae_state *_state);
+static void evd_tdininternaldlagts(ae_int_t n,
+     /* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* c,
+     /* Real    */ ae_vector* d,
+     /* Integer */ ae_vector* iin,
+     /* Real    */ ae_vector* y,
+     double* tol,
+     ae_int_t* info,
+     ae_state *_state);
+static void evd_internaldlaebz(ae_int_t ijob,
+     ae_int_t nitmax,
+     ae_int_t n,
+     ae_int_t mmax,
+     ae_int_t minp,
+     double abstol,
+     double reltol,
+     double pivmin,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     /* Real    */ ae_vector* e2,
+     /* Integer */ ae_vector* nval,
+     /* Real    */ ae_matrix* ab,
+     /* Real    */ ae_vector* c,
+     ae_int_t* mout,
+     /* Integer */ ae_matrix* nab,
+     /* Real    */ ae_vector* work,
+     /* Integer */ ae_vector* iwork,
+     ae_int_t* info,
+     ae_state *_state);
+static void evd_internaltrevc(/* Real    */ ae_matrix* t,
+     ae_int_t n,
+     ae_int_t side,
+     ae_int_t howmny,
+     /* Boolean */ ae_vector* vselect,
+     /* Real    */ ae_matrix* vl,
+     /* Real    */ ae_matrix* vr,
+     ae_int_t* m,
+     ae_int_t* info,
+     ae_state *_state);
+static void evd_internalhsevdlaln2(ae_bool ltrans,
+     ae_int_t na,
+     ae_int_t nw,
+     double smin,
+     double ca,
+     /* Real    */ ae_matrix* a,
+     double d1,
+     double d2,
+     /* Real    */ ae_matrix* b,
+     double wr,
+     double wi,
+     /* Boolean */ ae_vector* rswap4,
+     /* Boolean */ ae_vector* zswap4,
+     /* Integer */ ae_matrix* ipivot44,
+     /* Real    */ ae_vector* civ4,
+     /* Real    */ ae_vector* crv4,
+     /* Real    */ ae_matrix* x,
+     double* scl,
+     double* xnorm,
+     ae_int_t* info,
+     ae_state *_state);
+static void evd_internalhsevdladiv(double a,
+     double b,
+     double c,
+     double d,
+     double* p,
+     double* q,
+     ae_state *_state);
+static ae_bool evd_nonsymmetricevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t vneeded,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     /* Real    */ ae_matrix* vl,
+     /* Real    */ ae_matrix* vr,
+     ae_state *_state);
+static void evd_toupperhessenberg(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state);
+static void evd_unpackqfromupperhessenberg(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state);
+
+
+
+
+static void trfac_cmatrixluprec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_rmatrixluprec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_cmatrixplurec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_rmatrixplurec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_cmatrixlup2(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_rmatrixlup2(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_cmatrixplu2(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+static void trfac_rmatrixplu2(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+static ae_bool trfac_spdmatrixcholesky2(/* Real    */ ae_matrix* aaa,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+
+
+static void rcond_rmatrixrcondtrinternal(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_bool onenorm,
+     double anorm,
+     double* rc,
+     ae_state *_state);
+static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_bool onenorm,
+     double anorm,
+     double* rc,
+     ae_state *_state);
+static void rcond_spdmatrixrcondcholeskyinternal(/* Real    */ ae_matrix* cha,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isnormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state);
+static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isnormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state);
+static void rcond_rmatrixrcondluinternal(/* Real    */ ae_matrix* lua,
+     ae_int_t n,
+     ae_bool onenorm,
+     ae_bool isanormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state);
+static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua,
+     ae_int_t n,
+     ae_bool onenorm,
+     ae_bool isanormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state);
+static void rcond_rmatrixestimatenorm(ae_int_t n,
+     /* Real    */ ae_vector* v,
+     /* Real    */ ae_vector* x,
+     /* Integer */ ae_vector* isgn,
+     double* est,
+     ae_int_t* kase,
+     ae_state *_state);
+static void rcond_cmatrixestimatenorm(ae_int_t n,
+     /* Complex */ ae_vector* v,
+     /* Complex */ ae_vector* x,
+     double* est,
+     ae_int_t* kase,
+     /* Integer */ ae_vector* isave,
+     /* Real    */ ae_vector* rsave,
+     ae_state *_state);
+static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state);
+static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state);
+static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave,
+     /* Real    */ ae_vector* rsave,
+     ae_int_t* i,
+     ae_int_t* iter,
+     ae_int_t* j,
+     ae_int_t* jlast,
+     ae_int_t* jump,
+     double* absxi,
+     double* altsgn,
+     double* estold,
+     double* temp,
+     ae_state *_state);
+static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave,
+     /* Real    */ ae_vector* rsave,
+     ae_int_t* i,
+     ae_int_t* iter,
+     ae_int_t* j,
+     ae_int_t* jlast,
+     ae_int_t* jump,
+     double* absxi,
+     double* altsgn,
+     double* estold,
+     double* temp,
+     ae_state *_state);
+
+
+static void matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     /* Real    */ ae_vector* tmp,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     /* Complex */ ae_vector* tmp,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+static void matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     /* Real    */ ae_vector* work,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     /* Complex */ ae_vector* work,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+static void matinv_spdmatrixcholeskyinverserec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state);
+
+
+static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isfractionalaccuracyrequired,
+     /* Real    */ ae_matrix* u,
+     ae_int_t ustart,
+     ae_int_t nru,
+     /* Real    */ ae_matrix* c,
+     ae_int_t cstart,
+     ae_int_t ncc,
+     /* Real    */ ae_matrix* vt,
+     ae_int_t vstart,
+     ae_int_t ncvt,
+     ae_state *_state);
+static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state);
+static void bdsvd_svd2x2(double f,
+     double g,
+     double h,
+     double* ssmin,
+     double* ssmax,
+     ae_state *_state);
+static void bdsvd_svdv2x2(double f,
+     double g,
+     double h,
+     double* ssmin,
+     double* ssmax,
+     double* snr,
+     double* csr,
+     double* snl,
+     double* csl,
+     ae_state *_state);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*************************************************************************
+Splits matrix length in two parts, left part should match ABLAS block size
+
+INPUT PARAMETERS
+    A   -   real matrix, is passed to ensure that we didn't split
+            complex matrix using real splitting subroutine.
+            matrix itself is not changed.
+    N   -   length, N>0
+
+OUTPUT PARAMETERS
+    N1  -   length
+    N2  -   length
+
+N1+N2=N, N1>=N2, N2 may be zero
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void ablassplitlength(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state)
+{
+
+    *n1 = 0;
+    *n2 = 0;
+
+    if( n>ablasblocksize(a, _state) )
+    {
+        ablas_ablasinternalsplitlength(n, ablasblocksize(a, _state), n1, n2, _state);
+    }
+    else
+    {
+        ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state);
+    }
+}
+
+
+/*************************************************************************
+Complex ABLASSplitLength
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void ablascomplexsplitlength(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state)
+{
+
+    *n1 = 0;
+    *n2 = 0;
+
+    if( n>ablascomplexblocksize(a, _state) )
+    {
+        ablas_ablasinternalsplitlength(n, ablascomplexblocksize(a, _state), n1, n2, _state);
+    }
+    else
+    {
+        ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state);
+    }
+}
+
+
+/*************************************************************************
+Returns block size - subdivision size where  cache-oblivious  soubroutines
+switch to the optimized kernel.
+
+INPUT PARAMETERS
+    A   -   real matrix, is passed to ensure that we didn't split
+            complex matrix using real splitting subroutine.
+            matrix itself is not changed.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+ae_int_t ablasblocksize(/* Real    */ ae_matrix* a, ae_state *_state)
+{
+    ae_int_t result;
+
+
+    result = 32;
+    return result;
+}
+
+
+/*************************************************************************
+Block size for complex subroutines.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+ae_int_t ablascomplexblocksize(/* Complex */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t result;
+
+
+    result = 24;
+    return result;
+}
+
+
+/*************************************************************************
+Microblock size
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+ae_int_t ablasmicroblocksize(ae_state *_state)
+{
+    ae_int_t result;
+
+
+    result = 8;
+    return result;
+}
+
+
+/*************************************************************************
+Cache-oblivous complex "copy-and-transpose"
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    A   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void cmatrixtranspose(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t s1;
+    ae_int_t s2;
+
+
+    if( m<=2*ablascomplexblocksize(a, _state)&&n<=2*ablascomplexblocksize(a, _state) )
+    {
+        
+        /*
+         * base case
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_cmove(&b->ptr.pp_complex[ib][jb+i], b->stride, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(ib,ib+n-1));
+        }
+    }
+    else
+    {
+        
+        /*
+         * Cache-oblivious recursion
+         */
+        if( m>n )
+        {
+            ablascomplexsplitlength(a, m, &s1, &s2, _state);
+            cmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state);
+            cmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state);
+        }
+        else
+        {
+            ablascomplexsplitlength(a, n, &s1, &s2, _state);
+            cmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state);
+            cmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Cache-oblivous real "copy-and-transpose"
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    A   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void rmatrixtranspose(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t s1;
+    ae_int_t s2;
+
+
+    if( m<=2*ablasblocksize(a, _state)&&n<=2*ablasblocksize(a, _state) )
+    {
+        
+        /*
+         * base case
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_move(&b->ptr.pp_double[ib][jb+i], b->stride, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(ib,ib+n-1));
+        }
+    }
+    else
+    {
+        
+        /*
+         * Cache-oblivious recursion
+         */
+        if( m>n )
+        {
+            ablassplitlength(a, m, &s1, &s2, _state);
+            rmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state);
+            rmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state);
+        }
+        else
+        {
+            ablassplitlength(a, n, &s1, &s2, _state);
+            rmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state);
+            rmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Copy
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    B   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void cmatrixcopy(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+
+    for(i=0; i<=m-1; i++)
+    {
+        ae_v_cmove(&b->ptr.pp_complex[ib+i][jb], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(jb,jb+n-1));
+    }
+}
+
+
+/*************************************************************************
+Copy
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    B   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void rmatrixcopy(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+
+    for(i=0; i<=m-1; i++)
+    {
+        ae_v_move(&b->ptr.pp_double[ib+i][jb], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(jb,jb+n-1));
+    }
+}
+
+
+/*************************************************************************
+Rank-1 correction: A := A + u*v'
+
+INPUT PARAMETERS:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   target matrix, MxN submatrix is updated
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    U   -   vector #1
+    IU  -   subvector offset
+    V   -   vector #2
+    IV  -   subvector offset
+*************************************************************************/
+void cmatrixrank1(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_vector* u,
+     ae_int_t iu,
+     /* Complex */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_complex s;
+
+
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    if( cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) )
+    {
+        return;
+    }
+    for(i=0; i<=m-1; i++)
+    {
+        s = u->ptr.p_complex[iu+i];
+        ae_v_caddc(&a->ptr.pp_complex[ia+i][ja], 1, &v->ptr.p_complex[iv], 1, "N", ae_v_len(ja,ja+n-1), s);
+    }
+}
+
+
+/*************************************************************************
+Rank-1 correction: A := A + u*v'
+
+INPUT PARAMETERS:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   target matrix, MxN submatrix is updated
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    U   -   vector #1
+    IU  -   subvector offset
+    V   -   vector #2
+    IV  -   subvector offset
+*************************************************************************/
+void rmatrixrank1(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_vector* u,
+     ae_int_t iu,
+     /* Real    */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double s;
+
+
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    if( rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) )
+    {
+        return;
+    }
+    for(i=0; i<=m-1; i++)
+    {
+        s = u->ptr.p_double[iu+i];
+        ae_v_addd(&a->ptr.pp_double[ia+i][ja], 1, &v->ptr.p_double[iv], 1, ae_v_len(ja,ja+n-1), s);
+    }
+}
+
+
+/*************************************************************************
+Matrix-vector product: y := op(A)*x
+
+INPUT PARAMETERS:
+    M   -   number of rows of op(A)
+            M>=0
+    N   -   number of columns of op(A)
+            N>=0
+    A   -   target matrix
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    OpA -   operation type:
+            * OpA=0     =>  op(A) = A
+            * OpA=1     =>  op(A) = A^T
+            * OpA=2     =>  op(A) = A^H
+    X   -   input vector
+    IX  -   subvector offset
+    IY  -   subvector offset
+
+OUTPUT PARAMETERS:
+    Y   -   vector which stores result
+
+if M=0, then subroutine does nothing.
+if N=0, Y is filled by zeros.
+
+
+  -- ALGLIB routine --
+
+     28.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixmv(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Complex */ ae_vector* x,
+     ae_int_t ix,
+     /* Complex */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_complex v;
+
+
+    if( m==0 )
+    {
+        return;
+    }
+    if( n==0 )
+    {
+        for(i=0; i<=m-1; i++)
+        {
+            y->ptr.p_complex[iy+i] = ae_complex_from_d(0);
+        }
+        return;
+    }
+    if( cmatrixmvf(m, n, a, ia, ja, opa, x, ix, y, iy, _state) )
+    {
+        return;
+    }
+    if( opa==0 )
+    {
+        
+        /*
+         * y = A*x
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &x->ptr.p_complex[ix], 1, "N", ae_v_len(ja,ja+n-1));
+            y->ptr.p_complex[iy+i] = v;
+        }
+        return;
+    }
+    if( opa==1 )
+    {
+        
+        /*
+         * y = A^T*x
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            y->ptr.p_complex[iy+i] = ae_complex_from_d(0);
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            v = x->ptr.p_complex[ix+i];
+            ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(iy,iy+m-1), v);
+        }
+        return;
+    }
+    if( opa==2 )
+    {
+        
+        /*
+         * y = A^H*x
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            y->ptr.p_complex[iy+i] = ae_complex_from_d(0);
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            v = x->ptr.p_complex[ix+i];
+            ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "Conj", ae_v_len(iy,iy+m-1), v);
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+Matrix-vector product: y := op(A)*x
+
+INPUT PARAMETERS:
+    M   -   number of rows of op(A)
+    N   -   number of columns of op(A)
+    A   -   target matrix
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    OpA -   operation type:
+            * OpA=0     =>  op(A) = A
+            * OpA=1     =>  op(A) = A^T
+    X   -   input vector
+    IX  -   subvector offset
+    IY  -   subvector offset
+
+OUTPUT PARAMETERS:
+    Y   -   vector which stores result
+
+if M=0, then subroutine does nothing.
+if N=0, Y is filled by zeros.
+
+
+  -- ALGLIB routine --
+
+     28.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixmv(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Real    */ ae_vector* x,
+     ae_int_t ix,
+     /* Real    */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double v;
+
+
+    if( m==0 )
+    {
+        return;
+    }
+    if( n==0 )
+    {
+        for(i=0; i<=m-1; i++)
+        {
+            y->ptr.p_double[iy+i] = 0;
+        }
+        return;
+    }
+    if( rmatrixmvf(m, n, a, ia, ja, opa, x, ix, y, iy, _state) )
+    {
+        return;
+    }
+    if( opa==0 )
+    {
+        
+        /*
+         * y = A*x
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &x->ptr.p_double[ix], 1, ae_v_len(ja,ja+n-1));
+            y->ptr.p_double[iy+i] = v;
+        }
+        return;
+    }
+    if( opa==1 )
+    {
+        
+        /*
+         * y = A^T*x
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            y->ptr.p_double[iy+i] = 0;
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            v = x->ptr.p_double[ix+i];
+            ae_v_addd(&y->ptr.p_double[iy], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(iy,iy+m-1), v);
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+This subroutine calculates X*op(A^-1) where:
+* X is MxN general matrix
+* A is NxN upper/lower triangular/unitriangular matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Multiplication result replaces X.
+Cache-oblivious algorithm is used.
+
+INPUT PARAMETERS
+    N   -   matrix size, N>=0
+    M   -   matrix size, N>=0
+    A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
+    I1      -   submatrix offset
+    J1      -   submatrix offset
+    IsUpper -   whether matrix is upper triangular
+    IsUnit  -   whether matrix is unitriangular
+    OpType  -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    C   -   matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1]
+    I2  -   submatrix offset
+    J2  -   submatrix offset
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrighttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablascomplexblocksize(a, _state);
+    if( m<=bs&&n<=bs )
+    {
+        ablas_cmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        return;
+    }
+    if( m>=n )
+    {
+        
+        /*
+         * Split X: X*A = (X1 X2)^T*A
+         */
+        ablascomplexsplitlength(a, m, &s1, &s2, _state);
+        cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
+    }
+    else
+    {
+        
+        /*
+         * Split A:
+         *               (A1  A12)
+         * X*op(A) = X*op(       )
+         *               (     A2)
+         *
+         * Different variants depending on
+         * IsUpper/OpType combinations
+         */
+        ablascomplexsplitlength(a, n, &s1, &s2, _state);
+        if( isupper&&optype==0 )
+        {
+            
+            /*
+             *                  (A1  A12)-1
+             * X*A^-1 = (X1 X2)*(       )
+             *                  (     A2)
+             */
+            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1, j1+s1, 0, ae_complex_from_d(1.0), x, i2, j2+s1, _state);
+            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            return;
+        }
+        if( isupper&&optype!=0 )
+        {
+            
+            /*
+             *                  (A1'     )-1
+             * X*A^-1 = (X1 X2)*(        )
+             *                  (A12' A2')
+             */
+            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1, j1+s1, optype, ae_complex_from_d(1.0), x, i2, j2, _state);
+            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+        if( !isupper&&optype==0 )
+        {
+            
+            /*
+             *                  (A1     )-1
+             * X*A^-1 = (X1 X2)*(       )
+             *                  (A21  A2)
+             */
+            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1+s1, j1, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
+            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+        if( !isupper&&optype!=0 )
+        {
+            
+            /*
+             *                  (A1' A21')-1
+             * X*A^-1 = (X1 X2)*(        )
+             *                  (     A2')
+             */
+            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1+s1, j1, optype, ae_complex_from_d(1.0), x, i2, j2+s1, _state);
+            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+This subroutine calculates op(A^-1)*X where:
+* X is MxN general matrix
+* A is MxM upper/lower triangular/unitriangular matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Multiplication result replaces X.
+Cache-oblivious algorithm is used.
+
+INPUT PARAMETERS
+    N   -   matrix size, N>=0
+    M   -   matrix size, N>=0
+    A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
+    I1      -   submatrix offset
+    J1      -   submatrix offset
+    IsUpper -   whether matrix is upper triangular
+    IsUnit  -   whether matrix is unitriangular
+    OpType  -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    C   -   matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1]
+    I2  -   submatrix offset
+    J2  -   submatrix offset
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlefttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablascomplexblocksize(a, _state);
+    if( m<=bs&&n<=bs )
+    {
+        ablas_cmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        return;
+    }
+    if( n>=m )
+    {
+        
+        /*
+         * Split X: op(A)^-1*X = op(A)^-1*(X1 X2)
+         */
+        ablascomplexsplitlength(x, n, &s1, &s2, _state);
+        cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
+    }
+    else
+    {
+        
+        /*
+         * Split A
+         */
+        ablascomplexsplitlength(a, m, &s1, &s2, _state);
+        if( isupper&&optype==0 )
+        {
+            
+            /*
+             *           (A1  A12)-1  ( X1 )
+             * A^-1*X* = (       )   *(    )
+             *           (     A2)    ( X2 )
+             */
+            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1, j1+s1, 0, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
+            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+        if( isupper&&optype!=0 )
+        {
+            
+            /*
+             *          (A1'     )-1 ( X1 )
+             * A^-1*X = (        )  *(    )
+             *          (A12' A2')   ( X2 )
+             */
+            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1, j1+s1, optype, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state);
+            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            return;
+        }
+        if( !isupper&&optype==0 )
+        {
+            
+            /*
+             *          (A1     )-1 ( X1 )
+             * A^-1*X = (       )  *(    )
+             *          (A21  A2)   ( X2 )
+             */
+            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1+s1, j1, 0, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state);
+            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            return;
+        }
+        if( !isupper&&optype!=0 )
+        {
+            
+            /*
+             *          (A1' A21')-1 ( X1 )
+             * A^-1*X = (        )  *(    )
+             *          (     A2')   ( X2 )
+             */
+            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1+s1, j1, optype, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
+            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Same as CMatrixRightTRSM, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrighttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablasblocksize(a, _state);
+    if( m<=bs&&n<=bs )
+    {
+        ablas_rmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        return;
+    }
+    if( m>=n )
+    {
+        
+        /*
+         * Split X: X*A = (X1 X2)^T*A
+         */
+        ablassplitlength(a, m, &s1, &s2, _state);
+        rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
+    }
+    else
+    {
+        
+        /*
+         * Split A:
+         *               (A1  A12)
+         * X*op(A) = X*op(       )
+         *               (     A2)
+         *
+         * Different variants depending on
+         * IsUpper/OpType combinations
+         */
+        ablassplitlength(a, n, &s1, &s2, _state);
+        if( isupper&&optype==0 )
+        {
+            
+            /*
+             *                  (A1  A12)-1
+             * X*A^-1 = (X1 X2)*(       )
+             *                  (     A2)
+             */
+            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1, j1+s1, 0, 1.0, x, i2, j2+s1, _state);
+            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            return;
+        }
+        if( isupper&&optype!=0 )
+        {
+            
+            /*
+             *                  (A1'     )-1
+             * X*A^-1 = (X1 X2)*(        )
+             *                  (A12' A2')
+             */
+            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1, j1+s1, optype, 1.0, x, i2, j2, _state);
+            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+        if( !isupper&&optype==0 )
+        {
+            
+            /*
+             *                  (A1     )-1
+             * X*A^-1 = (X1 X2)*(       )
+             *                  (A21  A2)
+             */
+            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1+s1, j1, 0, 1.0, x, i2, j2, _state);
+            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+        if( !isupper&&optype!=0 )
+        {
+            
+            /*
+             *                  (A1' A21')-1
+             * X*A^-1 = (X1 X2)*(        )
+             *                  (     A2')
+             */
+            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1+s1, j1, optype, 1.0, x, i2, j2+s1, _state);
+            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Same as CMatrixLeftTRSM, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlefttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablasblocksize(a, _state);
+    if( m<=bs&&n<=bs )
+    {
+        ablas_rmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        return;
+    }
+    if( n>=m )
+    {
+        
+        /*
+         * Split X: op(A)^-1*X = op(A)^-1*(X1 X2)
+         */
+        ablassplitlength(x, n, &s1, &s2, _state);
+        rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+        rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
+    }
+    else
+    {
+        
+        /*
+         * Split A
+         */
+        ablassplitlength(a, m, &s1, &s2, _state);
+        if( isupper&&optype==0 )
+        {
+            
+            /*
+             *           (A1  A12)-1  ( X1 )
+             * A^-1*X* = (       )   *(    )
+             *           (     A2)    ( X2 )
+             */
+            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            rmatrixgemm(s1, n, s2, -1.0, a, i1, j1+s1, 0, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state);
+            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+        if( isupper&&optype!=0 )
+        {
+            
+            /*
+             *          (A1'     )-1 ( X1 )
+             * A^-1*X = (        )  *(    )
+             *          (A12' A2')   ( X2 )
+             */
+            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            rmatrixgemm(s2, n, s1, -1.0, a, i1, j1+s1, optype, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state);
+            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            return;
+        }
+        if( !isupper&&optype==0 )
+        {
+            
+            /*
+             *          (A1     )-1 ( X1 )
+             * A^-1*X = (       )  *(    )
+             *          (A21  A2)   ( X2 )
+             */
+            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            rmatrixgemm(s2, n, s1, -1.0, a, i1+s1, j1, 0, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state);
+            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            return;
+        }
+        if( !isupper&&optype!=0 )
+        {
+            
+            /*
+             *          (A1' A21')-1 ( X1 )
+             * A^-1*X = (        )  *(    )
+             *          (     A2')   ( X2 )
+             */
+            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
+            rmatrixgemm(s1, n, s2, -1.0, a, i1+s1, j1, optype, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state);
+            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
+where:
+* C is NxN Hermitian matrix given by its upper/lower triangle
+* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
+
+Additional info:
+* cache-oblivious algorithm is used.
+* multiplication result replaces C. If Beta=0, C elements are not used in
+  calculations (not multiplied by zero - just not referenced)
+* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
+* if both Beta and Alpha are zero, C is filled by zeros.
+
+INPUT PARAMETERS
+    N       -   matrix size, N>=0
+    K       -   matrix size, K>=0
+    Alpha   -   coefficient
+    A       -   matrix
+    IA      -   submatrix offset
+    JA      -   submatrix offset
+    OpTypeA -   multiplication type:
+                * 0 - A*A^H is calculated
+                * 2 - A^H*A is calculated
+    Beta    -   coefficient
+    C       -   matrix
+    IC      -   submatrix offset
+    JC      -   submatrix offset
+    IsUpper -   whether C is upper triangular or lower triangular
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixsyrk(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablascomplexblocksize(a, _state);
+    if( n<=bs&&k<=bs )
+    {
+        ablas_cmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+        return;
+    }
+    if( k>=n )
+    {
+        
+        /*
+         * Split K
+         */
+        ablascomplexsplitlength(a, k, &s1, &s2, _state);
+        if( optypea==0 )
+        {
+            cmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            cmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state);
+        }
+        else
+        {
+            cmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            cmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state);
+        }
+    }
+    else
+    {
+        
+        /*
+         * Split N
+         */
+        ablascomplexsplitlength(a, n, &s1, &s2, _state);
+        if( optypea==0&&isupper )
+        {
+            cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 0, a, ia+s1, ja, 2, ae_complex_from_d(beta), c, ic, jc+s1, _state);
+            cmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+        if( optypea==0&&!isupper )
+        {
+            cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia+s1, ja, 0, a, ia, ja, 2, ae_complex_from_d(beta), c, ic+s1, jc, _state);
+            cmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+        if( optypea!=0&&isupper )
+        {
+            cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 2, a, ia, ja+s1, 0, ae_complex_from_d(beta), c, ic, jc+s1, _state);
+            cmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+        if( optypea!=0&&!isupper )
+        {
+            cmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia, ja+s1, 2, a, ia, ja, 0, ae_complex_from_d(beta), c, ic+s1, jc, _state);
+            cmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Same as CMatrixSYRK, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixsyrk(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablasblocksize(a, _state);
+    if( n<=bs&&k<=bs )
+    {
+        ablas_rmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+        return;
+    }
+    if( k>=n )
+    {
+        
+        /*
+         * Split K
+         */
+        ablassplitlength(a, k, &s1, &s2, _state);
+        if( optypea==0 )
+        {
+            rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            rmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state);
+        }
+        else
+        {
+            rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            rmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state);
+        }
+    }
+    else
+    {
+        
+        /*
+         * Split N
+         */
+        ablassplitlength(a, n, &s1, &s2, _state);
+        if( optypea==0&&isupper )
+        {
+            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 0, a, ia+s1, ja, 1, beta, c, ic, jc+s1, _state);
+            rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+        if( optypea==0&&!isupper )
+        {
+            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            rmatrixgemm(s2, s1, k, alpha, a, ia+s1, ja, 0, a, ia, ja, 1, beta, c, ic+s1, jc, _state);
+            rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+        if( optypea!=0&&isupper )
+        {
+            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 1, a, ia, ja+s1, 0, beta, c, ic, jc+s1, _state);
+            rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+        if( optypea!=0&&!isupper )
+        {
+            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
+            rmatrixgemm(s2, s1, k, alpha, a, ia, ja+s1, 1, a, ia, ja, 0, beta, c, ic+s1, jc, _state);
+            rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
+* C is MxN general matrix
+* op1(A) is MxK matrix
+* op2(B) is KxN matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Additional info:
+* cache-oblivious algorithm is used.
+* multiplication result replaces C. If Beta=0, C elements are not used in
+  calculations (not multiplied by zero - just not referenced)
+* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
+* if both Beta and Alpha are zero, C is filled by zeros.
+
+INPUT PARAMETERS
+    N       -   matrix size, N>0
+    M       -   matrix size, N>0
+    K       -   matrix size, K>0
+    Alpha   -   coefficient
+    A       -   matrix
+    IA      -   submatrix offset
+    JA      -   submatrix offset
+    OpTypeA -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    B       -   matrix
+    IB      -   submatrix offset
+    JB      -   submatrix offset
+    OpTypeB -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    Beta    -   coefficient
+    C       -   matrix
+    IC      -   submatrix offset
+    JC      -   submatrix offset
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixgemm(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablascomplexblocksize(a, _state);
+    if( (m<=bs&&n<=bs)&&k<=bs )
+    {
+        ablas_cmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+        return;
+    }
+    if( m>=n&&m>=k )
+    {
+        
+        /*
+         * A*B = (A1 A2)^T*B
+         */
+        ablascomplexsplitlength(a, m, &s1, &s2, _state);
+        cmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+        if( optypea==0 )
+        {
+            cmatrixgemm(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
+        }
+        else
+        {
+            cmatrixgemm(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
+        }
+        return;
+    }
+    if( n>=m&&n>=k )
+    {
+        
+        /*
+         * A*B = A*(B1 B2)
+         */
+        ablascomplexsplitlength(a, n, &s1, &s2, _state);
+        if( optypeb==0 )
+        {
+            cmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            cmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state);
+        }
+        else
+        {
+            cmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            cmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state);
+        }
+        return;
+    }
+    if( k>=m&&k>=n )
+    {
+        
+        /*
+         * A*B = (A1 A2)*(B1 B2)^T
+         */
+        ablascomplexsplitlength(a, k, &s1, &s2, _state);
+        if( optypea==0&&optypeb==0 )
+        {
+            cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            cmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
+        }
+        if( optypea==0&&optypeb!=0 )
+        {
+            cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            cmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
+        }
+        if( optypea!=0&&optypeb==0 )
+        {
+            cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            cmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
+        }
+        if( optypea!=0&&optypeb!=0 )
+        {
+            cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            cmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+Same as CMatrixGEMM, but for real numbers.
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixgemm(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state)
+{
+    ae_int_t s1;
+    ae_int_t s2;
+    ae_int_t bs;
+
+
+    bs = ablasblocksize(a, _state);
+    if( (m<=bs&&n<=bs)&&k<=bs )
+    {
+        ablas_rmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+        return;
+    }
+    if( m>=n&&m>=k )
+    {
+        
+        /*
+         * A*B = (A1 A2)^T*B
+         */
+        ablassplitlength(a, m, &s1, &s2, _state);
+        if( optypea==0 )
+        {
+            rmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
+        }
+        else
+        {
+            rmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
+        }
+        return;
+    }
+    if( n>=m&&n>=k )
+    {
+        
+        /*
+         * A*B = A*(B1 B2)
+         */
+        ablassplitlength(a, n, &s1, &s2, _state);
+        if( optypeb==0 )
+        {
+            rmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state);
+        }
+        else
+        {
+            rmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state);
+        }
+        return;
+    }
+    if( k>=m&&k>=n )
+    {
+        
+        /*
+         * A*B = (A1 A2)*(B1 B2)^T
+         */
+        ablassplitlength(a, k, &s1, &s2, _state);
+        if( optypea==0&&optypeb==0 )
+        {
+            rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state);
+        }
+        if( optypea==0&&optypeb!=0 )
+        {
+            rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state);
+        }
+        if( optypea!=0&&optypeb==0 )
+        {
+            rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state);
+        }
+        if( optypea!=0&&optypeb!=0 )
+        {
+            rmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
+            rmatrixgemm(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state);
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+Complex ABLASSplitLength
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+static void ablas_ablasinternalsplitlength(ae_int_t n,
+     ae_int_t nb,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state)
+{
+    ae_int_t r;
+
+    *n1 = 0;
+    *n2 = 0;
+
+    if( n<=nb )
+    {
+        
+        /*
+         * Block size, no further splitting
+         */
+        *n1 = n;
+        *n2 = 0;
+    }
+    else
+    {
+        
+        /*
+         * Greater than block size
+         */
+        if( n%nb!=0 )
+        {
+            
+            /*
+             * Split remainder
+             */
+            *n2 = n%nb;
+            *n1 = n-(*n2);
+        }
+        else
+        {
+            
+            /*
+             * Split on block boundaries
+             */
+            *n2 = n/2;
+            *n1 = n-(*n2);
+            if( *n1%nb==0 )
+            {
+                return;
+            }
+            r = nb-*n1%nb;
+            *n1 = *n1+r;
+            *n2 = *n2-r;
+        }
+    }
+}
+
+
+/*************************************************************************
+Level 2 variant of CMatrixRightTRSM
+*************************************************************************/
+static void ablas_cmatrixrighttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_complex vc;
+    ae_complex vd;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n*m==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Try to call fast TRSM
+     */
+    if( cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( isupper )
+    {
+        
+        /*
+         * Upper triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * X*A^(-1)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    if( isunit )
+                    {
+                        vd = ae_complex_from_d(1);
+                    }
+                    else
+                    {
+                        vd = a->ptr.pp_complex[i1+j][j1+j];
+                    }
+                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd);
+                    if( j<n-1 )
+                    {
+                        vc = x->ptr.pp_complex[i2+i][j2+j];
+                        ae_v_csubc(&x->ptr.pp_complex[i2+i][j2+j+1], 1, &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1), vc);
+                    }
+                }
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * X*A^(-T)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=n-1; j>=0; j--)
+                {
+                    vc = ae_complex_from_d(0);
+                    vd = ae_complex_from_d(1);
+                    if( j<n-1 )
+                    {
+                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1));
+                    }
+                    if( !isunit )
+                    {
+                        vd = a->ptr.pp_complex[i1+j][j1+j];
+                    }
+                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
+                }
+            }
+            return;
+        }
+        if( optype==2 )
+        {
+            
+            /*
+             * X*A^(-H)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=n-1; j>=0; j--)
+                {
+                    vc = ae_complex_from_d(0);
+                    vd = ae_complex_from_d(1);
+                    if( j<n-1 )
+                    {
+                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "Conj", ae_v_len(j2+j+1,j2+n-1));
+                    }
+                    if( !isunit )
+                    {
+                        vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state);
+                    }
+                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
+                }
+            }
+            return;
+        }
+    }
+    else
+    {
+        
+        /*
+         * Lower triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * X*A^(-1)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=n-1; j>=0; j--)
+                {
+                    if( isunit )
+                    {
+                        vd = ae_complex_from_d(1);
+                    }
+                    else
+                    {
+                        vd = a->ptr.pp_complex[i1+j][j1+j];
+                    }
+                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd);
+                    if( j>0 )
+                    {
+                        vc = x->ptr.pp_complex[i2+i][j2+j];
+                        ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1), vc);
+                    }
+                }
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * X*A^(-T)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    vc = ae_complex_from_d(0);
+                    vd = ae_complex_from_d(1);
+                    if( j>0 )
+                    {
+                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1));
+                    }
+                    if( !isunit )
+                    {
+                        vd = a->ptr.pp_complex[i1+j][j1+j];
+                    }
+                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
+                }
+            }
+            return;
+        }
+        if( optype==2 )
+        {
+            
+            /*
+             * X*A^(-H)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    vc = ae_complex_from_d(0);
+                    vd = ae_complex_from_d(1);
+                    if( j>0 )
+                    {
+                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "Conj", ae_v_len(j2,j2+j-1));
+                    }
+                    if( !isunit )
+                    {
+                        vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state);
+                    }
+                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
+                }
+            }
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Level-2 subroutine
+*************************************************************************/
+static void ablas_cmatrixlefttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_complex vc;
+    ae_complex vd;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n*m==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Try to call fast TRSM
+     */
+    if( cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( isupper )
+    {
+        
+        /*
+         * Upper triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * A^(-1)*X
+             */
+            for(i=m-1; i>=0; i--)
+            {
+                for(j=i+1; j<=m-1; j++)
+                {
+                    vc = a->ptr.pp_complex[i1+i][j1+j];
+                    ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
+                }
+                if( !isunit )
+                {
+                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
+                    ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                }
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * A^(-T)*X
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                if( isunit )
+                {
+                    vd = ae_complex_from_d(1);
+                }
+                else
+                {
+                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
+                }
+                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                for(j=i+1; j<=m-1; j++)
+                {
+                    vc = a->ptr.pp_complex[i1+i][j1+j];
+                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
+                }
+            }
+            return;
+        }
+        if( optype==2 )
+        {
+            
+            /*
+             * A^(-H)*X
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                if( isunit )
+                {
+                    vd = ae_complex_from_d(1);
+                }
+                else
+                {
+                    vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state));
+                }
+                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                for(j=i+1; j<=m-1; j++)
+                {
+                    vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state);
+                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
+                }
+            }
+            return;
+        }
+    }
+    else
+    {
+        
+        /*
+         * Lower triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * A^(-1)*X
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=i-1; j++)
+                {
+                    vc = a->ptr.pp_complex[i1+i][j1+j];
+                    ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
+                }
+                if( isunit )
+                {
+                    vd = ae_complex_from_d(1);
+                }
+                else
+                {
+                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+j][j1+j]);
+                }
+                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * A^(-T)*X
+             */
+            for(i=m-1; i>=0; i--)
+            {
+                if( isunit )
+                {
+                    vd = ae_complex_from_d(1);
+                }
+                else
+                {
+                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
+                }
+                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                for(j=i-1; j>=0; j--)
+                {
+                    vc = a->ptr.pp_complex[i1+i][j1+j];
+                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
+                }
+            }
+            return;
+        }
+        if( optype==2 )
+        {
+            
+            /*
+             * A^(-H)*X
+             */
+            for(i=m-1; i>=0; i--)
+            {
+                if( isunit )
+                {
+                    vd = ae_complex_from_d(1);
+                }
+                else
+                {
+                    vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state));
+                }
+                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                for(j=i-1; j>=0; j--)
+                {
+                    vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state);
+                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
+                }
+            }
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Level 2 subroutine
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+static void ablas_rmatrixrighttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double vr;
+    double vd;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n*m==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Try to use "fast" code
+     */
+    if( rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( isupper )
+    {
+        
+        /*
+         * Upper triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * X*A^(-1)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    if( isunit )
+                    {
+                        vd = 1;
+                    }
+                    else
+                    {
+                        vd = a->ptr.pp_double[i1+j][j1+j];
+                    }
+                    x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd;
+                    if( j<n-1 )
+                    {
+                        vr = x->ptr.pp_double[i2+i][j2+j];
+                        ae_v_subd(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1), vr);
+                    }
+                }
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * X*A^(-T)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=n-1; j>=0; j--)
+                {
+                    vr = 0;
+                    vd = 1;
+                    if( j<n-1 )
+                    {
+                        vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1));
+                    }
+                    if( !isunit )
+                    {
+                        vd = a->ptr.pp_double[i1+j][j1+j];
+                    }
+                    x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd;
+                }
+            }
+            return;
+        }
+    }
+    else
+    {
+        
+        /*
+         * Lower triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * X*A^(-1)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=n-1; j>=0; j--)
+                {
+                    if( isunit )
+                    {
+                        vd = 1;
+                    }
+                    else
+                    {
+                        vd = a->ptr.pp_double[i1+j][j1+j];
+                    }
+                    x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd;
+                    if( j>0 )
+                    {
+                        vr = x->ptr.pp_double[i2+i][j2+j];
+                        ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1), vr);
+                    }
+                }
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * X*A^(-T)
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    vr = 0;
+                    vd = 1;
+                    if( j>0 )
+                    {
+                        vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1));
+                    }
+                    if( !isunit )
+                    {
+                        vd = a->ptr.pp_double[i1+j][j1+j];
+                    }
+                    x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd;
+                }
+            }
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Level 2 subroutine
+*************************************************************************/
+static void ablas_rmatrixlefttrsm2(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double vr;
+    double vd;
+
+
+    
+    /*
+     * Special case
+     */
+    if( n*m==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Try fast code
+     */
+    if( rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( isupper )
+    {
+        
+        /*
+         * Upper triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * A^(-1)*X
+             */
+            for(i=m-1; i>=0; i--)
+            {
+                for(j=i+1; j<=m-1; j++)
+                {
+                    vr = a->ptr.pp_double[i1+i][j1+j];
+                    ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr);
+                }
+                if( !isunit )
+                {
+                    vd = 1/a->ptr.pp_double[i1+i][j1+i];
+                    ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                }
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * A^(-T)*X
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                if( isunit )
+                {
+                    vd = 1;
+                }
+                else
+                {
+                    vd = 1/a->ptr.pp_double[i1+i][j1+i];
+                }
+                ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                for(j=i+1; j<=m-1; j++)
+                {
+                    vr = a->ptr.pp_double[i1+i][j1+j];
+                    ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr);
+                }
+            }
+            return;
+        }
+    }
+    else
+    {
+        
+        /*
+         * Lower triangular matrix
+         */
+        if( optype==0 )
+        {
+            
+            /*
+             * A^(-1)*X
+             */
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=i-1; j++)
+                {
+                    vr = a->ptr.pp_double[i1+i][j1+j];
+                    ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr);
+                }
+                if( isunit )
+                {
+                    vd = 1;
+                }
+                else
+                {
+                    vd = 1/a->ptr.pp_double[i1+j][j1+j];
+                }
+                ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+            }
+            return;
+        }
+        if( optype==1 )
+        {
+            
+            /*
+             * A^(-T)*X
+             */
+            for(i=m-1; i>=0; i--)
+            {
+                if( isunit )
+                {
+                    vd = 1;
+                }
+                else
+                {
+                    vd = 1/a->ptr.pp_double[i1+i][j1+i];
+                }
+                ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
+                for(j=i-1; j>=0; j--)
+                {
+                    vr = a->ptr.pp_double[i1+i][j1+j];
+                    ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr);
+                }
+            }
+            return;
+        }
+    }
+}
+
+
+/*************************************************************************
+Level 2 subroutine
+*************************************************************************/
+static void ablas_cmatrixsyrk2(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t j2;
+    ae_complex v;
+
+
+    
+    /*
+     * Fast exit (nothing to be done)
+     */
+    if( (ae_fp_eq(alpha,0)||k==0)&&ae_fp_eq(beta,1) )
+    {
+        return;
+    }
+    
+    /*
+     * Try to call fast SYRK
+     */
+    if( cmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * SYRK
+     */
+    if( optypea==0 )
+    {
+        
+        /*
+         * C=alpha*A*A^H+beta*C
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            if( isupper )
+            {
+                j1 = i;
+                j2 = n-1;
+            }
+            else
+            {
+                j1 = 0;
+                j2 = i;
+            }
+            for(j=j1; j<=j2; j++)
+            {
+                if( ae_fp_neq(alpha,0)&&k>0 )
+                {
+                    v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &a->ptr.pp_complex[ia+j][ja], 1, "Conj", ae_v_len(ja,ja+k-1));
+                }
+                else
+                {
+                    v = ae_complex_from_d(0);
+                }
+                if( ae_fp_eq(beta,0) )
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul_d(v,alpha);
+                }
+                else
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul_d(c->ptr.pp_complex[ic+i][jc+j],beta),ae_c_mul_d(v,alpha));
+                }
+            }
+        }
+        return;
+    }
+    else
+    {
+        
+        /*
+         * C=alpha*A^H*A+beta*C
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            if( isupper )
+            {
+                j1 = i;
+                j2 = n-1;
+            }
+            else
+            {
+                j1 = 0;
+                j2 = i;
+            }
+            if( ae_fp_eq(beta,0) )
+            {
+                for(j=j1; j<=j2; j++)
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0);
+                }
+            }
+            else
+            {
+                ae_v_cmuld(&c->ptr.pp_complex[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta);
+            }
+        }
+        for(i=0; i<=k-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                if( isupper )
+                {
+                    j1 = j;
+                    j2 = n-1;
+                }
+                else
+                {
+                    j1 = 0;
+                    j2 = j;
+                }
+                v = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[ia+i][ja+j], _state),alpha);
+                ae_v_caddc(&c->ptr.pp_complex[ic+j][jc+j1], 1, &a->ptr.pp_complex[ia+i][ja+j1], 1, "N", ae_v_len(jc+j1,jc+j2), v);
+            }
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+Level 2 subrotuine
+*************************************************************************/
+static void ablas_rmatrixsyrk2(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t j2;
+    double v;
+
+
+    
+    /*
+     * Fast exit (nothing to be done)
+     */
+    if( (ae_fp_eq(alpha,0)||k==0)&&ae_fp_eq(beta,1) )
+    {
+        return;
+    }
+    
+    /*
+     * Try to call fast SYRK
+     */
+    if( rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * SYRK
+     */
+    if( optypea==0 )
+    {
+        
+        /*
+         * C=alpha*A*A^H+beta*C
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            if( isupper )
+            {
+                j1 = i;
+                j2 = n-1;
+            }
+            else
+            {
+                j1 = 0;
+                j2 = i;
+            }
+            for(j=j1; j<=j2; j++)
+            {
+                if( ae_fp_neq(alpha,0)&&k>0 )
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &a->ptr.pp_double[ia+j][ja], 1, ae_v_len(ja,ja+k-1));
+                }
+                else
+                {
+                    v = 0;
+                }
+                if( ae_fp_eq(beta,0) )
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = alpha*v;
+                }
+                else
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v;
+                }
+            }
+        }
+        return;
+    }
+    else
+    {
+        
+        /*
+         * C=alpha*A^H*A+beta*C
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            if( isupper )
+            {
+                j1 = i;
+                j2 = n-1;
+            }
+            else
+            {
+                j1 = 0;
+                j2 = i;
+            }
+            if( ae_fp_eq(beta,0) )
+            {
+                for(j=j1; j<=j2; j++)
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = 0;
+                }
+            }
+            else
+            {
+                ae_v_muld(&c->ptr.pp_double[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta);
+            }
+        }
+        for(i=0; i<=k-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                if( isupper )
+                {
+                    j1 = j;
+                    j2 = n-1;
+                }
+                else
+                {
+                    j1 = 0;
+                    j2 = j;
+                }
+                v = alpha*a->ptr.pp_double[ia+i][ja+j];
+                ae_v_addd(&c->ptr.pp_double[ic+j][jc+j1], 1, &a->ptr.pp_double[ia+i][ja+j1], 1, ae_v_len(jc+j1,jc+j2), v);
+            }
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+GEMM kernel
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+static void ablas_cmatrixgemmk(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_complex v;
+
+
+    
+    /*
+     * Special case
+     */
+    if( m*n==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Try optimized code
+     */
+    if( cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * Another special case
+     */
+    if( k==0 )
+    {
+        if( ae_c_neq_d(beta,0) )
+        {
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]);
+                }
+            }
+        }
+        else
+        {
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0);
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( optypea==0&&optypeb!=0 )
+    {
+        
+        /*
+         * A*B'
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                if( k==0||ae_c_eq_d(alpha,0) )
+                {
+                    v = ae_complex_from_d(0);
+                }
+                else
+                {
+                    if( optypeb==1 )
+                    {
+                        v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(ja,ja+k-1));
+                    }
+                    else
+                    {
+                        v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &b->ptr.pp_complex[ib+j][jb], 1, "Conj", ae_v_len(ja,ja+k-1));
+                    }
+                }
+                if( ae_c_eq_d(beta,0) )
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(alpha,v);
+                }
+                else
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]),ae_c_mul(alpha,v));
+                }
+            }
+        }
+        return;
+    }
+    if( optypea==0&&optypeb==0 )
+    {
+        
+        /*
+         * A*B
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            if( ae_c_neq_d(beta,0) )
+            {
+                ae_v_cmulc(&c->ptr.pp_complex[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta);
+            }
+            else
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0);
+                }
+            }
+            if( ae_c_neq_d(alpha,0) )
+            {
+                for(j=0; j<=k-1; j++)
+                {
+                    v = ae_c_mul(alpha,a->ptr.pp_complex[ia+i][ja+j]);
+                    ae_v_caddc(&c->ptr.pp_complex[ic+i][jc], 1, &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(jc,jc+n-1), v);
+                }
+            }
+        }
+        return;
+    }
+    if( optypea!=0&&optypeb!=0 )
+    {
+        
+        /*
+         * A'*B'
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                if( ae_c_eq_d(alpha,0) )
+                {
+                    v = ae_complex_from_d(0);
+                }
+                else
+                {
+                    if( optypea==1 )
+                    {
+                        if( optypeb==1 )
+                        {
+                            v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "N", &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(ia,ia+k-1));
+                        }
+                        else
+                        {
+                            v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "N", &b->ptr.pp_complex[ib+j][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
+                        }
+                    }
+                    else
+                    {
+                        if( optypeb==1 )
+                        {
+                            v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "Conj", &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(ia,ia+k-1));
+                        }
+                        else
+                        {
+                            v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+i], a->stride, "Conj", &b->ptr.pp_complex[ib+j][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
+                        }
+                    }
+                }
+                if( ae_c_eq_d(beta,0) )
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(alpha,v);
+                }
+                else
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]),ae_c_mul(alpha,v));
+                }
+            }
+        }
+        return;
+    }
+    if( optypea!=0&&optypeb==0 )
+    {
+        
+        /*
+         * A'*B
+         */
+        if( ae_c_eq_d(beta,0) )
+        {
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0);
+                }
+            }
+        }
+        else
+        {
+            for(i=0; i<=m-1; i++)
+            {
+                ae_v_cmulc(&c->ptr.pp_complex[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta);
+            }
+        }
+        if( ae_c_neq_d(alpha,0) )
+        {
+            for(j=0; j<=k-1; j++)
+            {
+                for(i=0; i<=m-1; i++)
+                {
+                    if( optypea==1 )
+                    {
+                        v = ae_c_mul(alpha,a->ptr.pp_complex[ia+j][ja+i]);
+                    }
+                    else
+                    {
+                        v = ae_c_mul(alpha,ae_c_conj(a->ptr.pp_complex[ia+j][ja+i], _state));
+                    }
+                    ae_v_caddc(&c->ptr.pp_complex[ic+i][jc], 1, &b->ptr.pp_complex[ib+j][jb], 1, "N", ae_v_len(jc,jc+n-1), v);
+                }
+            }
+        }
+        return;
+    }
+}
+
+
+/*************************************************************************
+GEMM kernel
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+static void ablas_rmatrixgemmk(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+
+
+    
+    /*
+     * if matrix size is zero
+     */
+    if( m*n==0 )
+    {
+        return;
+    }
+    
+    /*
+     * Try optimized code
+     */
+    if( rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
+    {
+        return;
+    }
+    
+    /*
+     * if K=0, then C=Beta*C
+     */
+    if( k==0 )
+    {
+        if( ae_fp_neq(beta,1) )
+        {
+            if( ae_fp_neq(beta,0) )
+            {
+                for(i=0; i<=m-1; i++)
+                {
+                    for(j=0; j<=n-1; j++)
+                    {
+                        c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j];
+                    }
+                }
+            }
+            else
+            {
+                for(i=0; i<=m-1; i++)
+                {
+                    for(j=0; j<=n-1; j++)
+                    {
+                        c->ptr.pp_double[ic+i][jc+j] = 0;
+                    }
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * General case
+     */
+    if( optypea==0&&optypeb!=0 )
+    {
+        
+        /*
+         * A*B'
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                if( k==0||ae_fp_eq(alpha,0) )
+                {
+                    v = 0;
+                }
+                else
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(ja,ja+k-1));
+                }
+                if( ae_fp_eq(beta,0) )
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = alpha*v;
+                }
+                else
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v;
+                }
+            }
+        }
+        return;
+    }
+    if( optypea==0&&optypeb==0 )
+    {
+        
+        /*
+         * A*B
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            if( ae_fp_neq(beta,0) )
+            {
+                ae_v_muld(&c->ptr.pp_double[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta);
+            }
+            else
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = 0;
+                }
+            }
+            if( ae_fp_neq(alpha,0) )
+            {
+                for(j=0; j<=k-1; j++)
+                {
+                    v = alpha*a->ptr.pp_double[ia+i][ja+j];
+                    ae_v_addd(&c->ptr.pp_double[ic+i][jc], 1, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(jc,jc+n-1), v);
+                }
+            }
+        }
+        return;
+    }
+    if( optypea!=0&&optypeb!=0 )
+    {
+        
+        /*
+         * A'*B'
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                if( ae_fp_eq(alpha,0) )
+                {
+                    v = 0;
+                }
+                else
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+i], a->stride, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(ia,ia+k-1));
+                }
+                if( ae_fp_eq(beta,0) )
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = alpha*v;
+                }
+                else
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v;
+                }
+            }
+        }
+        return;
+    }
+    if( optypea!=0&&optypeb==0 )
+    {
+        
+        /*
+         * A'*B
+         */
+        if( ae_fp_eq(beta,0) )
+        {
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=0; j<=n-1; j++)
+                {
+                    c->ptr.pp_double[ic+i][jc+j] = 0;
+                }
+            }
+        }
+        else
+        {
+            for(i=0; i<=m-1; i++)
+            {
+                ae_v_muld(&c->ptr.pp_double[ic+i][jc], 1, ae_v_len(jc,jc+n-1), beta);
+            }
+        }
+        if( ae_fp_neq(alpha,0) )
+        {
+            for(j=0; j<=k-1; j++)
+            {
+                for(i=0; i<=m-1; i++)
+                {
+                    v = alpha*a->ptr.pp_double[ia+j][ja+i];
+                    ae_v_addd(&c->ptr.pp_double[ic+i][jc], 1, &b->ptr.pp_double[ib+j][jb], 1, ae_v_len(jc,jc+n-1), v);
+                }
+            }
+        }
+        return;
+    }
+}
+
+
+
+
+/*************************************************************************
+QR decomposition of a rectangular matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and R in compact form (see below).
+    Tau -   array of scalar factors which are used to form
+            matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
+
+Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
+MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
+
+The elements of matrix R are located on and above the main diagonal of
+matrix A. The elements which are located in Tau array and below the main
+diagonal of matrix A are used to form matrix Q as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(0)*H(2)*...*H(k-1),
+
+where k = min(m,n), and each H(i) is in the form
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - real vector,
+so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqr(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t rowscount;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);
+
+    if( m<=0||n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    minmn = ae_minint(m, n, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(tau, minmn, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, m, ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, 2*ablasblocksize(a, _state), n, _state);
+    
+    /*
+     * Blocked code
+     */
+    blockstart = 0;
+    while(blockstart!=minmn)
+    {
+        
+        /*
+         * Determine block size
+         */
+        blocksize = minmn-blockstart;
+        if( blocksize>ablasblocksize(a, _state) )
+        {
+            blocksize = ablasblocksize(a, _state);
+        }
+        rowscount = m-blockstart;
+        
+        /*
+         * QR decomposition of submatrix.
+         * Matrix is copied to temporary storage to solve
+         * some TLB issues arising from non-contiguous memory
+         * access pattern.
+         */
+        rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ortfac_rmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state);
+        rmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state);
+        ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1));
+        
+        /*
+         * Update the rest, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( blockstart+blocksize<=n-1 )
+        {
+            if( n-blockstart-blocksize>=2*ablasblocksize(a, _state)||rowscount>=4*ablasblocksize(a, _state) )
+            {
+                
+                /*
+                 * Prepare block reflector
+                 */
+                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);
+                
+                /*
+                 * Multiply the rest of A by Q'.
+                 *
+                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
+                 * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
+                 */
+                rmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, 1.0, &tmpa, 0, 0, 1, a, blockstart, blockstart+blocksize, 0, 0.0, &tmpr, 0, 0, _state);
+                rmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, 1.0, &tmpt, 0, 0, 1, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state);
+                rmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, a, blockstart, blockstart+blocksize, _state);
+            }
+            else
+            {
+                
+                /*
+                 * Level 2 algorithm
+                 */
+                for(i=0; i<=blocksize-1; i++)
+                {
+                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i));
+                    t.ptr.p_double[1] = 1;
+                    applyreflectionfromtheleft(a, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state);
+                }
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart+blocksize;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+LQ decomposition of a rectangular matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices L and Q in compact form (see below)
+    Tau -   array of scalar factors which are used to form
+            matrix Q. Array whose index ranges within [0..Min(M,N)-1].
+
+Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
+MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.
+
+The elements of matrix L are located on and below  the  main  diagonal  of
+matrix A. The elements which are located in Tau array and above  the  main
+diagonal of matrix A are used to form matrix Q as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(k-1)*H(k-2)*...*H(1)*H(0),
+
+where k = min(m,n), and each H(i) is of the form
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
+v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlq(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t columnscount;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);
+
+    if( m<=0||n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    minmn = ae_minint(m, n, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(tau, minmn, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, ablasblocksize(a, _state), n, _state);
+    ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, m, 2*ablasblocksize(a, _state), _state);
+    
+    /*
+     * Blocked code
+     */
+    blockstart = 0;
+    while(blockstart!=minmn)
+    {
+        
+        /*
+         * Determine block size
+         */
+        blocksize = minmn-blockstart;
+        if( blocksize>ablasblocksize(a, _state) )
+        {
+            blocksize = ablasblocksize(a, _state);
+        }
+        columnscount = n-blockstart;
+        
+        /*
+         * LQ decomposition of submatrix.
+         * Matrix is copied to temporary storage to solve
+         * some TLB issues arising from non-contiguous memory
+         * access pattern.
+         */
+        rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ortfac_rmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state);
+        rmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state);
+        ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1));
+        
+        /*
+         * Update the rest, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( blockstart+blocksize<=m-1 )
+        {
+            if( m-blockstart-blocksize>=2*ablasblocksize(a, _state) )
+            {
+                
+                /*
+                 * Prepare block reflector
+                 */
+                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);
+                
+                /*
+                 * Multiply the rest of A by Q.
+                 *
+                 * Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
+                 */
+                rmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, 1.0, a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state);
+                rmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, 0.0, &tmpr, 0, blocksize, _state);
+                rmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, a, blockstart+blocksize, blockstart, _state);
+            }
+            else
+            {
+                
+                /*
+                 * Level 2 algorithm
+                 */
+                for(i=0; i<=blocksize-1; i++)
+                {
+                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i));
+                    t.ptr.p_double[1] = 1;
+                    applyreflectionfromtheright(a, taubuf.ptr.p_double[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state);
+                }
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart+blocksize;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+QR decomposition of a rectangular complex matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and R in compact form
+    Tau -   array of scalar factors which are used to form matrix Q. Array
+            whose indexes range within [0.. Min(M,N)-1]
+
+Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
+MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void cmatrixqr(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t rowscount;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);
+
+    if( m<=0||n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    minmn = ae_minint(m, n, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(tau, minmn, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, m, ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, 2*ablascomplexblocksize(a, _state), n, _state);
+    
+    /*
+     * Blocked code
+     */
+    blockstart = 0;
+    while(blockstart!=minmn)
+    {
+        
+        /*
+         * Determine block size
+         */
+        blocksize = minmn-blockstart;
+        if( blocksize>ablascomplexblocksize(a, _state) )
+        {
+            blocksize = ablascomplexblocksize(a, _state);
+        }
+        rowscount = m-blockstart;
+        
+        /*
+         * QR decomposition of submatrix.
+         * Matrix is copied to temporary storage to solve
+         * some TLB issues arising from non-contiguous memory
+         * access pattern.
+         */
+        cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ortfac_cmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state);
+        cmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state);
+        ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1));
+        
+        /*
+         * Update the rest, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( blockstart+blocksize<=n-1 )
+        {
+            if( n-blockstart-blocksize>=2*ablascomplexblocksize(a, _state) )
+            {
+                
+                /*
+                 * Prepare block reflector
+                 */
+                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);
+                
+                /*
+                 * Multiply the rest of A by Q'.
+                 *
+                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
+                 * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
+                 */
+                cmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, a, blockstart, blockstart+blocksize, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
+                cmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 2, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state);
+                cmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), a, blockstart, blockstart+blocksize, _state);
+            }
+            else
+            {
+                
+                /*
+                 * Level 2 algorithm
+                 */
+                for(i=0; i<=blocksize-1; i++)
+                {
+                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i));
+                    t.ptr.p_complex[1] = ae_complex_from_d(1);
+                    complexapplyreflectionfromtheleft(a, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state);
+                }
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart+blocksize;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+LQ decomposition of a rectangular complex matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and L in compact form
+    Tau -   array of scalar factors which are used to form matrix Q. Array
+            whose indexes range within [0.. Min(M,N)-1]
+
+Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
+MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void cmatrixlq(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t columnscount;
+    ae_int_t i;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);
+
+    if( m<=0||n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    minmn = ae_minint(m, n, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(tau, minmn, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, ablascomplexblocksize(a, _state), n, _state);
+    ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, m, 2*ablascomplexblocksize(a, _state), _state);
+    
+    /*
+     * Blocked code
+     */
+    blockstart = 0;
+    while(blockstart!=minmn)
+    {
+        
+        /*
+         * Determine block size
+         */
+        blocksize = minmn-blockstart;
+        if( blocksize>ablascomplexblocksize(a, _state) )
+        {
+            blocksize = ablascomplexblocksize(a, _state);
+        }
+        columnscount = n-blockstart;
+        
+        /*
+         * LQ decomposition of submatrix.
+         * Matrix is copied to temporary storage to solve
+         * some TLB issues arising from non-contiguous memory
+         * access pattern.
+         */
+        cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ortfac_cmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state);
+        cmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state);
+        ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1));
+        
+        /*
+         * Update the rest, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( blockstart+blocksize<=m-1 )
+        {
+            if( m-blockstart-blocksize>=2*ablascomplexblocksize(a, _state) )
+            {
+                
+                /*
+                 * Prepare block reflector
+                 */
+                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);
+                
+                /*
+                 * Multiply the rest of A by Q.
+                 *
+                 * Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
+                 */
+                cmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
+                cmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state);
+                cmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, _state);
+            }
+            else
+            {
+                
+                /*
+                 * Level 2 algorithm
+                 */
+                for(i=0; i<=blocksize-1; i++)
+                {
+                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i));
+                    t.ptr.p_complex[1] = ae_complex_from_d(1);
+                    complexapplyreflectionfromtheright(a, taubuf.ptr.p_complex[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state);
+                }
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart+blocksize;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of RMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+    Tau     -   scalar factors which are used to form Q.
+                Output of the RMatrixQR subroutine.
+    QColumns -  required number of columns of matrix Q. M>=QColumns>=0.
+
+Output parameters:
+    Q       -   first QColumns columns of matrix Q.
+                Array whose indexes range within [0..M-1, 0..QColumns-1].
+                If QColumns=0, the array remains unchanged.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqrunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_int_t qcolumns,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_int_t refcnt;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t rowscount;
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state);
+    if( (m<=0||n<=0)||qcolumns<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    minmn = ae_minint(m, n, _state);
+    refcnt = ae_minint(minmn, qcolumns, _state);
+    ae_matrix_set_length(q, m, qcolumns, _state);
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=qcolumns-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                q->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    ae_vector_set_length(&work, ae_maxint(m, qcolumns, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, qcolumns, _state)+1, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, m, ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, 2*ablasblocksize(a, _state), qcolumns, _state);
+    
+    /*
+     * Blocked code
+     */
+    blockstart = ablasblocksize(a, _state)*(refcnt/ablasblocksize(a, _state));
+    blocksize = refcnt-blockstart;
+    while(blockstart>=0)
+    {
+        rowscount = m-blockstart;
+        
+        /*
+         * Copy current block
+         */
+        rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1));
+        
+        /*
+         * Update, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( qcolumns>=2*ablasblocksize(a, _state) )
+        {
+            
+            /*
+             * Prepare block reflector
+             */
+            ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);
+            
+            /*
+             * Multiply matrix by Q.
+             *
+             * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
+             */
+            rmatrixgemm(blocksize, qcolumns, rowscount, 1.0, &tmpa, 0, 0, 1, q, blockstart, 0, 0, 0.0, &tmpr, 0, 0, _state);
+            rmatrixgemm(blocksize, qcolumns, blocksize, 1.0, &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state);
+            rmatrixgemm(rowscount, qcolumns, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, q, blockstart, 0, _state);
+        }
+        else
+        {
+            
+            /*
+             * Level 2 algorithm
+             */
+            for(i=blocksize-1; i>=0; i--)
+            {
+                ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i));
+                t.ptr.p_double[1] = 1;
+                applyreflectionfromtheleft(q, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state);
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart-ablasblocksize(a, _state);
+        blocksize = ablasblocksize(a, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking of matrix R from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of RMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    R       -   matrix R, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqrunpackr(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* r,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+    ae_matrix_clear(r);
+
+    if( m<=0||n<=0 )
+    {
+        return;
+    }
+    k = ae_minint(m, n, _state);
+    ae_matrix_set_length(r, m, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        r->ptr.pp_double[0][i] = 0;
+    }
+    for(i=1; i<=m-1; i++)
+    {
+        ae_v_move(&r->ptr.pp_double[i][0], 1, &r->ptr.pp_double[0][0], 1, ae_v_len(0,n-1));
+    }
+    for(i=0; i<=k-1; i++)
+    {
+        ae_v_move(&r->ptr.pp_double[i][i], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
+    }
+}
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices L and Q in compact form.
+                Output of RMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+    Tau     -   scalar factors which are used to form Q.
+                Output of the RMatrixLQ subroutine.
+    QRows   -   required number of rows in matrix Q. N>=QRows>=0.
+
+Output parameters:
+    Q       -   first QRows rows of matrix Q. Array whose indexes range
+                within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
+                unchanged.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlqunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_int_t qrows,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_int_t refcnt;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t columnscount;
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(qrows<=n, "RMatrixLQUnpackQ: QRows>N!", _state);
+    if( (m<=0||n<=0)||qrows<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    minmn = ae_minint(m, n, _state);
+    refcnt = ae_minint(minmn, qrows, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, ablasblocksize(a, _state), n, _state);
+    ae_matrix_set_length(&tmpt, ablasblocksize(a, _state), 2*ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, qrows, 2*ablasblocksize(a, _state), _state);
+    ae_matrix_set_length(q, qrows, n, _state);
+    for(i=0; i<=qrows-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                q->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    
+    /*
+     * Blocked code
+     */
+    blockstart = ablasblocksize(a, _state)*(refcnt/ablasblocksize(a, _state));
+    blocksize = refcnt-blockstart;
+    while(blockstart>=0)
+    {
+        columnscount = n-blockstart;
+        
+        /*
+         * Copy submatrix
+         */
+        rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1));
+        
+        /*
+         * Update matrix, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( qrows>=2*ablasblocksize(a, _state) )
+        {
+            
+            /*
+             * Prepare block reflector
+             */
+            ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);
+            
+            /*
+             * Multiply the rest of A by Q'.
+             *
+             * Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
+             */
+            rmatrixgemm(qrows, blocksize, columnscount, 1.0, q, 0, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state);
+            rmatrixgemm(qrows, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 1, 0.0, &tmpr, 0, blocksize, _state);
+            rmatrixgemm(qrows, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, q, 0, blockstart, _state);
+        }
+        else
+        {
+            
+            /*
+             * Level 2 algorithm
+             */
+            for(i=blocksize-1; i>=0; i--)
+            {
+                ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i));
+                t.ptr.p_double[1] = 1;
+                applyreflectionfromtheright(q, taubuf.ptr.p_double[i], &t, 0, qrows-1, blockstart+i, n-1, &work, _state);
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart-ablasblocksize(a, _state);
+        blocksize = ablasblocksize(a, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking of matrix L from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and L in compact form.
+                Output of RMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    L       -   matrix L, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlqunpackl(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* l,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+    ae_matrix_clear(l);
+
+    if( m<=0||n<=0 )
+    {
+        return;
+    }
+    ae_matrix_set_length(l, m, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        l->ptr.pp_double[0][i] = 0;
+    }
+    for(i=1; i<=m-1; i++)
+    {
+        ae_v_move(&l->ptr.pp_double[i][0], 1, &l->ptr.pp_double[0][0], 1, ae_v_len(0,n-1));
+    }
+    for(i=0; i<=m-1; i++)
+    {
+        k = ae_minint(i, n-1, _state);
+        ae_v_move(&l->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k));
+    }
+}
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
+
+Input parameters:
+    A           -   matrices Q and R in compact form.
+                    Output of CMatrixQR subroutine .
+    M           -   number of rows in matrix A. M>=0.
+    N           -   number of columns in matrix A. N>=0.
+    Tau         -   scalar factors which are used to form Q.
+                    Output of CMatrixQR subroutine .
+    QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QColumns columns of matrix Q.
+                    Array whose index ranges within [0..M-1, 0..QColumns-1].
+                    If QColumns=0, array isn't changed.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixqrunpackq(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_int_t qcolumns,
+     /* Complex */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_int_t refcnt;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t rowscount;
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state);
+    if( m<=0||n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    minmn = ae_minint(m, n, _state);
+    refcnt = ae_minint(minmn, qcolumns, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, m, ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, 2*ablascomplexblocksize(a, _state), qcolumns, _state);
+    ae_matrix_set_length(q, m, qcolumns, _state);
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=qcolumns-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_complex[i][j] = ae_complex_from_d(1);
+            }
+            else
+            {
+                q->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+            }
+        }
+    }
+    
+    /*
+     * Blocked code
+     */
+    blockstart = ablascomplexblocksize(a, _state)*(refcnt/ablascomplexblocksize(a, _state));
+    blocksize = refcnt-blockstart;
+    while(blockstart>=0)
+    {
+        rowscount = m-blockstart;
+        
+        /*
+         * QR decomposition of submatrix.
+         * Matrix is copied to temporary storage to solve
+         * some TLB issues arising from non-contiguous memory
+         * access pattern.
+         */
+        cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1));
+        
+        /*
+         * Update matrix, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( qcolumns>=2*ablascomplexblocksize(a, _state) )
+        {
+            
+            /*
+             * Prepare block reflector
+             */
+            ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);
+            
+            /*
+             * Multiply the rest of A by Q.
+             *
+             * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
+             */
+            cmatrixgemm(blocksize, qcolumns, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, q, blockstart, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
+            cmatrixgemm(blocksize, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state);
+            cmatrixgemm(rowscount, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), q, blockstart, 0, _state);
+        }
+        else
+        {
+            
+            /*
+             * Level 2 algorithm
+             */
+            for(i=blocksize-1; i>=0; i--)
+            {
+                ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i));
+                t.ptr.p_complex[1] = ae_complex_from_d(1);
+                complexapplyreflectionfromtheleft(q, taubuf.ptr.p_complex[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state);
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart-ablascomplexblocksize(a, _state);
+        blocksize = ablascomplexblocksize(a, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking of matrix R from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of CMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    R       -   matrix R, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixqrunpackr(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* r,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+    ae_matrix_clear(r);
+
+    if( m<=0||n<=0 )
+    {
+        return;
+    }
+    k = ae_minint(m, n, _state);
+    ae_matrix_set_length(r, m, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        r->ptr.pp_complex[0][i] = ae_complex_from_d(0);
+    }
+    for(i=1; i<=m-1; i++)
+    {
+        ae_v_cmove(&r->ptr.pp_complex[i][0], 1, &r->ptr.pp_complex[0][0], 1, "N", ae_v_len(0,n-1));
+    }
+    for(i=0; i<=k-1; i++)
+    {
+        ae_v_cmove(&r->ptr.pp_complex[i][i], 1, &a->ptr.pp_complex[i][i], 1, "N", ae_v_len(i,n-1));
+    }
+}
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
+
+Input parameters:
+    A           -   matrices Q and R in compact form.
+                    Output of CMatrixLQ subroutine .
+    M           -   number of rows in matrix A. M>=0.
+    N           -   number of columns in matrix A. N>=0.
+    Tau         -   scalar factors which are used to form Q.
+                    Output of CMatrixLQ subroutine .
+    QRows       -   required number of rows in matrix Q. N>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QRows rows of matrix Q.
+                    Array whose index ranges within [0..QRows-1, 0..N-1].
+                    If QRows=0, array isn't changed.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlqunpackq(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_int_t qrows,
+     /* Complex */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_vector taubuf;
+    ae_int_t minmn;
+    ae_int_t refcnt;
+    ae_matrix tmpa;
+    ae_matrix tmpt;
+    ae_matrix tmpr;
+    ae_int_t blockstart;
+    ae_int_t blocksize;
+    ae_int_t columnscount;
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);
+
+    if( m<=0||n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Init
+     */
+    minmn = ae_minint(m, n, _state);
+    refcnt = ae_minint(minmn, qrows, _state);
+    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
+    ae_vector_set_length(&taubuf, minmn, _state);
+    ae_matrix_set_length(&tmpa, ablascomplexblocksize(a, _state), n, _state);
+    ae_matrix_set_length(&tmpt, ablascomplexblocksize(a, _state), ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(&tmpr, qrows, 2*ablascomplexblocksize(a, _state), _state);
+    ae_matrix_set_length(q, qrows, n, _state);
+    for(i=0; i<=qrows-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_complex[i][j] = ae_complex_from_d(1);
+            }
+            else
+            {
+                q->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+            }
+        }
+    }
+    
+    /*
+     * Blocked code
+     */
+    blockstart = ablascomplexblocksize(a, _state)*(refcnt/ablascomplexblocksize(a, _state));
+    blocksize = refcnt-blockstart;
+    while(blockstart>=0)
+    {
+        columnscount = n-blockstart;
+        
+        /*
+         * LQ decomposition of submatrix.
+         * Matrix is copied to temporary storage to solve
+         * some TLB issues arising from non-contiguous memory
+         * access pattern.
+         */
+        cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
+        ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1));
+        
+        /*
+         * Update matrix, choose between:
+         * a) Level 2 algorithm (when the rest of the matrix is small enough)
+         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
+         *    representation for products of Householder transformations',
+         *    by R. Schreiber and C. Van Loan.
+         */
+        if( qrows>=2*ablascomplexblocksize(a, _state) )
+        {
+            
+            /*
+             * Prepare block reflector
+             */
+            ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);
+            
+            /*
+             * Multiply the rest of A by Q'.
+             *
+             * Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
+             */
+            cmatrixgemm(qrows, blocksize, columnscount, ae_complex_from_d(1.0), q, 0, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
+            cmatrixgemm(qrows, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state);
+            cmatrixgemm(qrows, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), q, 0, blockstart, _state);
+        }
+        else
+        {
+            
+            /*
+             * Level 2 algorithm
+             */
+            for(i=blocksize-1; i>=0; i--)
+            {
+                ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i));
+                t.ptr.p_complex[1] = ae_complex_from_d(1);
+                complexapplyreflectionfromtheright(q, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, 0, qrows-1, blockstart+i, n-1, &work, _state);
+            }
+        }
+        
+        /*
+         * Advance
+         */
+        blockstart = blockstart-ablascomplexblocksize(a, _state);
+        blocksize = ablascomplexblocksize(a, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking of matrix L from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and L in compact form.
+                Output of CMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    L       -   matrix L, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlqunpackl(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* l,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+
+    ae_matrix_clear(l);
+
+    if( m<=0||n<=0 )
+    {
+        return;
+    }
+    ae_matrix_set_length(l, m, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        l->ptr.pp_complex[0][i] = ae_complex_from_d(0);
+    }
+    for(i=1; i<=m-1; i++)
+    {
+        ae_v_cmove(&l->ptr.pp_complex[i][0], 1, &l->ptr.pp_complex[0][0], 1, "N", ae_v_len(0,n-1));
+    }
+    for(i=0; i<=m-1; i++)
+    {
+        k = ae_minint(i, n-1, _state);
+        ae_v_cmove(&l->ptr.pp_complex[i][0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,k));
+    }
+}
+
+
+/*************************************************************************
+Reduction of a rectangular matrix to  bidiagonal form
+
+The algorithm reduces the rectangular matrix A to  bidiagonal form by
+orthogonal transformations P and Q: A = Q*B*P.
+
+Input parameters:
+    A       -   source matrix. array[0..M-1, 0..N-1]
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+Output parameters:
+    A       -   matrices Q, B, P in compact form (see below).
+    TauQ    -   scalar factors which are used to form matrix Q.
+    TauP    -   scalar factors which are used to form matrix P.
+
+The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
+replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
+reflections which form MxM matrix Q and NxN matrix P, respectively.
+
+If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
+corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
+product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
+H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
+vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
+stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
+G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
+u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
+
+If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
+corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
+H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
+is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
+G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
+is stored in A(i,i+1:n-1).
+
+EXAMPLE:
+
+m=6, n=5 (m > n):               m=5, n=6 (m < n):
+
+(  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
+(  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
+(  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
+(  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
+(  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
+(  v1  v2  v3  v4  v5 )
+
+Here vi and ui are vectors which form H(i) and G(i), and d and e -
+are the diagonal and off-diagonal elements of matrix B.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+void rmatrixbd(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tauq,
+     /* Real    */ ae_vector* taup,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_vector t;
+    ae_int_t minmn;
+    ae_int_t maxmn;
+    ae_int_t i;
+    double ltau;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tauq);
+    ae_vector_clear(taup);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Prepare
+     */
+    if( n<=0||m<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    minmn = ae_minint(m, n, _state);
+    maxmn = ae_maxint(m, n, _state);
+    ae_vector_set_length(&work, maxmn+1, _state);
+    ae_vector_set_length(&t, maxmn+1, _state);
+    if( m>=n )
+    {
+        ae_vector_set_length(tauq, n, _state);
+        ae_vector_set_length(taup, n, _state);
+    }
+    else
+    {
+        ae_vector_set_length(tauq, m, _state);
+        ae_vector_set_length(taup, m, _state);
+    }
+    if( m>=n )
+    {
+        
+        /*
+         * Reduce to upper bidiagonal form
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Generate elementary reflector H(i) to annihilate A(i+1:m-1,i)
+             */
+            ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], a->stride, ae_v_len(1,m-i));
+            generatereflection(&t, m-i, &ltau, _state);
+            tauq->ptr.p_double[i] = ltau;
+            ae_v_move(&a->ptr.pp_double[i][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i,m-1));
+            t.ptr.p_double[1] = 1;
+            
+            /*
+             * Apply H(i) to A(i:m-1,i+1:n-1) from the left
+             */
+            applyreflectionfromtheleft(a, ltau, &t, i, m-1, i+1, n-1, &work, _state);
+            if( i<n-1 )
+            {
+                
+                /*
+                 * Generate elementary reflector G(i) to annihilate
+                 * A(i,i+2:n-1)
+                 */
+                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-i-1));
+                generatereflection(&t, n-1-i, &ltau, _state);
+                taup->ptr.p_double[i] = ltau;
+                ae_v_move(&a->ptr.pp_double[i][i+1], 1, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
+                t.ptr.p_double[1] = 1;
+                
+                /*
+                 * Apply G(i) to A(i+1:m-1,i+1:n-1) from the right
+                 */
+                applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state);
+            }
+            else
+            {
+                taup->ptr.p_double[i] = 0;
+            }
+        }
+    }
+    else
+    {
+        
+        /*
+         * Reduce to lower bidiagonal form
+         */
+        for(i=0; i<=m-1; i++)
+        {
+            
+            /*
+             * Generate elementary reflector G(i) to annihilate A(i,i+1:n-1)
+             */
+            ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
+            generatereflection(&t, n-i, &ltau, _state);
+            taup->ptr.p_double[i] = ltau;
+            ae_v_move(&a->ptr.pp_double[i][i], 1, &t.ptr.p_double[1], 1, ae_v_len(i,n-1));
+            t.ptr.p_double[1] = 1;
+            
+            /*
+             * Apply G(i) to A(i+1:m-1,i:n-1) from the right
+             */
+            applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i, n-1, &work, _state);
+            if( i<m-1 )
+            {
+                
+                /*
+                 * Generate elementary reflector H(i) to annihilate
+                 * A(i+2:m-1,i)
+                 */
+                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,m-1-i));
+                generatereflection(&t, m-1-i, &ltau, _state);
+                tauq->ptr.p_double[i] = ltau;
+                ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,m-1));
+                t.ptr.p_double[1] = 1;
+                
+                /*
+                 * Apply H(i) to A(i+1:m-1,i+1:n-1) from the left
+                 */
+                applyreflectionfromtheleft(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state);
+            }
+            else
+            {
+                tauq->ptr.p_double[i] = 0;
+            }
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces a matrix to bidiagonal form.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of ToBidiagonal subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUQ        -   scalar factors which are used to form Q.
+                    Output of ToBidiagonal subroutine.
+    QColumns    -   required number of columns in matrix Q.
+                    M>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QColumns columns of matrix Q.
+                    Array[0..M-1, 0..QColumns-1]
+                    If QColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackq(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tauq,
+     ae_int_t qcolumns,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_matrix_clear(q);
+
+    ae_assert(qcolumns<=m, "RMatrixBDUnpackQ: QColumns>M!", _state);
+    ae_assert(qcolumns>=0, "RMatrixBDUnpackQ: QColumns<0!", _state);
+    if( (m==0||n==0)||qcolumns==0 )
+    {
+        return;
+    }
+    
+    /*
+     * prepare Q
+     */
+    ae_matrix_set_length(q, m, qcolumns, _state);
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=qcolumns-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                q->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    
+    /*
+     * Calculate
+     */
+    rmatrixbdmultiplybyq(qp, m, n, tauq, q, m, qcolumns, ae_false, ae_false, _state);
+}
+
+
+/*************************************************************************
+Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
+
+The algorithm allows pre- or post-multiply by Q or Q'.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of ToBidiagonal subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUQ        -   scalar factors which are used to form Q.
+                    Output of ToBidiagonal subroutine.
+    Z           -   multiplied matrix.
+                    array[0..ZRows-1,0..ZColumns-1]
+    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
+                    ZRows=M, otherwise ZRows can be arbitrary.
+    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
+                    ZColumns=M, otherwise ZColumns can be arbitrary.
+    FromTheRight -  pre- or post-multiply.
+    DoTranspose -   multiply by Q or Q'.
+
+Output parameters:
+    Z           -   product of Z and Q.
+                    Array[0..ZRows-1,0..ZColumns-1]
+                    If ZRows=0 or ZColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdmultiplybyq(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tauq,
+     /* Real    */ ae_matrix* z,
+     ae_int_t zrows,
+     ae_int_t zcolumns,
+     ae_bool fromtheright,
+     ae_bool dotranspose,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t i1;
+    ae_int_t i2;
+    ae_int_t istep;
+    ae_vector v;
+    ae_vector work;
+    ae_int_t mx;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_assert((fromtheright&&zcolumns==m)||(!fromtheright&&zrows==m), "RMatrixBDMultiplyByQ: incorrect Z size!", _state);
+    
+    /*
+     * init
+     */
+    mx = ae_maxint(m, n, _state);
+    mx = ae_maxint(mx, zrows, _state);
+    mx = ae_maxint(mx, zcolumns, _state);
+    ae_vector_set_length(&v, mx+1, _state);
+    ae_vector_set_length(&work, mx+1, _state);
+    if( m>=n )
+    {
+        
+        /*
+         * setup
+         */
+        if( fromtheright )
+        {
+            i1 = 0;
+            i2 = n-1;
+            istep = 1;
+        }
+        else
+        {
+            i1 = n-1;
+            i2 = 0;
+            istep = -1;
+        }
+        if( dotranspose )
+        {
+            i = i1;
+            i1 = i2;
+            i2 = i;
+            istep = -istep;
+        }
+        
+        /*
+         * Process
+         */
+        i = i1;
+        do
+        {
+            ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], qp->stride, ae_v_len(1,m-i));
+            v.ptr.p_double[1] = 1;
+            if( fromtheright )
+            {
+                applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i, m-1, &work, _state);
+            }
+            else
+            {
+                applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i, m-1, 0, zcolumns-1, &work, _state);
+            }
+            i = i+istep;
+        }
+        while(i!=i2+istep);
+    }
+    else
+    {
+        
+        /*
+         * setup
+         */
+        if( fromtheright )
+        {
+            i1 = 0;
+            i2 = m-2;
+            istep = 1;
+        }
+        else
+        {
+            i1 = m-2;
+            i2 = 0;
+            istep = -1;
+        }
+        if( dotranspose )
+        {
+            i = i1;
+            i1 = i2;
+            i2 = i;
+            istep = -istep;
+        }
+        
+        /*
+         * Process
+         */
+        if( m-1>0 )
+        {
+            i = i1;
+            do
+            {
+                ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i+1][i], qp->stride, ae_v_len(1,m-i-1));
+                v.ptr.p_double[1] = 1;
+                if( fromtheright )
+                {
+                    applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i+1, m-1, &work, _state);
+                }
+                else
+                {
+                    applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i+1, m-1, 0, zcolumns-1, &work, _state);
+                }
+                i = i+istep;
+            }
+            while(i!=i2+istep);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking matrix P which reduces matrix A to bidiagonal form.
+The subroutine returns transposed matrix P.
+
+Input parameters:
+    QP      -   matrices Q and P in compact form.
+                Output of ToBidiagonal subroutine.
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+    TAUP    -   scalar factors which are used to form P.
+                Output of ToBidiagonal subroutine.
+    PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
+
+Output parameters:
+    PT      -   first PTRows columns of matrix P^T
+                Array[0..PTRows-1, 0..N-1]
+                If PTRows=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackpt(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* taup,
+     ae_int_t ptrows,
+     /* Real    */ ae_matrix* pt,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_matrix_clear(pt);
+
+    ae_assert(ptrows<=n, "RMatrixBDUnpackPT: PTRows>N!", _state);
+    ae_assert(ptrows>=0, "RMatrixBDUnpackPT: PTRows<0!", _state);
+    if( (m==0||n==0)||ptrows==0 )
+    {
+        return;
+    }
+    
+    /*
+     * prepare PT
+     */
+    ae_matrix_set_length(pt, ptrows, n, _state);
+    for(i=0; i<=ptrows-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                pt->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                pt->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    
+    /*
+     * Calculate
+     */
+    rmatrixbdmultiplybyp(qp, m, n, taup, pt, ptrows, n, ae_true, ae_true, _state);
+}
+
+
+/*************************************************************************
+Multiplication by matrix P which reduces matrix A to  bidiagonal form.
+
+The algorithm allows pre- or post-multiply by P or P'.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of RMatrixBD subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUP        -   scalar factors which are used to form P.
+                    Output of RMatrixBD subroutine.
+    Z           -   multiplied matrix.
+                    Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
+    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
+                    ZRows=N, otherwise ZRows can be arbitrary.
+    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
+                    ZColumns=N, otherwise ZColumns can be arbitrary.
+    FromTheRight -  pre- or post-multiply.
+    DoTranspose -   multiply by P or P'.
+
+Output parameters:
+    Z - product of Z and P.
+                Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
+                If ZRows=0 or ZColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdmultiplybyp(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* taup,
+     /* Real    */ ae_matrix* z,
+     ae_int_t zrows,
+     ae_int_t zcolumns,
+     ae_bool fromtheright,
+     ae_bool dotranspose,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_vector v;
+    ae_vector work;
+    ae_int_t mx;
+    ae_int_t i1;
+    ae_int_t i2;
+    ae_int_t istep;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_assert((fromtheright&&zcolumns==n)||(!fromtheright&&zrows==n), "RMatrixBDMultiplyByP: incorrect Z size!", _state);
+    
+    /*
+     * init
+     */
+    mx = ae_maxint(m, n, _state);
+    mx = ae_maxint(mx, zrows, _state);
+    mx = ae_maxint(mx, zcolumns, _state);
+    ae_vector_set_length(&v, mx+1, _state);
+    ae_vector_set_length(&work, mx+1, _state);
+    if( m>=n )
+    {
+        
+        /*
+         * setup
+         */
+        if( fromtheright )
+        {
+            i1 = n-2;
+            i2 = 0;
+            istep = -1;
+        }
+        else
+        {
+            i1 = 0;
+            i2 = n-2;
+            istep = 1;
+        }
+        if( !dotranspose )
+        {
+            i = i1;
+            i1 = i2;
+            i2 = i;
+            istep = -istep;
+        }
+        
+        /*
+         * Process
+         */
+        if( n-1>0 )
+        {
+            i = i1;
+            do
+            {
+                ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-1-i));
+                v.ptr.p_double[1] = 1;
+                if( fromtheright )
+                {
+                    applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i+1, n-1, &work, _state);
+                }
+                else
+                {
+                    applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i+1, n-1, 0, zcolumns-1, &work, _state);
+                }
+                i = i+istep;
+            }
+            while(i!=i2+istep);
+        }
+    }
+    else
+    {
+        
+        /*
+         * setup
+         */
+        if( fromtheright )
+        {
+            i1 = m-1;
+            i2 = 0;
+            istep = -1;
+        }
+        else
+        {
+            i1 = 0;
+            i2 = m-1;
+            istep = 1;
+        }
+        if( !dotranspose )
+        {
+            i = i1;
+            i1 = i2;
+            i2 = i;
+            istep = -istep;
+        }
+        
+        /*
+         * Process
+         */
+        i = i1;
+        do
+        {
+            ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
+            v.ptr.p_double[1] = 1;
+            if( fromtheright )
+            {
+                applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i, n-1, &work, _state);
+            }
+            else
+            {
+                applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i, n-1, 0, zcolumns-1, &work, _state);
+            }
+            i = i+istep;
+        }
+        while(i!=i2+istep);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking of the main and secondary diagonals of bidiagonal decomposition
+of matrix A.
+
+Input parameters:
+    B   -   output of RMatrixBD subroutine.
+    M   -   number of rows in matrix B.
+    N   -   number of columns in matrix B.
+
+Output parameters:
+    IsUpper -   True, if the matrix is upper bidiagonal.
+                otherwise IsUpper is False.
+    D       -   the main diagonal.
+                Array whose index ranges within [0..Min(M,N)-1].
+    E       -   the secondary diagonal (upper or lower, depending on
+                the value of IsUpper).
+                Array index ranges within [0..Min(M,N)-1], the last
+                element is not used.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackdiagonals(/* Real    */ ae_matrix* b,
+     ae_int_t m,
+     ae_int_t n,
+     ae_bool* isupper,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+    *isupper = ae_false;
+    ae_vector_clear(d);
+    ae_vector_clear(e);
+
+    *isupper = m>=n;
+    if( m<=0||n<=0 )
+    {
+        return;
+    }
+    if( *isupper )
+    {
+        ae_vector_set_length(d, n, _state);
+        ae_vector_set_length(e, n, _state);
+        for(i=0; i<=n-2; i++)
+        {
+            d->ptr.p_double[i] = b->ptr.pp_double[i][i];
+            e->ptr.p_double[i] = b->ptr.pp_double[i][i+1];
+        }
+        d->ptr.p_double[n-1] = b->ptr.pp_double[n-1][n-1];
+    }
+    else
+    {
+        ae_vector_set_length(d, m, _state);
+        ae_vector_set_length(e, m, _state);
+        for(i=0; i<=m-2; i++)
+        {
+            d->ptr.p_double[i] = b->ptr.pp_double[i][i];
+            e->ptr.p_double[i] = b->ptr.pp_double[i+1][i];
+        }
+        d->ptr.p_double[m-1] = b->ptr.pp_double[m-1][m-1];
+    }
+}
+
+
+/*************************************************************************
+Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
+where Q is an orthogonal matrix, H - Hessenberg matrix.
+
+Input parameters:
+    A       -   matrix A with elements [0..N-1, 0..N-1]
+    N       -   size of matrix A.
+
+Output parameters:
+    A       -   matrices Q and P in  compact form (see below).
+    Tau     -   array of scalar factors which are used to form matrix Q.
+                Array whose index ranges within [0..N-2]
+
+Matrix H is located on the main diagonal, on the lower secondary  diagonal
+and above the main diagonal of matrix A. The elements which are used to
+form matrix Q are situated in array Tau and below the lower secondary
+diagonal of matrix A as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(0)*H(2)*...*H(n-2),
+
+where each H(i) is given by
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - is a real vector,
+so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void rmatrixhessenberg(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    double v;
+    ae_vector t;
+    ae_vector work;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>=0, "RMatrixHessenberg: incorrect N!", _state);
+    
+    /*
+     * Quick return if possible
+     */
+    if( n<=1 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_vector_set_length(tau, n-2+1, _state);
+    ae_vector_set_length(&t, n+1, _state);
+    ae_vector_set_length(&work, n-1+1, _state);
+    for(i=0; i<=n-2; i++)
+    {
+        
+        /*
+         * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i)
+         */
+        ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
+        generatereflection(&t, n-i-1, &v, _state);
+        ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
+        tau->ptr.p_double[i] = v;
+        t.ptr.p_double[1] = 1;
+        
+        /*
+         * Apply H(i) to A(1:ihi,i+1:ihi) from the right
+         */
+        applyreflectionfromtheright(a, v, &t, 0, n-1, i+1, n-1, &work, _state);
+        
+        /*
+         * Apply H(i) to A(i+1:ihi,i+1:n) from the left
+         */
+        applyreflectionfromtheleft(a, v, &t, i+1, n-1, i+1, n-1, &work, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces matrix A to upper Hessenberg form
+
+Input parameters:
+    A   -   output of RMatrixHessenberg subroutine.
+    N   -   size of matrix A.
+    Tau -   scalar factors which are used to form Q.
+            Output of RMatrixHessenberg subroutine.
+
+Output parameters:
+    Q   -   matrix Q.
+            Array whose indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixhessenbergunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector v;
+    ae_vector work;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    ae_matrix_set_length(q, n-1+1, n-1+1, _state);
+    ae_vector_set_length(&v, n-1+1, _state);
+    ae_vector_set_length(&work, n-1+1, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                q->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    
+    /*
+     * unpack Q
+     */
+    for(i=0; i<=n-2; i++)
+    {
+        
+        /*
+         * Apply H(i)
+         */
+        ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
+        v.ptr.p_double[1] = 1;
+        applyreflectionfromtheright(q, tau->ptr.p_double[i], &v, 0, n-1, i+1, n-1, &work, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
+
+Input parameters:
+    A   -   output of RMatrixHessenberg subroutine.
+    N   -   size of matrix A.
+
+Output parameters:
+    H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixhessenbergunpackh(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_matrix* h,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector v;
+    ae_vector work;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(h);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_matrix_set_length(h, n-1+1, n-1+1, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=i-2; j++)
+        {
+            h->ptr.pp_double[i][j] = 0;
+        }
+        j = ae_maxint(0, i-1, _state);
+        ae_v_move(&h->ptr.pp_double[i][j], 1, &a->ptr.pp_double[i][j], 1, ae_v_len(j,n-1));
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Reduction of a symmetric matrix which is given by its higher or lower
+triangular part to a tridiagonal matrix using orthogonal similarity
+transformation: Q'*A*Q=T.
+
+Input parameters:
+    A       -   matrix to be transformed
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format. If IsUpper = True, then matrix A is given
+                by its upper triangle, and the lower triangle is not used
+                and not modified by the algorithm, and vice versa
+                if IsUpper = False.
+
+Output parameters:
+    A       -   matrices T and Q in  compact form (see lower)
+    Tau     -   array of factors which are forming matrices H(i)
+                array with elements [0..N-2].
+    D       -   main diagonal of symmetric matrix T.
+                array with elements [0..N-1].
+    E       -   secondary diagonal of symmetric matrix T.
+                array with elements [0..N-2].
+
+
+  If IsUpper=True, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(n-2) . . . H(2) H(0).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a real scalar, and v is a real vector with
+  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
+  A(0:i-1,i+1), and tau in TAU(i).
+
+  If IsUpper=False, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(0) H(2) . . . H(n-2).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a real scalar, and v is a real vector with
+  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
+  and tau in TAU(i).
+
+  The contents of A on exit are illustrated by the following examples
+  with n = 5:
+
+  if UPLO = 'U':                       if UPLO = 'L':
+
+    (  d   e   v1  v2  v3 )              (  d                  )
+    (      d   e   v2  v3 )              (  e   d              )
+    (          d   e   v3 )              (  v0  e   d          )
+    (              d   e  )              (  v0  v1  e   d      )
+    (                  d  )              (  v0  v1  v2  e   d  )
+
+  where d and e denote diagonal and off-diagonal elements of T, and vi
+  denotes an element of the vector defining H(i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void smatrixtd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    double alpha;
+    double taui;
+    double v;
+    ae_vector t;
+    ae_vector t2;
+    ae_vector t3;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_clear(d);
+    ae_vector_clear(e);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t3, 0, DT_REAL, _state, ae_true);
+
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_vector_set_length(&t, n+1, _state);
+    ae_vector_set_length(&t2, n+1, _state);
+    ae_vector_set_length(&t3, n+1, _state);
+    if( n>1 )
+    {
+        ae_vector_set_length(tau, n-2+1, _state);
+    }
+    ae_vector_set_length(d, n-1+1, _state);
+    if( n>1 )
+    {
+        ae_vector_set_length(e, n-2+1, _state);
+    }
+    if( isupper )
+    {
+        
+        /*
+         * Reduce the upper triangle of A
+         */
+        for(i=n-2; i>=0; i--)
+        {
+            
+            /*
+             * Generate elementary reflector H() = E - tau * v * v'
+             */
+            if( i>=1 )
+            {
+                ae_v_move(&t.ptr.p_double[2], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(2,i+1));
+            }
+            t.ptr.p_double[1] = a->ptr.pp_double[i][i+1];
+            generatereflection(&t, i+1, &taui, _state);
+            if( i>=1 )
+            {
+                ae_v_move(&a->ptr.pp_double[0][i+1], a->stride, &t.ptr.p_double[2], 1, ae_v_len(0,i-1));
+            }
+            a->ptr.pp_double[i][i+1] = t.ptr.p_double[1];
+            e->ptr.p_double[i] = a->ptr.pp_double[i][i+1];
+            if( ae_fp_neq(taui,0) )
+            {
+                
+                /*
+                 * Apply H from both sides to A
+                 */
+                a->ptr.pp_double[i][i+1] = 1;
+                
+                /*
+                 * Compute  x := tau * A * v  storing x in TAU
+                 */
+                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
+                symmetricmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t3, _state);
+                ae_v_move(&tau->ptr.p_double[0], 1, &t3.ptr.p_double[1], 1, ae_v_len(0,i));
+                
+                /*
+                 * Compute  w := x - 1/2 * tau * (x'*v) * v
+                 */
+                v = ae_v_dotproduct(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i));
+                alpha = -0.5*taui*v;
+                ae_v_addd(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i), alpha);
+                
+                /*
+                 * Apply the transformation as a rank-2 update:
+                 *    A := A - v * w' - w * v'
+                 */
+                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
+                ae_v_move(&t3.ptr.p_double[1], 1, &tau->ptr.p_double[0], 1, ae_v_len(1,i+1));
+                symmetricrank2update(a, isupper, 0, i, &t, &t3, &t2, -1, _state);
+                a->ptr.pp_double[i][i+1] = e->ptr.p_double[i];
+            }
+            d->ptr.p_double[i+1] = a->ptr.pp_double[i+1][i+1];
+            tau->ptr.p_double[i] = taui;
+        }
+        d->ptr.p_double[0] = a->ptr.pp_double[0][0];
+    }
+    else
+    {
+        
+        /*
+         * Reduce the lower triangle of A
+         */
+        for(i=0; i<=n-2; i++)
+        {
+            
+            /*
+             * Generate elementary reflector H = E - tau * v * v'
+             */
+            ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
+            generatereflection(&t, n-i-1, &taui, _state);
+            ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
+            e->ptr.p_double[i] = a->ptr.pp_double[i+1][i];
+            if( ae_fp_neq(taui,0) )
+            {
+                
+                /*
+                 * Apply H from both sides to A
+                 */
+                a->ptr.pp_double[i+1][i] = 1;
+                
+                /*
+                 * Compute  x := tau * A * v  storing y in TAU
+                 */
+                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
+                symmetricmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state);
+                ae_v_move(&tau->ptr.p_double[i], 1, &t2.ptr.p_double[1], 1, ae_v_len(i,n-2));
+                
+                /*
+                 * Compute  w := x - 1/2 * tau * (x'*v) * v
+                 */
+                v = ae_v_dotproduct(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2));
+                alpha = -0.5*taui*v;
+                ae_v_addd(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2), alpha);
+                
+                /*
+                 * Apply the transformation as a rank-2 update:
+                 *     A := A - v * w' - w * v'
+                 *
+                 */
+                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
+                ae_v_move(&t2.ptr.p_double[1], 1, &tau->ptr.p_double[i], 1, ae_v_len(1,n-i-1));
+                symmetricrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, -1, _state);
+                a->ptr.pp_double[i+1][i] = e->ptr.p_double[i];
+            }
+            d->ptr.p_double[i] = a->ptr.pp_double[i][i];
+            tau->ptr.p_double[i] = taui;
+        }
+        d->ptr.p_double[n-1] = a->ptr.pp_double[n-1][n-1];
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
+form.
+
+Input parameters:
+    A       -   the result of a SMatrixTD subroutine
+    N       -   size of matrix A.
+    IsUpper -   storage format (a parameter of SMatrixTD subroutine)
+    Tau     -   the result of a SMatrixTD subroutine
+
+Output parameters:
+    Q       -   transformation matrix.
+                array with elements [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void smatrixtdunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector v;
+    ae_vector work;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    ae_matrix_set_length(q, n-1+1, n-1+1, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    ae_vector_set_length(&work, n-1+1, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                q->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    
+    /*
+     * unpack Q
+     */
+    if( isupper )
+    {
+        for(i=0; i<=n-2; i++)
+        {
+            
+            /*
+             * Apply H(i)
+             */
+            ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
+            v.ptr.p_double[i+1] = 1;
+            applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, 0, i, 0, n-1, &work, _state);
+        }
+    }
+    else
+    {
+        for(i=n-2; i>=0; i--)
+        {
+            
+            /*
+             * Apply H(i)
+             */
+            ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
+            v.ptr.p_double[1] = 1;
+            applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, i+1, n-1, 0, n-1, &work, _state);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
+triangular part to a real  tridiagonal  matrix  using  unitary  similarity
+transformation: Q'*A*Q = T.
+
+Input parameters:
+    A       -   matrix to be transformed
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format. If IsUpper = True, then matrix A is  given
+                by its upper triangle, and the lower triangle is not  used
+                and not modified by the algorithm, and vice versa
+                if IsUpper = False.
+
+Output parameters:
+    A       -   matrices T and Q in  compact form (see lower)
+    Tau     -   array of factors which are forming matrices H(i)
+                array with elements [0..N-2].
+    D       -   main diagonal of real symmetric matrix T.
+                array with elements [0..N-1].
+    E       -   secondary diagonal of real symmetric matrix T.
+                array with elements [0..N-2].
+
+
+  If IsUpper=True, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(n-2) . . . H(2) H(0).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a complex scalar, and v is a complex vector with
+  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
+  A(0:i-1,i+1), and tau in TAU(i).
+
+  If IsUpper=False, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(0) H(2) . . . H(n-2).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a complex scalar, and v is a complex vector with
+  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
+  and tau in TAU(i).
+
+  The contents of A on exit are illustrated by the following examples
+  with n = 5:
+
+  if UPLO = 'U':                       if UPLO = 'L':
+
+    (  d   e   v1  v2  v3 )              (  d                  )
+    (      d   e   v2  v3 )              (  e   d              )
+    (          d   e   v3 )              (  v0  e   d          )
+    (              d   e  )              (  v0  v1  e   d      )
+    (                  d  )              (  v0  v1  v2  e   d  )
+
+where d and e denote diagonal and off-diagonal elements of T, and vi
+denotes an element of the vector defining H(i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void hmatrixtd(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tau,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_complex alpha;
+    ae_complex taui;
+    ae_complex v;
+    ae_vector t;
+    ae_vector t2;
+    ae_vector t3;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_clear(d);
+    ae_vector_clear(e);
+    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&t2, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&t3, 0, DT_COMPLEX, _state, ae_true);
+
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        ae_assert(ae_fp_eq(a->ptr.pp_complex[i][i].y,0), "Assertion failed", _state);
+    }
+    if( n>1 )
+    {
+        ae_vector_set_length(tau, n-2+1, _state);
+        ae_vector_set_length(e, n-2+1, _state);
+    }
+    ae_vector_set_length(d, n-1+1, _state);
+    ae_vector_set_length(&t, n-1+1, _state);
+    ae_vector_set_length(&t2, n-1+1, _state);
+    ae_vector_set_length(&t3, n-1+1, _state);
+    if( isupper )
+    {
+        
+        /*
+         * Reduce the upper triangle of A
+         */
+        a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(a->ptr.pp_complex[n-1][n-1].x);
+        for(i=n-2; i>=0; i--)
+        {
+            
+            /*
+             * Generate elementary reflector H = I+1 - tau * v * v'
+             */
+            alpha = a->ptr.pp_complex[i][i+1];
+            t.ptr.p_complex[1] = alpha;
+            if( i>=1 )
+            {
+                ae_v_cmove(&t.ptr.p_complex[2], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(2,i+1));
+            }
+            complexgeneratereflection(&t, i+1, &taui, _state);
+            if( i>=1 )
+            {
+                ae_v_cmove(&a->ptr.pp_complex[0][i+1], a->stride, &t.ptr.p_complex[2], 1, "N", ae_v_len(0,i-1));
+            }
+            alpha = t.ptr.p_complex[1];
+            e->ptr.p_double[i] = alpha.x;
+            if( ae_c_neq_d(taui,0) )
+            {
+                
+                /*
+                 * Apply H(I+1) from both sides to A
+                 */
+                a->ptr.pp_complex[i][i+1] = ae_complex_from_d(1);
+                
+                /*
+                 * Compute  x := tau * A * v  storing x in TAU
+                 */
+                ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
+                hermitianmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t2, _state);
+                ae_v_cmove(&tau->ptr.p_complex[0], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(0,i));
+                
+                /*
+                 * Compute  w := x - 1/2 * tau * (x'*v) * v
+                 */
+                v = ae_v_cdotproduct(&tau->ptr.p_complex[0], 1, "Conj", &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i));
+                alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v));
+                ae_v_caddc(&tau->ptr.p_complex[0], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i), alpha);
+                
+                /*
+                 * Apply the transformation as a rank-2 update:
+                 *    A := A - v * w' - w * v'
+                 */
+                ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
+                ae_v_cmove(&t3.ptr.p_complex[1], 1, &tau->ptr.p_complex[0], 1, "N", ae_v_len(1,i+1));
+                hermitianrank2update(a, isupper, 0, i, &t, &t3, &t2, ae_complex_from_d(-1), _state);
+            }
+            else
+            {
+                a->ptr.pp_complex[i][i] = ae_complex_from_d(a->ptr.pp_complex[i][i].x);
+            }
+            a->ptr.pp_complex[i][i+1] = ae_complex_from_d(e->ptr.p_double[i]);
+            d->ptr.p_double[i+1] = a->ptr.pp_complex[i+1][i+1].x;
+            tau->ptr.p_complex[i] = taui;
+        }
+        d->ptr.p_double[0] = a->ptr.pp_complex[0][0].x;
+    }
+    else
+    {
+        
+        /*
+         * Reduce the lower triangle of A
+         */
+        a->ptr.pp_complex[0][0] = ae_complex_from_d(a->ptr.pp_complex[0][0].x);
+        for(i=0; i<=n-2; i++)
+        {
+            
+            /*
+             * Generate elementary reflector H = I - tau * v * v'
+             */
+            ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
+            complexgeneratereflection(&t, n-i-1, &taui, _state);
+            ae_v_cmove(&a->ptr.pp_complex[i+1][i], a->stride, &t.ptr.p_complex[1], 1, "N", ae_v_len(i+1,n-1));
+            e->ptr.p_double[i] = a->ptr.pp_complex[i+1][i].x;
+            if( ae_c_neq_d(taui,0) )
+            {
+                
+                /*
+                 * Apply H(i) from both sides to A(i+1:n,i+1:n)
+                 */
+                a->ptr.pp_complex[i+1][i] = ae_complex_from_d(1);
+                
+                /*
+                 * Compute  x := tau * A * v  storing y in TAU
+                 */
+                ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
+                hermitianmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state);
+                ae_v_cmove(&tau->ptr.p_complex[i], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(i,n-2));
+                
+                /*
+                 * Compute  w := x - 1/2 * tau * (x'*v) * v
+                 */
+                v = ae_v_cdotproduct(&tau->ptr.p_complex[i], 1, "Conj", &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2));
+                alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v));
+                ae_v_caddc(&tau->ptr.p_complex[i], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2), alpha);
+                
+                /*
+                 * Apply the transformation as a rank-2 update:
+                 * A := A - v * w' - w * v'
+                 */
+                ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
+                ae_v_cmove(&t2.ptr.p_complex[1], 1, &tau->ptr.p_complex[i], 1, "N", ae_v_len(1,n-i-1));
+                hermitianrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, ae_complex_from_d(-1), _state);
+            }
+            else
+            {
+                a->ptr.pp_complex[i+1][i+1] = ae_complex_from_d(a->ptr.pp_complex[i+1][i+1].x);
+            }
+            a->ptr.pp_complex[i+1][i] = ae_complex_from_d(e->ptr.p_double[i]);
+            d->ptr.p_double[i] = a->ptr.pp_complex[i][i].x;
+            tau->ptr.p_complex[i] = taui;
+        }
+        d->ptr.p_double[n-1] = a->ptr.pp_complex[n-1][n-1].x;
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
+form.
+
+Input parameters:
+    A       -   the result of a HMatrixTD subroutine
+    N       -   size of matrix A.
+    IsUpper -   storage format (a parameter of HMatrixTD subroutine)
+    Tau     -   the result of a HMatrixTD subroutine
+
+Output parameters:
+    Q       -   transformation matrix.
+                array with elements [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void hmatrixtdunpackq(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tau,
+     /* Complex */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector v;
+    ae_vector work;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
+
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    ae_matrix_set_length(q, n-1+1, n-1+1, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    ae_vector_set_length(&work, n-1+1, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_complex[i][j] = ae_complex_from_d(1);
+            }
+            else
+            {
+                q->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+            }
+        }
+    }
+    
+    /*
+     * unpack Q
+     */
+    if( isupper )
+    {
+        for(i=0; i<=n-2; i++)
+        {
+            
+            /*
+             * Apply H(i)
+             */
+            ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
+            v.ptr.p_complex[i+1] = ae_complex_from_d(1);
+            complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, 0, i, 0, n-1, &work, _state);
+        }
+    }
+    else
+    {
+        for(i=n-2; i>=0; i--)
+        {
+            
+            /*
+             * Apply H(i)
+             */
+            ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
+            v.ptr.p_complex[1] = ae_complex_from_d(1);
+            complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, i+1, n-1, 0, n-1, &work, _state);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Base case for real QR
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+static void ortfac_rmatrixqrbasecase(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* work,
+     /* Real    */ ae_vector* t,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+    ae_int_t minmn;
+    double tmp;
+
+
+    minmn = ae_minint(m, n, _state);
+    
+    /*
+     * Test the input arguments
+     */
+    k = minmn;
+    for(i=0; i<=k-1; i++)
+    {
+        
+        /*
+         * Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+         */
+        ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[i][i], a->stride, ae_v_len(1,m-i));
+        generatereflection(t, m-i, &tmp, _state);
+        tau->ptr.p_double[i] = tmp;
+        ae_v_move(&a->ptr.pp_double[i][i], a->stride, &t->ptr.p_double[1], 1, ae_v_len(i,m-1));
+        t->ptr.p_double[1] = 1;
+        if( i<n )
+        {
+            
+            /*
+             * Apply H(i) to A(i:m-1,i+1:n-1) from the left
+             */
+            applyreflectionfromtheleft(a, tau->ptr.p_double[i], t, i, m-1, i+1, n-1, work, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Base case for real LQ
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+static void ortfac_rmatrixlqbasecase(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* work,
+     /* Real    */ ae_vector* t,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+    ae_int_t minmn;
+    double tmp;
+
+
+    minmn = ae_minint(m, n, _state);
+    k = ae_minint(m, n, _state);
+    for(i=0; i<=k-1; i++)
+    {
+        
+        /*
+         * Generate elementary reflector H(i) to annihilate A(i,i+1:n-1)
+         */
+        ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
+        generatereflection(t, n-i, &tmp, _state);
+        tau->ptr.p_double[i] = tmp;
+        ae_v_move(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[1], 1, ae_v_len(i,n-1));
+        t->ptr.p_double[1] = 1;
+        if( i<n )
+        {
+            
+            /*
+             * Apply H(i) to A(i+1:m,i:n) from the right
+             */
+            applyreflectionfromtheright(a, tau->ptr.p_double[i], t, i+1, m-1, i, n-1, work, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Base case for complex QR
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* work,
+     /* Complex */ ae_vector* t,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+    ae_int_t mmi;
+    ae_int_t minmn;
+    ae_complex tmp;
+
+
+    minmn = ae_minint(m, n, _state);
+    if( minmn<=0 )
+    {
+        return;
+    }
+    
+    /*
+     * Test the input arguments
+     */
+    k = ae_minint(m, n, _state);
+    for(i=0; i<=k-1; i++)
+    {
+        
+        /*
+         * Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+         */
+        mmi = m-i;
+        ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], a->stride, "N", ae_v_len(1,mmi));
+        complexgeneratereflection(t, mmi, &tmp, _state);
+        tau->ptr.p_complex[i] = tmp;
+        ae_v_cmove(&a->ptr.pp_complex[i][i], a->stride, &t->ptr.p_complex[1], 1, "N", ae_v_len(i,m-1));
+        t->ptr.p_complex[1] = ae_complex_from_d(1);
+        if( i<n-1 )
+        {
+            
+            /*
+             * Apply H'(i) to A(i:m,i+1:n) from the left
+             */
+            complexapplyreflectionfromtheleft(a, ae_c_conj(tau->ptr.p_complex[i], _state), t, i, m-1, i+1, n-1, work, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Base case for complex LQ
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* work,
+     /* Complex */ ae_vector* t,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t minmn;
+    ae_complex tmp;
+
+
+    minmn = ae_minint(m, n, _state);
+    if( minmn<=0 )
+    {
+        return;
+    }
+    
+    /*
+     * Test the input arguments
+     */
+    for(i=0; i<=minmn-1; i++)
+    {
+        
+        /*
+         * Generate elementary reflector H(i)
+         *
+         * NOTE: ComplexGenerateReflection() generates left reflector,
+         * i.e. H which reduces x by applyiong from the left, but we
+         * need RIGHT reflector. So we replace H=E-tau*v*v' by H^H,
+         * which changes v to conj(v).
+         */
+        ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,n-i));
+        complexgeneratereflection(t, n-i, &tmp, _state);
+        tau->ptr.p_complex[i] = tmp;
+        ae_v_cmove(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[1], 1, "Conj", ae_v_len(i,n-1));
+        t->ptr.p_complex[1] = ae_complex_from_d(1);
+        if( i<m-1 )
+        {
+            
+            /*
+             * Apply H'(i)
+             */
+            complexapplyreflectionfromtheright(a, tau->ptr.p_complex[i], t, i+1, m-1, i, n-1, work, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Generate block reflector:
+* fill unused parts of reflectors matrix by zeros
+* fill diagonal of reflectors matrix by ones
+* generate triangular factor T
+
+PARAMETERS:
+    A           -   either LengthA*BlockSize (if ColumnwiseA) or
+                    BlockSize*LengthA (if not ColumnwiseA) matrix of
+                    elementary reflectors.
+                    Modified on exit.
+    Tau         -   scalar factors
+    ColumnwiseA -   reflectors are stored in rows or in columns
+    LengthA     -   length of largest reflector
+    BlockSize   -   number of reflectors
+    T           -   array[BlockSize,2*BlockSize]. Left BlockSize*BlockSize
+                    submatrix stores triangular factor on exit.
+    WORK        -   array[BlockSize]
+    
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void ortfac_rmatrixblockreflector(/* Real    */ ae_matrix* a,
+     /* Real    */ ae_vector* tau,
+     ae_bool columnwisea,
+     ae_int_t lengtha,
+     ae_int_t blocksize,
+     /* Real    */ ae_matrix* t,
+     /* Real    */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    double v;
+
+
+    
+    /*
+     * fill beginning of new column with zeros,
+     * load 1.0 in the first non-zero element
+     */
+    for(k=0; k<=blocksize-1; k++)
+    {
+        if( columnwisea )
+        {
+            for(i=0; i<=k-1; i++)
+            {
+                a->ptr.pp_double[i][k] = 0;
+            }
+        }
+        else
+        {
+            for(i=0; i<=k-1; i++)
+            {
+                a->ptr.pp_double[k][i] = 0;
+            }
+        }
+        a->ptr.pp_double[k][k] = 1;
+    }
+    
+    /*
+     * Calculate Gram matrix of A
+     */
+    for(i=0; i<=blocksize-1; i++)
+    {
+        for(j=0; j<=blocksize-1; j++)
+        {
+            t->ptr.pp_double[i][blocksize+j] = 0;
+        }
+    }
+    for(k=0; k<=lengtha-1; k++)
+    {
+        for(j=1; j<=blocksize-1; j++)
+        {
+            if( columnwisea )
+            {
+                v = a->ptr.pp_double[k][j];
+                if( ae_fp_neq(v,0) )
+                {
+                    ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[k][0], 1, ae_v_len(blocksize,blocksize+j-1), v);
+                }
+            }
+            else
+            {
+                v = a->ptr.pp_double[j][k];
+                if( ae_fp_neq(v,0) )
+                {
+                    ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[0][k], a->stride, ae_v_len(blocksize,blocksize+j-1), v);
+                }
+            }
+        }
+    }
+    
+    /*
+     * Prepare Y (stored in TmpA) and T (stored in TmpT)
+     */
+    for(k=0; k<=blocksize-1; k++)
+    {
+        
+        /*
+         * fill non-zero part of T, use pre-calculated Gram matrix
+         */
+        ae_v_move(&work->ptr.p_double[0], 1, &t->ptr.pp_double[k][blocksize], 1, ae_v_len(0,k-1));
+        for(i=0; i<=k-1; i++)
+        {
+            v = ae_v_dotproduct(&t->ptr.pp_double[i][i], 1, &work->ptr.p_double[i], 1, ae_v_len(i,k-1));
+            t->ptr.pp_double[i][k] = -tau->ptr.p_double[k]*v;
+        }
+        t->ptr.pp_double[k][k] = -tau->ptr.p_double[k];
+        
+        /*
+         * Rest of T is filled by zeros
+         */
+        for(i=k+1; i<=blocksize-1; i++)
+        {
+            t->ptr.pp_double[i][k] = 0;
+        }
+    }
+}
+
+
+/*************************************************************************
+Generate block reflector (complex):
+* fill unused parts of reflectors matrix by zeros
+* fill diagonal of reflectors matrix by ones
+* generate triangular factor T
+
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a,
+     /* Complex */ ae_vector* tau,
+     ae_bool columnwisea,
+     ae_int_t lengtha,
+     ae_int_t blocksize,
+     /* Complex */ ae_matrix* t,
+     /* Complex */ ae_vector* work,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t k;
+    ae_complex v;
+
+
+    
+    /*
+     * Prepare Y (stored in TmpA) and T (stored in TmpT)
+     */
+    for(k=0; k<=blocksize-1; k++)
+    {
+        
+        /*
+         * fill beginning of new column with zeros,
+         * load 1.0 in the first non-zero element
+         */
+        if( columnwisea )
+        {
+            for(i=0; i<=k-1; i++)
+            {
+                a->ptr.pp_complex[i][k] = ae_complex_from_d(0);
+            }
+        }
+        else
+        {
+            for(i=0; i<=k-1; i++)
+            {
+                a->ptr.pp_complex[k][i] = ae_complex_from_d(0);
+            }
+        }
+        a->ptr.pp_complex[k][k] = ae_complex_from_d(1);
+        
+        /*
+         * fill non-zero part of T,
+         */
+        for(i=0; i<=k-1; i++)
+        {
+            if( columnwisea )
+            {
+                v = ae_v_cdotproduct(&a->ptr.pp_complex[k][i], a->stride, "Conj", &a->ptr.pp_complex[k][k], a->stride, "N", ae_v_len(k,lengtha-1));
+            }
+            else
+            {
+                v = ae_v_cdotproduct(&a->ptr.pp_complex[i][k], 1, "N", &a->ptr.pp_complex[k][k], 1, "Conj", ae_v_len(k,lengtha-1));
+            }
+            work->ptr.p_complex[i] = v;
+        }
+        for(i=0; i<=k-1; i++)
+        {
+            v = ae_v_cdotproduct(&t->ptr.pp_complex[i][i], 1, "N", &work->ptr.p_complex[i], 1, "N", ae_v_len(i,k-1));
+            t->ptr.pp_complex[i][k] = ae_c_neg(ae_c_mul(tau->ptr.p_complex[k],v));
+        }
+        t->ptr.pp_complex[k][k] = ae_c_neg(tau->ptr.p_complex[k]);
+        
+        /*
+         * Rest of T is filled by zeros
+         */
+        for(i=k+1; i<=blocksize-1; i++)
+        {
+            t->ptr.pp_complex[i][k] = ae_complex_from_d(0);
+        }
+    }
+}
+
+
+
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a symmetric matrix
+
+The algorithm finds eigen pairs of a symmetric matrix by reducing it to
+tridiagonal form and using the QL/QR algorithm.
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpper -   storage format.
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the eigenvectors.
+                Array whose indexes range within [0..N-1, 0..N-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector tau;
+    ae_vector e;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(d);
+    ae_matrix_clear(z);
+    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded==0||zneeded==1, "SMatrixEVD: incorrect ZNeeded", _state);
+    smatrixtd(a, n, isupper, &tau, d, &e, _state);
+    if( zneeded==1 )
+    {
+        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
+    }
+    result = smatrixtdevd(d, &e, n, zneeded, z, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
+matrix  in  a  given half open interval (A, B] by using  a  bisection  and
+inverse iteration
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part. Array [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
+
+Output parameters:
+    M       -   number of eigenvalues found in a given half-interval (M>=0).
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..M-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. M contains the number of eigenvalues in the given
+    half-interval (could be equal to 0), W contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned,
+    M is equal to 0.
+
+  -- ALGLIB --
+     Copyright 07.01.2006 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixevdr(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     double b1,
+     double b2,
+     ae_int_t* m,
+     /* Real    */ ae_vector* w,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector tau;
+    ae_vector e;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    *m = 0;
+    ae_vector_clear(w);
+    ae_matrix_clear(z);
+    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded==0||zneeded==1, "SMatrixTDEVDR: incorrect ZNeeded", _state);
+    smatrixtd(a, n, isupper, &tau, w, &e, _state);
+    if( zneeded==1 )
+    {
+        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
+    }
+    result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, z, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
+matrix with given indexes by using bisection and inverse iteration methods.
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    I1, I2 -    index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+
+Output parameters:
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..I2-I1].
+                In that case, the eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. W contains the eigenvalues, Z contains the
+    eigenvectors (if needed).
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned.
+
+  -- ALGLIB --
+     Copyright 07.01.2006 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixevdi(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* w,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector tau;
+    ae_vector e;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(w);
+    ae_matrix_clear(z);
+    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded==0||zneeded==1, "SMatrixEVDI: incorrect ZNeeded", _state);
+    smatrixtd(a, n, isupper, &tau, w, &e, _state);
+    if( zneeded==1 )
+    {
+        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
+    }
+    result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, z, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a Hermitian matrix
+
+The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
+real tridiagonal form and using the QL/QR algorithm.
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the eigenvectors.
+                Array whose indexes range within [0..N-1, 0..N-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+Note:
+    eigenvectors of Hermitian matrix are defined up to  multiplication  by
+    a complex number L, such that |L|=1.
+
+  -- ALGLIB --
+     Copyright 2005, 23 March 2007 by Bochkanov Sergey
+*************************************************************************/
+ae_bool hmatrixevd(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     /* Real    */ ae_vector* d,
+     /* Complex */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector tau;
+    ae_vector e;
+    ae_vector work;
+    ae_matrix t;
+    ae_matrix q;
+    ae_int_t i;
+    ae_int_t k;
+    double v;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(d);
+    ae_matrix_clear(z);
+    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(zneeded==0||zneeded==1, "HermitianEVD: incorrect ZNeeded", _state);
+    
+    /*
+     * Reduce to tridiagonal form
+     */
+    hmatrixtd(a, n, isupper, &tau, d, &e, _state);
+    if( zneeded==1 )
+    {
+        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
+        zneeded = 2;
+    }
+    
+    /*
+     * TDEVD
+     */
+    result = smatrixtdevd(d, &e, n, zneeded, &t, _state);
+    
+    /*
+     * Eigenvectors are needed
+     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
+     */
+    if( result&&zneeded!=0 )
+    {
+        ae_vector_set_length(&work, n-1+1, _state);
+        ae_matrix_set_length(z, n-1+1, n-1+1, _state);
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Calculate real part
+             */
+            for(k=0; k<=n-1; k++)
+            {
+                work.ptr.p_double[k] = 0;
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                v = q.ptr.pp_complex[i][k].x;
+                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,n-1), v);
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
+            }
+            
+            /*
+             * Calculate imaginary part
+             */
+            for(k=0; k<=n-1; k++)
+            {
+                work.ptr.p_double[k] = 0;
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                v = q.ptr.pp_complex[i][k].y;
+                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,n-1), v);
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
+            }
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
+matrix  in  a  given half-interval (A, B] by using a bisection and inverse
+iteration
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular  part.  Array  whose   indexes   range   within
+                [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
+
+Output parameters:
+    M       -   number of eigenvalues found in a given half-interval, M>=0
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..M-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. M contains the number of eigenvalues in the given
+    half-interval (could be equal to 0), W contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+
+    False, if the bisection method subroutine  wasn't  able  to  find  the
+    eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
+    subroutine  wasn't  able  to  find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned, M  is
+    equal to 0.
+
+Note:
+    eigen vectors of Hermitian matrix are defined up to multiplication  by
+    a complex number L, such as |L|=1.
+
+  -- ALGLIB --
+     Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
+*************************************************************************/
+ae_bool hmatrixevdr(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     double b1,
+     double b2,
+     ae_int_t* m,
+     /* Real    */ ae_vector* w,
+     /* Complex */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_matrix q;
+    ae_matrix t;
+    ae_vector tau;
+    ae_vector e;
+    ae_vector work;
+    ae_int_t i;
+    ae_int_t k;
+    double v;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    *m = 0;
+    ae_vector_clear(w);
+    ae_matrix_clear(z);
+    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsInInterval: incorrect ZNeeded", _state);
+    
+    /*
+     * Reduce to tridiagonal form
+     */
+    hmatrixtd(a, n, isupper, &tau, w, &e, _state);
+    if( zneeded==1 )
+    {
+        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
+        zneeded = 2;
+    }
+    
+    /*
+     * Bisection and inverse iteration
+     */
+    result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, &t, _state);
+    
+    /*
+     * Eigenvectors are needed
+     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
+     */
+    if( (result&&zneeded!=0)&&*m!=0 )
+    {
+        ae_vector_set_length(&work, *m-1+1, _state);
+        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Calculate real part
+             */
+            for(k=0; k<=*m-1; k++)
+            {
+                work.ptr.p_double[k] = 0;
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                v = q.ptr.pp_complex[i][k].x;
+                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v);
+            }
+            for(k=0; k<=*m-1; k++)
+            {
+                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
+            }
+            
+            /*
+             * Calculate imaginary part
+             */
+            for(k=0; k<=*m-1; k++)
+            {
+                work.ptr.p_double[k] = 0;
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                v = q.ptr.pp_complex[i][k].y;
+                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v);
+            }
+            for(k=0; k<=*m-1; k++)
+            {
+                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
+            }
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
+matrix with given indexes by using bisection and inverse iteration methods
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    I1, I2 -    index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+
+Output parameters:
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..I2-I1].
+                In  that  case,  the eigenvectors are stored in the matrix
+                columns.
+
+Result:
+    True, if successful. W contains the eigenvalues, Z contains the
+    eigenvectors (if needed).
+
+    False, if the bisection method subroutine  wasn't  able  to  find  the
+    eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
+    subroutine wasn't able to find  all  the  corresponding  eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned.
+
+Note:
+    eigen vectors of Hermitian matrix are defined up to multiplication  by
+    a complex number L, such as |L|=1.
+
+  -- ALGLIB --
+     Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
+*************************************************************************/
+ae_bool hmatrixevdi(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* w,
+     /* Complex */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_matrix q;
+    ae_matrix t;
+    ae_vector tau;
+    ae_vector e;
+    ae_vector work;
+    ae_int_t i;
+    ae_int_t k;
+    double v;
+    ae_int_t m;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(w);
+    ae_matrix_clear(z);
+    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
+    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsByIndexes: incorrect ZNeeded", _state);
+    
+    /*
+     * Reduce to tridiagonal form
+     */
+    hmatrixtd(a, n, isupper, &tau, w, &e, _state);
+    if( zneeded==1 )
+    {
+        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
+        zneeded = 2;
+    }
+    
+    /*
+     * Bisection and inverse iteration
+     */
+    result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, &t, _state);
+    
+    /*
+     * Eigenvectors are needed
+     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
+     */
+    m = i2-i1+1;
+    if( result&&zneeded!=0 )
+    {
+        ae_vector_set_length(&work, m-1+1, _state);
+        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
+        for(i=0; i<=n-1; i++)
+        {
+            
+            /*
+             * Calculate real part
+             */
+            for(k=0; k<=m-1; k++)
+            {
+                work.ptr.p_double[k] = 0;
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                v = q.ptr.pp_complex[i][k].x;
+                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v);
+            }
+            for(k=0; k<=m-1; k++)
+            {
+                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
+            }
+            
+            /*
+             * Calculate imaginary part
+             */
+            for(k=0; k<=m-1; k++)
+            {
+                work.ptr.p_double[k] = 0;
+            }
+            for(k=0; k<=n-1; k++)
+            {
+                v = q.ptr.pp_complex[i][k].y;
+                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v);
+            }
+            for(k=0; k<=m-1; k++)
+            {
+                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
+            }
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
+
+The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
+using an QL/QR algorithm with implicit shifts.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix
+                   are multiplied by the square matrix Z. It is used if the
+                   tridiagonal matrix is obtained by the similarity
+                   transformation of a symmetric matrix;
+                 * 2, the eigenvectors of a tridiagonal matrix replace the
+                   square matrix Z;
+                 * 3, matrix Z contains the first row of the eigenvectors
+                   matrix.
+    Z       -   if ZNeeded=1, Z contains the square matrix by which the
+                eigenvectors are multiplied.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the product of a given matrix (from the left)
+                   and the eigenvectors matrix (from the right);
+                 * 2, Z contains the eigenvectors.
+                 * 3, Z contains the first row of the eigenvectors matrix.
+                If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
+                In that case, the eigenvectors are stored in the matrix columns.
+                If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+ae_bool smatrixtdevd(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _e;
+    ae_vector d1;
+    ae_vector e1;
+    ae_matrix z1;
+    ae_int_t i;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&z1, 0, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Prepare 1-based task
+     */
+    ae_vector_set_length(&d1, n+1, _state);
+    ae_vector_set_length(&e1, n+1, _state);
+    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
+    if( n>1 )
+    {
+        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
+    }
+    if( zneeded==1 )
+    {
+        ae_matrix_set_length(&z1, n+1, n+1, _state);
+        for(i=1; i<=n; i++)
+        {
+            ae_v_move(&z1.ptr.pp_double[i][1], 1, &z->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
+        }
+    }
+    
+    /*
+     * Solve 1-based task
+     */
+    result = evd_tridiagonalevd(&d1, &e1, n, zneeded, &z1, _state);
+    if( !result )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Convert back to 0-based result
+     */
+    ae_v_move(&d->ptr.p_double[0], 1, &d1.ptr.p_double[1], 1, ae_v_len(0,n-1));
+    if( zneeded!=0 )
+    {
+        if( zneeded==1 )
+        {
+            for(i=1; i<=n; i++)
+            {
+                ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1));
+            }
+            ae_frame_leave(_state);
+            return result;
+        }
+        if( zneeded==2 )
+        {
+            ae_matrix_set_length(z, n-1+1, n-1+1, _state);
+            for(i=1; i<=n; i++)
+            {
+                ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1));
+            }
+            ae_frame_leave(_state);
+            return result;
+        }
+        if( zneeded==3 )
+        {
+            ae_matrix_set_length(z, 0+1, n-1+1, _state);
+            ae_v_move(&z->ptr.pp_double[0][0], 1, &z1.ptr.pp_double[1][1], 1, ae_v_len(0,n-1));
+            ae_frame_leave(_state);
+            return result;
+        }
+        ae_assert(ae_false, "SMatrixTDEVD: Incorrect ZNeeded!", _state);
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
+given half-interval (A, B] by using bisection and inverse iteration.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix, N>=0.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix are multiplied
+                   by the square matrix Z. It is used if the tridiagonal
+                   matrix is obtained by the similarity transformation
+                   of a symmetric matrix.
+                 * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
+    A, B    -   half-interval (A, B] to search eigenvalues in.
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z isn't used and remains unchanged;
+                 * 1, Z contains the square matrix (array whose indexes range
+                   within [0..N-1, 0..N-1]) which reduces the given symmetric
+                   matrix to tridiagonal form;
+                 * 2, Z isn't used (but changed on the exit).
+
+Output parameters:
+    D       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    M       -   number of eigenvalues found in the given half-interval (M>=0).
+    Z       -   if ZNeeded is equal to:
+                 * 0, doesn't contain any information;
+                 * 1, contains the product of a given NxN matrix Z (from the
+                   left) and NxM matrix of the eigenvectors found (from the
+                   right). Array whose indexes range within [0..N-1, 0..M-1].
+                 * 2, contains the matrix of the eigenvectors found.
+                   Array whose indexes range within [0..N-1, 0..M-1].
+
+Result:
+
+    True, if successful. In that case, M contains the number of eigenvalues
+    in the given half-interval (could be equal to 0), D contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+    It should be noted that the subroutine changes the size of arrays D and Z.
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors. In that case,
+    the eigenvalues and eigenvectors are not returned, M is equal to 0.
+
+  -- ALGLIB --
+     Copyright 31.03.2008 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixtdevdr(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     double a,
+     double b,
+     ae_int_t* m,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t errorcode;
+    ae_int_t nsplit;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t cr;
+    ae_vector iblock;
+    ae_vector isplit;
+    ae_vector ifail;
+    ae_vector d1;
+    ae_vector e1;
+    ae_vector w;
+    ae_matrix z2;
+    ae_matrix z3;
+    double v;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    *m = 0;
+    ae_vector_init(&iblock, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&isplit, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&ifail, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded>=0&&zneeded<=2, "SMatrixTDEVDR: incorrect ZNeeded!", _state);
+    
+    /*
+     * Special cases
+     */
+    if( ae_fp_less_eq(b,a) )
+    {
+        *m = 0;
+        result = ae_true;
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( n<=0 )
+    {
+        *m = 0;
+        result = ae_true;
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Copy D,E to D1, E1
+     */
+    ae_vector_set_length(&d1, n+1, _state);
+    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
+    if( n>1 )
+    {
+        ae_vector_set_length(&e1, n-1+1, _state);
+        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
+    }
+    
+    /*
+     * No eigen vectors
+     */
+    if( zneeded==0 )
+    {
+        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 1, a, b, 0, 0, -1, &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
+        if( !result||*m==0 )
+        {
+            *m = 0;
+            ae_frame_leave(_state);
+            return result;
+        }
+        ae_vector_set_length(d, *m-1+1, _state);
+        ae_v_move(&d->ptr.p_double[0], 1, &w.ptr.p_double[1], 1, ae_v_len(0,*m-1));
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Eigen vectors are multiplied by Z
+     */
+    if( zneeded==1 )
+    {
+        
+        /*
+         * Find eigen pairs
+         */
+        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, -1, &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
+        if( !result||*m==0 )
+        {
+            *m = 0;
+            ae_frame_leave(_state);
+            return result;
+        }
+        evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
+        if( cr!=0 )
+        {
+            *m = 0;
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Sort eigen values and vectors
+         */
+        for(i=1; i<=*m; i++)
+        {
+            k = i;
+            for(j=i; j<=*m; j++)
+            {
+                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
+                {
+                    k = j;
+                }
+            }
+            v = w.ptr.p_double[i];
+            w.ptr.p_double[i] = w.ptr.p_double[k];
+            w.ptr.p_double[k] = v;
+            for(j=1; j<=n; j++)
+            {
+                v = z2.ptr.pp_double[j][i];
+                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
+                z2.ptr.pp_double[j][k] = v;
+            }
+        }
+        
+        /*
+         * Transform Z2 and overwrite Z
+         */
+        ae_matrix_set_length(&z3, *m+1, n+1, _state);
+        for(i=1; i<=*m; i++)
+        {
+            ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n));
+        }
+        for(i=1; i<=n; i++)
+        {
+            for(j=1; j<=*m; j++)
+            {
+                v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1));
+                z2.ptr.pp_double[i][j] = v;
+            }
+        }
+        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
+        for(i=1; i<=*m; i++)
+        {
+            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
+        }
+        
+        /*
+         * Store W
+         */
+        ae_vector_set_length(d, *m-1+1, _state);
+        for(i=1; i<=*m; i++)
+        {
+            d->ptr.p_double[i-1] = w.ptr.p_double[i];
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Eigen vectors are stored in Z
+     */
+    if( zneeded==2 )
+    {
+        
+        /*
+         * Find eigen pairs
+         */
+        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, -1, &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
+        if( !result||*m==0 )
+        {
+            *m = 0;
+            ae_frame_leave(_state);
+            return result;
+        }
+        evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
+        if( cr!=0 )
+        {
+            *m = 0;
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Sort eigen values and vectors
+         */
+        for(i=1; i<=*m; i++)
+        {
+            k = i;
+            for(j=i; j<=*m; j++)
+            {
+                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
+                {
+                    k = j;
+                }
+            }
+            v = w.ptr.p_double[i];
+            w.ptr.p_double[i] = w.ptr.p_double[k];
+            w.ptr.p_double[k] = v;
+            for(j=1; j<=n; j++)
+            {
+                v = z2.ptr.pp_double[j][i];
+                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
+                z2.ptr.pp_double[j][k] = v;
+            }
+        }
+        
+        /*
+         * Store W
+         */
+        ae_vector_set_length(d, *m-1+1, _state);
+        for(i=1; i<=*m; i++)
+        {
+            d->ptr.p_double[i-1] = w.ptr.p_double[i];
+        }
+        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
+        for(i=1; i<=*m; i++)
+        {
+            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = ae_false;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
+indexes (in ascending order) by using the bisection and inverse iteraion.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix. N>=0.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix are multiplied
+                   by the square matrix Z. It is used if the
+                   tridiagonal matrix is obtained by the similarity transformation
+                   of a symmetric matrix.
+                 * 2, the eigenvectors of a tridiagonal matrix replace
+                   matrix Z.
+    I1, I2  -   index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z isn't used and remains unchanged;
+                 * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
+                   which reduces the given symmetric matrix to  tridiagonal form;
+                 * 2, Z isn't used (but changed on the exit).
+
+Output parameters:
+    D       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, doesn't contain any information;
+                 * 1, contains the product of a given NxN matrix Z (from the left) and
+                   Nx(I2-I1) matrix of the eigenvectors found (from the right).
+                   Array whose indexes range within [0..N-1, 0..I2-I1].
+                 * 2, contains the matrix of the eigenvalues found.
+                   Array whose indexes range within [0..N-1, 0..I2-I1].
+
+
+Result:
+
+    True, if successful. In that case, D contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+    It should be noted that the subroutine changes the size of arrays D and Z.
+
+    False, if the bisection method subroutine wasn't able to find the eigenvalues
+    in the given interval or if the inverse iteration subroutine wasn't able
+    to find all the corresponding eigenvectors. In that case, the eigenvalues
+    and eigenvectors are not returned.
+
+  -- ALGLIB --
+     Copyright 25.12.2005 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixtdevdi(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t errorcode;
+    ae_int_t nsplit;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_int_t m;
+    ae_int_t cr;
+    ae_vector iblock;
+    ae_vector isplit;
+    ae_vector ifail;
+    ae_vector w;
+    ae_vector d1;
+    ae_vector e1;
+    ae_matrix z2;
+    ae_matrix z3;
+    double v;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&iblock, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&isplit, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&ifail, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true);
+
+    ae_assert((0<=i1&&i1<=i2)&&i2<n, "SMatrixTDEVDI: incorrect I1/I2!", _state);
+    
+    /*
+     * Copy D,E to D1, E1
+     */
+    ae_vector_set_length(&d1, n+1, _state);
+    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
+    if( n>1 )
+    {
+        ae_vector_set_length(&e1, n-1+1, _state);
+        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
+    }
+    
+    /*
+     * No eigen vectors
+     */
+    if( zneeded==0 )
+    {
+        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 1, 0, 0, i1+1, i2+1, -1, &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
+        if( !result )
+        {
+            ae_frame_leave(_state);
+            return result;
+        }
+        if( m!=i2-i1+1 )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        ae_vector_set_length(d, m-1+1, _state);
+        for(i=1; i<=m; i++)
+        {
+            d->ptr.p_double[i-1] = w.ptr.p_double[i];
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Eigen vectors are multiplied by Z
+     */
+    if( zneeded==1 )
+    {
+        
+        /*
+         * Find eigen pairs
+         */
+        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, 0, 0, i1+1, i2+1, -1, &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
+        if( !result )
+        {
+            ae_frame_leave(_state);
+            return result;
+        }
+        if( m!=i2-i1+1 )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
+        if( cr!=0 )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Sort eigen values and vectors
+         */
+        for(i=1; i<=m; i++)
+        {
+            k = i;
+            for(j=i; j<=m; j++)
+            {
+                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
+                {
+                    k = j;
+                }
+            }
+            v = w.ptr.p_double[i];
+            w.ptr.p_double[i] = w.ptr.p_double[k];
+            w.ptr.p_double[k] = v;
+            for(j=1; j<=n; j++)
+            {
+                v = z2.ptr.pp_double[j][i];
+                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
+                z2.ptr.pp_double[j][k] = v;
+            }
+        }
+        
+        /*
+         * Transform Z2 and overwrite Z
+         */
+        ae_matrix_set_length(&z3, m+1, n+1, _state);
+        for(i=1; i<=m; i++)
+        {
+            ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n));
+        }
+        for(i=1; i<=n; i++)
+        {
+            for(j=1; j<=m; j++)
+            {
+                v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1));
+                z2.ptr.pp_double[i][j] = v;
+            }
+        }
+        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
+        for(i=1; i<=m; i++)
+        {
+            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
+        }
+        
+        /*
+         * Store W
+         */
+        ae_vector_set_length(d, m-1+1, _state);
+        for(i=1; i<=m; i++)
+        {
+            d->ptr.p_double[i-1] = w.ptr.p_double[i];
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Eigen vectors are stored in Z
+     */
+    if( zneeded==2 )
+    {
+        
+        /*
+         * Find eigen pairs
+         */
+        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, 0, 0, i1+1, i2+1, -1, &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
+        if( !result )
+        {
+            ae_frame_leave(_state);
+            return result;
+        }
+        if( m!=i2-i1+1 )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
+        if( cr!=0 )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Sort eigen values and vectors
+         */
+        for(i=1; i<=m; i++)
+        {
+            k = i;
+            for(j=i; j<=m; j++)
+            {
+                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
+                {
+                    k = j;
+                }
+            }
+            v = w.ptr.p_double[i];
+            w.ptr.p_double[i] = w.ptr.p_double[k];
+            w.ptr.p_double[k] = v;
+            for(j=1; j<=n; j++)
+            {
+                v = z2.ptr.pp_double[j][i];
+                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
+                z2.ptr.pp_double[j][k] = v;
+            }
+        }
+        
+        /*
+         * Store Z
+         */
+        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
+        for(i=1; i<=m; i++)
+        {
+            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
+        }
+        
+        /*
+         * Store W
+         */
+        ae_vector_set_length(d, m-1+1, _state);
+        for(i=1; i<=m; i++)
+        {
+            d->ptr.p_double[i-1] = w.ptr.p_double[i];
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = ae_false;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Finding eigenvalues and eigenvectors of a general matrix
+
+The algorithm finds eigenvalues and eigenvectors of a general matrix by
+using the QR algorithm with multiple shifts. The algorithm can find
+eigenvalues and both left and right eigenvectors.
+
+The right eigenvector is a vector x such that A*x = w*x, and the left
+eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
+conjugate transposition of vector y).
+
+Input parameters:
+    A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    VNeeded -   flag controlling whether eigenvectors are needed or not.
+                If VNeeded is equal to:
+                 * 0, eigenvectors are not returned;
+                 * 1, right eigenvectors are returned;
+                 * 2, left eigenvectors are returned;
+                 * 3, both left and right eigenvectors are returned.
+
+Output parameters:
+    WR      -   real parts of eigenvalues.
+                Array whose index ranges within [0..N-1].
+    WR      -   imaginary parts of eigenvalues.
+                Array whose index ranges within [0..N-1].
+    VL, VR  -   arrays of left and right eigenvectors (if they are needed).
+                If WI[i]=0, the respective eigenvalue is a real number,
+                and it corresponds to the column number I of matrices VL/VR.
+                If WI[i]>0, we have a pair of complex conjugate numbers with
+                positive and negative imaginary parts:
+                    the first eigenvalue WR[i] + sqrt(-1)*WI[i];
+                    the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
+                    WI[i]>0
+                    WI[i+1] = -WI[i] < 0
+                In that case, the eigenvector  corresponding to the first
+                eigenvalue is located in i and i+1 columns of matrices
+                VL/VR (the column number i contains the real part, and the
+                column number i+1 contains the imaginary part), and the vector
+                corresponding to the second eigenvalue is a complex conjugate to
+                the first vector.
+                Arrays whose indexes range within [0..N-1, 0..N-1].
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm has not converged.
+
+Note 1:
+    Some users may ask the following question: what if WI[N-1]>0?
+    WI[N] must contain an eigenvalue which is complex conjugate to the
+    N-th eigenvalue, but the array has only size N?
+    The answer is as follows: such a situation cannot occur because the
+    algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is
+    strictly less than N-1.
+
+Note 2:
+    The algorithm performance depends on the value of the internal parameter
+    NS of the InternalSchurDecomposition subroutine which defines the number
+    of shifts in the QR algorithm (similarly to the block width in block-matrix
+    algorithms of linear algebra). If you require maximum performance
+    on your machine, it is recommended to adjust this parameter manually.
+
+
+See also the InternalTREVC subroutine.
+
+The algorithm is based on the LAPACK 3.0 library.
+*************************************************************************/
+ae_bool rmatrixevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t vneeded,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     /* Real    */ ae_matrix* vl,
+     /* Real    */ ae_matrix* vr,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_matrix a1;
+    ae_matrix vl1;
+    ae_matrix vr1;
+    ae_vector wr1;
+    ae_vector wi1;
+    ae_int_t i;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(wr);
+    ae_vector_clear(wi);
+    ae_matrix_clear(vl);
+    ae_matrix_clear(vr);
+    ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&vl1, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&vr1, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&wr1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&wi1, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(vneeded>=0&&vneeded<=3, "RMatrixEVD: incorrect VNeeded!", _state);
+    ae_matrix_set_length(&a1, n+1, n+1, _state);
+    for(i=1; i<=n; i++)
+    {
+        ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
+    }
+    result = evd_nonsymmetricevd(&a1, n, vneeded, &wr1, &wi1, &vl1, &vr1, _state);
+    if( result )
+    {
+        ae_vector_set_length(wr, n-1+1, _state);
+        ae_vector_set_length(wi, n-1+1, _state);
+        ae_v_move(&wr->ptr.p_double[0], 1, &wr1.ptr.p_double[1], 1, ae_v_len(0,n-1));
+        ae_v_move(&wi->ptr.p_double[0], 1, &wi1.ptr.p_double[1], 1, ae_v_len(0,n-1));
+        if( vneeded==2||vneeded==3 )
+        {
+            ae_matrix_set_length(vl, n-1+1, n-1+1, _state);
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&vl->ptr.pp_double[i][0], 1, &vl1.ptr.pp_double[i+1][1], 1, ae_v_len(0,n-1));
+            }
+        }
+        if( vneeded==1||vneeded==3 )
+        {
+            ae_matrix_set_length(vr, n-1+1, n-1+1, _state);
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&vr->ptr.pp_double[i][0], 1, &vr1.ptr.pp_double[i+1][1], 1, ae_v_len(0,n-1));
+            }
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+static ae_bool evd_tridiagonalevd(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _e;
+    ae_int_t maxit;
+    ae_int_t i;
+    ae_int_t ii;
+    ae_int_t iscale;
+    ae_int_t j;
+    ae_int_t jtot;
+    ae_int_t k;
+    ae_int_t t;
+    ae_int_t l;
+    ae_int_t l1;
+    ae_int_t lend;
+    ae_int_t lendm1;
+    ae_int_t lendp1;
+    ae_int_t lendsv;
+    ae_int_t lm1;
+    ae_int_t lsv;
+    ae_int_t m;
+    ae_int_t mm;
+    ae_int_t mm1;
+    ae_int_t nm1;
+    ae_int_t nmaxit;
+    ae_int_t tmpint;
+    double anorm;
+    double b;
+    double c;
+    double eps;
+    double eps2;
+    double f;
+    double g;
+    double p;
+    double r;
+    double rt1;
+    double rt2;
+    double s;
+    double safmax;
+    double safmin;
+    double ssfmax;
+    double ssfmin;
+    double tst;
+    double tmp;
+    ae_vector work1;
+    ae_vector work2;
+    ae_vector workc;
+    ae_vector works;
+    ae_vector wtemp;
+    ae_bool gotoflag;
+    ae_int_t zrows;
+    ae_bool wastranspose;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+    ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&workc, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&works, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&wtemp, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(zneeded>=0&&zneeded<=3, "TridiagonalEVD: Incorrent ZNeeded", _state);
+    
+    /*
+     * Quick return if possible
+     */
+    if( zneeded<0||zneeded>3 )
+    {
+        result = ae_false;
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = ae_true;
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( n==1 )
+    {
+        if( zneeded==2||zneeded==3 )
+        {
+            ae_matrix_set_length(z, 1+1, 1+1, _state);
+            z->ptr.pp_double[1][1] = 1;
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    maxit = 30;
+    
+    /*
+     * Initialize arrays
+     */
+    ae_vector_set_length(&wtemp, n+1, _state);
+    ae_vector_set_length(&work1, n-1+1, _state);
+    ae_vector_set_length(&work2, n-1+1, _state);
+    ae_vector_set_length(&workc, n+1, _state);
+    ae_vector_set_length(&works, n+1, _state);
+    
+    /*
+     * Determine the unit roundoff and over/underflow thresholds.
+     */
+    eps = ae_machineepsilon;
+    eps2 = ae_sqr(eps, _state);
+    safmin = ae_minrealnumber;
+    safmax = ae_maxrealnumber;
+    ssfmax = ae_sqrt(safmax, _state)/3;
+    ssfmin = ae_sqrt(safmin, _state)/eps2;
+    
+    /*
+     * Prepare Z
+     *
+     * Here we are using transposition to get rid of column operations
+     *
+     */
+    wastranspose = ae_false;
+    zrows = 0;
+    if( zneeded==1 )
+    {
+        zrows = n;
+    }
+    if( zneeded==2 )
+    {
+        zrows = n;
+    }
+    if( zneeded==3 )
+    {
+        zrows = 1;
+    }
+    if( zneeded==1 )
+    {
+        wastranspose = ae_true;
+        inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
+    }
+    if( zneeded==2 )
+    {
+        wastranspose = ae_true;
+        ae_matrix_set_length(z, n+1, n+1, _state);
+        for(i=1; i<=n; i++)
+        {
+            for(j=1; j<=n; j++)
+            {
+                if( i==j )
+                {
+                    z->ptr.pp_double[i][j] = 1;
+                }
+                else
+                {
+                    z->ptr.pp_double[i][j] = 0;
+                }
+            }
+        }
+    }
+    if( zneeded==3 )
+    {
+        wastranspose = ae_false;
+        ae_matrix_set_length(z, 1+1, n+1, _state);
+        for(j=1; j<=n; j++)
+        {
+            if( j==1 )
+            {
+                z->ptr.pp_double[1][j] = 1;
+            }
+            else
+            {
+                z->ptr.pp_double[1][j] = 0;
+            }
+        }
+    }
+    nmaxit = n*maxit;
+    jtot = 0;
+    
+    /*
+     * Determine where the matrix splits and choose QL or QR iteration
+     * for each block, according to whether top or bottom diagonal
+     * element is smaller.
+     */
+    l1 = 1;
+    nm1 = n-1;
+    for(;;)
+    {
+        if( l1>n )
+        {
+            break;
+        }
+        if( l1>1 )
+        {
+            e->ptr.p_double[l1-1] = 0;
+        }
+        gotoflag = ae_false;
+        m = l1;
+        if( l1<=nm1 )
+        {
+            for(m=l1; m<=nm1; m++)
+            {
+                tst = ae_fabs(e->ptr.p_double[m], _state);
+                if( ae_fp_eq(tst,0) )
+                {
+                    gotoflag = ae_true;
+                    break;
+                }
+                if( ae_fp_less_eq(tst,ae_sqrt(ae_fabs(d->ptr.p_double[m], _state), _state)*ae_sqrt(ae_fabs(d->ptr.p_double[m+1], _state), _state)*eps) )
+                {
+                    e->ptr.p_double[m] = 0;
+                    gotoflag = ae_true;
+                    break;
+                }
+            }
+        }
+        if( !gotoflag )
+        {
+            m = n;
+        }
+        
+        /*
+         * label 30:
+         */
+        l = l1;
+        lsv = l;
+        lend = m;
+        lendsv = lend;
+        l1 = m+1;
+        if( lend==l )
+        {
+            continue;
+        }
+        
+        /*
+         * Scale submatrix in rows and columns L to LEND
+         */
+        if( l==lend )
+        {
+            anorm = ae_fabs(d->ptr.p_double[l], _state);
+        }
+        else
+        {
+            anorm = ae_maxreal(ae_fabs(d->ptr.p_double[l], _state)+ae_fabs(e->ptr.p_double[l], _state), ae_fabs(e->ptr.p_double[lend-1], _state)+ae_fabs(d->ptr.p_double[lend], _state), _state);
+            for(i=l+1; i<=lend-1; i++)
+            {
+                anorm = ae_maxreal(anorm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state), _state);
+            }
+        }
+        iscale = 0;
+        if( ae_fp_eq(anorm,0) )
+        {
+            continue;
+        }
+        if( ae_fp_greater(anorm,ssfmax) )
+        {
+            iscale = 1;
+            tmp = ssfmax/anorm;
+            tmpint = lend-1;
+            ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp);
+            ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp);
+        }
+        if( ae_fp_less(anorm,ssfmin) )
+        {
+            iscale = 2;
+            tmp = ssfmin/anorm;
+            tmpint = lend-1;
+            ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp);
+            ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp);
+        }
+        
+        /*
+         * Choose between QL and QR iteration
+         */
+        if( ae_fp_less(ae_fabs(d->ptr.p_double[lend], _state),ae_fabs(d->ptr.p_double[l], _state)) )
+        {
+            lend = lsv;
+            l = lendsv;
+        }
+        if( lend>l )
+        {
+            
+            /*
+             * QL Iteration
+             *
+             * Look for small subdiagonal element.
+             */
+            for(;;)
+            {
+                gotoflag = ae_false;
+                if( l!=lend )
+                {
+                    lendm1 = lend-1;
+                    for(m=l; m<=lendm1; m++)
+                    {
+                        tst = ae_sqr(ae_fabs(e->ptr.p_double[m], _state), _state);
+                        if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m+1], _state)+safmin) )
+                        {
+                            gotoflag = ae_true;
+                            break;
+                        }
+                    }
+                }
+                if( !gotoflag )
+                {
+                    m = lend;
+                }
+                if( m<lend )
+                {
+                    e->ptr.p_double[m] = 0;
+                }
+                p = d->ptr.p_double[l];
+                if( m!=l )
+                {
+                    
+                    /*
+                     * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+                     * to compute its eigensystem.
+                     */
+                    if( m==l+1 )
+                    {
+                        if( zneeded>0 )
+                        {
+                            evd_tdevdev2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, &c, &s, _state);
+                            work1.ptr.p_double[l] = c;
+                            work2.ptr.p_double[l] = s;
+                            workc.ptr.p_double[1] = work1.ptr.p_double[l];
+                            works.ptr.p_double[1] = work2.ptr.p_double[l];
+                            if( !wastranspose )
+                            {
+                                applyrotationsfromtheright(ae_false, 1, zrows, l, l+1, &workc, &works, z, &wtemp, _state);
+                            }
+                            else
+                            {
+                                applyrotationsfromtheleft(ae_false, l, l+1, 1, zrows, &workc, &works, z, &wtemp, _state);
+                            }
+                        }
+                        else
+                        {
+                            evd_tdevde2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, _state);
+                        }
+                        d->ptr.p_double[l] = rt1;
+                        d->ptr.p_double[l+1] = rt2;
+                        e->ptr.p_double[l] = 0;
+                        l = l+2;
+                        if( l<=lend )
+                        {
+                            continue;
+                        }
+                        
+                        /*
+                         * GOTO 140
+                         */
+                        break;
+                    }
+                    if( jtot==nmaxit )
+                    {
+                        
+                        /*
+                         * GOTO 140
+                         */
+                        break;
+                    }
+                    jtot = jtot+1;
+                    
+                    /*
+                     * Form shift.
+                     */
+                    g = (d->ptr.p_double[l+1]-p)/(2*e->ptr.p_double[l]);
+                    r = evd_tdevdpythag(g, 1, _state);
+                    g = d->ptr.p_double[m]-p+e->ptr.p_double[l]/(g+evd_tdevdextsign(r, g, _state));
+                    s = 1;
+                    c = 1;
+                    p = 0;
+                    
+                    /*
+                     * Inner loop
+                     */
+                    mm1 = m-1;
+                    for(i=mm1; i>=l; i--)
+                    {
+                        f = s*e->ptr.p_double[i];
+                        b = c*e->ptr.p_double[i];
+                        generaterotation(g, f, &c, &s, &r, _state);
+                        if( i!=m-1 )
+                        {
+                            e->ptr.p_double[i+1] = r;
+                        }
+                        g = d->ptr.p_double[i+1]-p;
+                        r = (d->ptr.p_double[i]-g)*s+2*c*b;
+                        p = s*r;
+                        d->ptr.p_double[i+1] = g+p;
+                        g = c*r-b;
+                        
+                        /*
+                         * If eigenvectors are desired, then save rotations.
+                         */
+                        if( zneeded>0 )
+                        {
+                            work1.ptr.p_double[i] = c;
+                            work2.ptr.p_double[i] = -s;
+                        }
+                    }
+                    
+                    /*
+                     * If eigenvectors are desired, then apply saved rotations.
+                     */
+                    if( zneeded>0 )
+                    {
+                        for(i=l; i<=m-1; i++)
+                        {
+                            workc.ptr.p_double[i-l+1] = work1.ptr.p_double[i];
+                            works.ptr.p_double[i-l+1] = work2.ptr.p_double[i];
+                        }
+                        if( !wastranspose )
+                        {
+                            applyrotationsfromtheright(ae_false, 1, zrows, l, m, &workc, &works, z, &wtemp, _state);
+                        }
+                        else
+                        {
+                            applyrotationsfromtheleft(ae_false, l, m, 1, zrows, &workc, &works, z, &wtemp, _state);
+                        }
+                    }
+                    d->ptr.p_double[l] = d->ptr.p_double[l]-p;
+                    e->ptr.p_double[l] = g;
+                    continue;
+                }
+                
+                /*
+                 * Eigenvalue found.
+                 */
+                d->ptr.p_double[l] = p;
+                l = l+1;
+                if( l<=lend )
+                {
+                    continue;
+                }
+                break;
+            }
+        }
+        else
+        {
+            
+            /*
+             * QR Iteration
+             *
+             * Look for small superdiagonal element.
+             */
+            for(;;)
+            {
+                gotoflag = ae_false;
+                if( l!=lend )
+                {
+                    lendp1 = lend+1;
+                    for(m=l; m>=lendp1; m--)
+                    {
+                        tst = ae_sqr(ae_fabs(e->ptr.p_double[m-1], _state), _state);
+                        if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m-1], _state)+safmin) )
+                        {
+                            gotoflag = ae_true;
+                            break;
+                        }
+                    }
+                }
+                if( !gotoflag )
+                {
+                    m = lend;
+                }
+                if( m>lend )
+                {
+                    e->ptr.p_double[m-1] = 0;
+                }
+                p = d->ptr.p_double[l];
+                if( m!=l )
+                {
+                    
+                    /*
+                     * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+                     * to compute its eigensystem.
+                     */
+                    if( m==l-1 )
+                    {
+                        if( zneeded>0 )
+                        {
+                            evd_tdevdev2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, &c, &s, _state);
+                            work1.ptr.p_double[m] = c;
+                            work2.ptr.p_double[m] = s;
+                            workc.ptr.p_double[1] = c;
+                            works.ptr.p_double[1] = s;
+                            if( !wastranspose )
+                            {
+                                applyrotationsfromtheright(ae_true, 1, zrows, l-1, l, &workc, &works, z, &wtemp, _state);
+                            }
+                            else
+                            {
+                                applyrotationsfromtheleft(ae_true, l-1, l, 1, zrows, &workc, &works, z, &wtemp, _state);
+                            }
+                        }
+                        else
+                        {
+                            evd_tdevde2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, _state);
+                        }
+                        d->ptr.p_double[l-1] = rt1;
+                        d->ptr.p_double[l] = rt2;
+                        e->ptr.p_double[l-1] = 0;
+                        l = l-2;
+                        if( l>=lend )
+                        {
+                            continue;
+                        }
+                        break;
+                    }
+                    if( jtot==nmaxit )
+                    {
+                        break;
+                    }
+                    jtot = jtot+1;
+                    
+                    /*
+                     * Form shift.
+                     */
+                    g = (d->ptr.p_double[l-1]-p)/(2*e->ptr.p_double[l-1]);
+                    r = evd_tdevdpythag(g, 1, _state);
+                    g = d->ptr.p_double[m]-p+e->ptr.p_double[l-1]/(g+evd_tdevdextsign(r, g, _state));
+                    s = 1;
+                    c = 1;
+                    p = 0;
+                    
+                    /*
+                     * Inner loop
+                     */
+                    lm1 = l-1;
+                    for(i=m; i<=lm1; i++)
+                    {
+                        f = s*e->ptr.p_double[i];
+                        b = c*e->ptr.p_double[i];
+                        generaterotation(g, f, &c, &s, &r, _state);
+                        if( i!=m )
+                        {
+                            e->ptr.p_double[i-1] = r;
+                        }
+                        g = d->ptr.p_double[i]-p;
+                        r = (d->ptr.p_double[i+1]-g)*s+2*c*b;
+                        p = s*r;
+                        d->ptr.p_double[i] = g+p;
+                        g = c*r-b;
+                        
+                        /*
+                         * If eigenvectors are desired, then save rotations.
+                         */
+                        if( zneeded>0 )
+                        {
+                            work1.ptr.p_double[i] = c;
+                            work2.ptr.p_double[i] = s;
+                        }
+                    }
+                    
+                    /*
+                     * If eigenvectors are desired, then apply saved rotations.
+                     */
+                    if( zneeded>0 )
+                    {
+                        mm = l-m+1;
+                        for(i=m; i<=l-1; i++)
+                        {
+                            workc.ptr.p_double[i-m+1] = work1.ptr.p_double[i];
+                            works.ptr.p_double[i-m+1] = work2.ptr.p_double[i];
+                        }
+                        if( !wastranspose )
+                        {
+                            applyrotationsfromtheright(ae_true, 1, zrows, m, l, &workc, &works, z, &wtemp, _state);
+                        }
+                        else
+                        {
+                            applyrotationsfromtheleft(ae_true, m, l, 1, zrows, &workc, &works, z, &wtemp, _state);
+                        }
+                    }
+                    d->ptr.p_double[l] = d->ptr.p_double[l]-p;
+                    e->ptr.p_double[lm1] = g;
+                    continue;
+                }
+                
+                /*
+                 * Eigenvalue found.
+                 */
+                d->ptr.p_double[l] = p;
+                l = l-1;
+                if( l>=lend )
+                {
+                    continue;
+                }
+                break;
+            }
+        }
+        
+        /*
+         * Undo scaling if necessary
+         */
+        if( iscale==1 )
+        {
+            tmp = anorm/ssfmax;
+            tmpint = lendsv-1;
+            ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp);
+            ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp);
+        }
+        if( iscale==2 )
+        {
+            tmp = anorm/ssfmin;
+            tmpint = lendsv-1;
+            ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp);
+            ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp);
+        }
+        
+        /*
+         * Check for no convergence to an eigenvalue after a total
+         * of N*MAXIT iterations.
+         */
+        if( jtot>=nmaxit )
+        {
+            result = ae_false;
+            if( wastranspose )
+            {
+                inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
+            }
+            ae_frame_leave(_state);
+            return result;
+        }
+    }
+    
+    /*
+     * Order eigenvalues and eigenvectors.
+     */
+    if( zneeded==0 )
+    {
+        
+        /*
+         * Sort
+         */
+        if( n==1 )
+        {
+            ae_frame_leave(_state);
+            return result;
+        }
+        if( n==2 )
+        {
+            if( ae_fp_greater(d->ptr.p_double[1],d->ptr.p_double[2]) )
+            {
+                tmp = d->ptr.p_double[1];
+                d->ptr.p_double[1] = d->ptr.p_double[2];
+                d->ptr.p_double[2] = tmp;
+            }
+            ae_frame_leave(_state);
+            return result;
+        }
+        i = 2;
+        do
+        {
+            t = i;
+            while(t!=1)
+            {
+                k = t/2;
+                if( ae_fp_greater_eq(d->ptr.p_double[k],d->ptr.p_double[t]) )
+                {
+                    t = 1;
+                }
+                else
+                {
+                    tmp = d->ptr.p_double[k];
+                    d->ptr.p_double[k] = d->ptr.p_double[t];
+                    d->ptr.p_double[t] = tmp;
+                    t = k;
+                }
+            }
+            i = i+1;
+        }
+        while(i<=n);
+        i = n-1;
+        do
+        {
+            tmp = d->ptr.p_double[i+1];
+            d->ptr.p_double[i+1] = d->ptr.p_double[1];
+            d->ptr.p_double[1] = tmp;
+            t = 1;
+            while(t!=0)
+            {
+                k = 2*t;
+                if( k>i )
+                {
+                    t = 0;
+                }
+                else
+                {
+                    if( k<i )
+                    {
+                        if( ae_fp_greater(d->ptr.p_double[k+1],d->ptr.p_double[k]) )
+                        {
+                            k = k+1;
+                        }
+                    }
+                    if( ae_fp_greater_eq(d->ptr.p_double[t],d->ptr.p_double[k]) )
+                    {
+                        t = 0;
+                    }
+                    else
+                    {
+                        tmp = d->ptr.p_double[k];
+                        d->ptr.p_double[k] = d->ptr.p_double[t];
+                        d->ptr.p_double[t] = tmp;
+                        t = k;
+                    }
+                }
+            }
+            i = i-1;
+        }
+        while(i>=1);
+    }
+    else
+    {
+        
+        /*
+         * Use Selection Sort to minimize swaps of eigenvectors
+         */
+        for(ii=2; ii<=n; ii++)
+        {
+            i = ii-1;
+            k = i;
+            p = d->ptr.p_double[i];
+            for(j=ii; j<=n; j++)
+            {
+                if( ae_fp_less(d->ptr.p_double[j],p) )
+                {
+                    k = j;
+                    p = d->ptr.p_double[j];
+                }
+            }
+            if( k!=i )
+            {
+                d->ptr.p_double[k] = d->ptr.p_double[i];
+                d->ptr.p_double[i] = p;
+                if( wastranspose )
+                {
+                    ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[i][1], 1, ae_v_len(1,n));
+                    ae_v_move(&z->ptr.pp_double[i][1], 1, &z->ptr.pp_double[k][1], 1, ae_v_len(1,n));
+                    ae_v_move(&z->ptr.pp_double[k][1], 1, &wtemp.ptr.p_double[1], 1, ae_v_len(1,n));
+                }
+                else
+                {
+                    ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[1][i], z->stride, ae_v_len(1,zrows));
+                    ae_v_move(&z->ptr.pp_double[1][i], z->stride, &z->ptr.pp_double[1][k], z->stride, ae_v_len(1,zrows));
+                    ae_v_move(&z->ptr.pp_double[1][k], z->stride, &wtemp.ptr.p_double[1], 1, ae_v_len(1,zrows));
+                }
+            }
+        }
+        if( wastranspose )
+        {
+            inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
+   [  A   B  ]
+   [  B   C  ].
+On return, RT1 is the eigenvalue of larger absolute value, and RT2
+is the eigenvalue of smaller absolute value.
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+static void evd_tdevde2(double a,
+     double b,
+     double c,
+     double* rt1,
+     double* rt2,
+     ae_state *_state)
+{
+    double ab;
+    double acmn;
+    double acmx;
+    double adf;
+    double df;
+    double rt;
+    double sm;
+    double tb;
+
+    *rt1 = 0;
+    *rt2 = 0;
+
+    sm = a+c;
+    df = a-c;
+    adf = ae_fabs(df, _state);
+    tb = b+b;
+    ab = ae_fabs(tb, _state);
+    if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) )
+    {
+        acmx = a;
+        acmn = c;
+    }
+    else
+    {
+        acmx = c;
+        acmn = a;
+    }
+    if( ae_fp_greater(adf,ab) )
+    {
+        rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state);
+    }
+    else
+    {
+        if( ae_fp_less(adf,ab) )
+        {
+            rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state);
+        }
+        else
+        {
+            
+            /*
+             * Includes case AB=ADF=0
+             */
+            rt = ab*ae_sqrt(2, _state);
+        }
+    }
+    if( ae_fp_less(sm,0) )
+    {
+        *rt1 = 0.5*(sm-rt);
+        
+        /*
+         * Order of execution important.
+         * To get fully accurate smaller eigenvalue,
+         * next line needs to be executed in higher precision.
+         */
+        *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
+    }
+    else
+    {
+        if( ae_fp_greater(sm,0) )
+        {
+            *rt1 = 0.5*(sm+rt);
+            
+            /*
+             * Order of execution important.
+             * To get fully accurate smaller eigenvalue,
+             * next line needs to be executed in higher precision.
+             */
+            *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
+        }
+        else
+        {
+            
+            /*
+             * Includes case RT1 = RT2 = 0
+             */
+            *rt1 = 0.5*rt;
+            *rt2 = -0.5*rt;
+        }
+    }
+}
+
+
+/*************************************************************************
+DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
+
+   [  A   B  ]
+   [  B   C  ].
+
+On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+eigenvector for RT1, giving the decomposition
+
+   [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
+   [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
+
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+static void evd_tdevdev2(double a,
+     double b,
+     double c,
+     double* rt1,
+     double* rt2,
+     double* cs1,
+     double* sn1,
+     ae_state *_state)
+{
+    ae_int_t sgn1;
+    ae_int_t sgn2;
+    double ab;
+    double acmn;
+    double acmx;
+    double acs;
+    double adf;
+    double cs;
+    double ct;
+    double df;
+    double rt;
+    double sm;
+    double tb;
+    double tn;
+
+    *rt1 = 0;
+    *rt2 = 0;
+    *cs1 = 0;
+    *sn1 = 0;
+
+    
+    /*
+     * Compute the eigenvalues
+     */
+    sm = a+c;
+    df = a-c;
+    adf = ae_fabs(df, _state);
+    tb = b+b;
+    ab = ae_fabs(tb, _state);
+    if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) )
+    {
+        acmx = a;
+        acmn = c;
+    }
+    else
+    {
+        acmx = c;
+        acmn = a;
+    }
+    if( ae_fp_greater(adf,ab) )
+    {
+        rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state);
+    }
+    else
+    {
+        if( ae_fp_less(adf,ab) )
+        {
+            rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state);
+        }
+        else
+        {
+            
+            /*
+             * Includes case AB=ADF=0
+             */
+            rt = ab*ae_sqrt(2, _state);
+        }
+    }
+    if( ae_fp_less(sm,0) )
+    {
+        *rt1 = 0.5*(sm-rt);
+        sgn1 = -1;
+        
+        /*
+         * Order of execution important.
+         * To get fully accurate smaller eigenvalue,
+         * next line needs to be executed in higher precision.
+         */
+        *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
+    }
+    else
+    {
+        if( ae_fp_greater(sm,0) )
+        {
+            *rt1 = 0.5*(sm+rt);
+            sgn1 = 1;
+            
+            /*
+             * Order of execution important.
+             * To get fully accurate smaller eigenvalue,
+             * next line needs to be executed in higher precision.
+             */
+            *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
+        }
+        else
+        {
+            
+            /*
+             * Includes case RT1 = RT2 = 0
+             */
+            *rt1 = 0.5*rt;
+            *rt2 = -0.5*rt;
+            sgn1 = 1;
+        }
+    }
+    
+    /*
+     * Compute the eigenvector
+     */
+    if( ae_fp_greater_eq(df,0) )
+    {
+        cs = df+rt;
+        sgn2 = 1;
+    }
+    else
+    {
+        cs = df-rt;
+        sgn2 = -1;
+    }
+    acs = ae_fabs(cs, _state);
+    if( ae_fp_greater(acs,ab) )
+    {
+        ct = -tb/cs;
+        *sn1 = 1/ae_sqrt(1+ct*ct, _state);
+        *cs1 = ct*(*sn1);
+    }
+    else
+    {
+        if( ae_fp_eq(ab,0) )
+        {
+            *cs1 = 1;
+            *sn1 = 0;
+        }
+        else
+        {
+            tn = -cs/tb;
+            *cs1 = 1/ae_sqrt(1+tn*tn, _state);
+            *sn1 = tn*(*cs1);
+        }
+    }
+    if( sgn1==sgn2 )
+    {
+        tn = *cs1;
+        *cs1 = -*sn1;
+        *sn1 = tn;
+    }
+}
+
+
+/*************************************************************************
+Internal routine
+*************************************************************************/
+static double evd_tdevdpythag(double a, double b, ae_state *_state)
+{
+    double result;
+
+
+    if( ae_fp_less(ae_fabs(a, _state),ae_fabs(b, _state)) )
+    {
+        result = ae_fabs(b, _state)*ae_sqrt(1+ae_sqr(a/b, _state), _state);
+    }
+    else
+    {
+        result = ae_fabs(a, _state)*ae_sqrt(1+ae_sqr(b/a, _state), _state);
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Internal routine
+*************************************************************************/
+static double evd_tdevdextsign(double a, double b, ae_state *_state)
+{
+    double result;
+
+
+    if( ae_fp_greater_eq(b,0) )
+    {
+        result = ae_fabs(a, _state);
+    }
+    else
+    {
+        result = -ae_fabs(a, _state);
+    }
+    return result;
+}
+
+
+static ae_bool evd_internalbisectioneigenvalues(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t irange,
+     ae_int_t iorder,
+     double vl,
+     double vu,
+     ae_int_t il,
+     ae_int_t iu,
+     double abstol,
+     /* Real    */ ae_vector* w,
+     ae_int_t* m,
+     ae_int_t* nsplit,
+     /* Integer */ ae_vector* iblock,
+     /* Integer */ ae_vector* isplit,
+     ae_int_t* errorcode,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _d;
+    ae_vector _e;
+    double fudge;
+    double relfac;
+    ae_bool ncnvrg;
+    ae_bool toofew;
+    ae_int_t ib;
+    ae_int_t ibegin;
+    ae_int_t idiscl;
+    ae_int_t idiscu;
+    ae_int_t ie;
+    ae_int_t iend;
+    ae_int_t iinfo;
+    ae_int_t im;
+    ae_int_t iin;
+    ae_int_t ioff;
+    ae_int_t iout;
+    ae_int_t itmax;
+    ae_int_t iw;
+    ae_int_t iwoff;
+    ae_int_t j;
+    ae_int_t itmp1;
+    ae_int_t jb;
+    ae_int_t jdisc;
+    ae_int_t je;
+    ae_int_t nwl;
+    ae_int_t nwu;
+    double atoli;
+    double bnorm;
+    double gl;
+    double gu;
+    double pivmin;
+    double rtoli;
+    double safemn;
+    double tmp1;
+    double tmp2;
+    double tnorm;
+    double ulp;
+    double wkill;
+    double wl;
+    double wlu;
+    double wu;
+    double wul;
+    double scalefactor;
+    double t;
+    ae_vector idumma;
+    ae_vector work;
+    ae_vector iwork;
+    ae_vector ia1s2;
+    ae_vector ra1s2;
+    ae_matrix ra1s2x2;
+    ae_matrix ia1s2x2;
+    ae_vector ra1siin;
+    ae_vector ra2siin;
+    ae_vector ra3siin;
+    ae_vector ra4siin;
+    ae_matrix ra1siinx2;
+    ae_matrix ia1siinx2;
+    ae_vector iworkspace;
+    ae_vector rworkspace;
+    ae_int_t tmpi;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_d, d, _state, ae_true);
+    d = &_d;
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+    ae_vector_clear(w);
+    *m = 0;
+    *nsplit = 0;
+    ae_vector_clear(iblock);
+    ae_vector_clear(isplit);
+    *errorcode = 0;
+    ae_vector_init(&idumma, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&ia1s2, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&ra1s2, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&ra1s2x2, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&ia1s2x2, 0, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&ra1siin, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ra2siin, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ra3siin, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ra4siin, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&ra1siinx2, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&ia1siinx2, 0, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&iworkspace, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&rworkspace, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Quick return if possible
+     */
+    *m = 0;
+    if( n==0 )
+    {
+        result = ae_true;
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Get machine constants
+     * NB is the minimum vector length for vector bisection, or 0
+     * if only scalar is to be done.
+     */
+    fudge = 2;
+    relfac = 2;
+    safemn = ae_minrealnumber;
+    ulp = 2*ae_machineepsilon;
+    rtoli = ulp*relfac;
+    ae_vector_set_length(&idumma, 1+1, _state);
+    ae_vector_set_length(&work, 4*n+1, _state);
+    ae_vector_set_length(&iwork, 3*n+1, _state);
+    ae_vector_set_length(w, n+1, _state);
+    ae_vector_set_length(iblock, n+1, _state);
+    ae_vector_set_length(isplit, n+1, _state);
+    ae_vector_set_length(&ia1s2, 2+1, _state);
+    ae_vector_set_length(&ra1s2, 2+1, _state);
+    ae_matrix_set_length(&ra1s2x2, 2+1, 2+1, _state);
+    ae_matrix_set_length(&ia1s2x2, 2+1, 2+1, _state);
+    ae_vector_set_length(&ra1siin, n+1, _state);
+    ae_vector_set_length(&ra2siin, n+1, _state);
+    ae_vector_set_length(&ra3siin, n+1, _state);
+    ae_vector_set_length(&ra4siin, n+1, _state);
+    ae_matrix_set_length(&ra1siinx2, n+1, 2+1, _state);
+    ae_matrix_set_length(&ia1siinx2, n+1, 2+1, _state);
+    ae_vector_set_length(&iworkspace, n+1, _state);
+    ae_vector_set_length(&rworkspace, n+1, _state);
+    
+    /*
+     * these initializers are not really necessary,
+     * but without them compiler complains about uninitialized locals
+     */
+    wlu = 0;
+    wul = 0;
+    
+    /*
+     * Check for Errors
+     */
+    result = ae_false;
+    *errorcode = 0;
+    if( irange<=0||irange>=4 )
+    {
+        *errorcode = -4;
+    }
+    if( iorder<=0||iorder>=3 )
+    {
+        *errorcode = -5;
+    }
+    if( n<0 )
+    {
+        *errorcode = -3;
+    }
+    if( irange==2&&ae_fp_greater_eq(vl,vu) )
+    {
+        *errorcode = -6;
+    }
+    if( irange==3&&(il<1||il>ae_maxint(1, n, _state)) )
+    {
+        *errorcode = -8;
+    }
+    if( irange==3&&(iu<ae_minint(n, il, _state)||iu>n) )
+    {
+        *errorcode = -9;
+    }
+    if( *errorcode!=0 )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Initialize error flags
+     */
+    ncnvrg = ae_false;
+    toofew = ae_false;
+    
+    /*
+     * Simplifications:
+     */
+    if( (irange==3&&il==1)&&iu==n )
+    {
+        irange = 1;
+    }
+    
+    /*
+     * Special Case when N=1
+     */
+    if( n==1 )
+    {
+        *nsplit = 1;
+        isplit->ptr.p_int[1] = 1;
+        if( irange==2&&(ae_fp_greater_eq(vl,d->ptr.p_double[1])||ae_fp_less(vu,d->ptr.p_double[1])) )
+        {
+            *m = 0;
+        }
+        else
+        {
+            w->ptr.p_double[1] = d->ptr.p_double[1];
+            iblock->ptr.p_int[1] = 1;
+            *m = 1;
+        }
+        result = ae_true;
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Scaling
+     */
+    t = ae_fabs(d->ptr.p_double[n], _state);
+    for(j=1; j<=n-1; j++)
+    {
+        t = ae_maxreal(t, ae_fabs(d->ptr.p_double[j], _state), _state);
+        t = ae_maxreal(t, ae_fabs(e->ptr.p_double[j], _state), _state);
+    }
+    scalefactor = 1;
+    if( ae_fp_neq(t,0) )
+    {
+        if( ae_fp_greater(t,ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state)*ae_sqrt(ae_maxrealnumber, _state)) )
+        {
+            scalefactor = t;
+        }
+        if( ae_fp_less(t,ae_sqrt(ae_sqrt(ae_maxrealnumber, _state), _state)*ae_sqrt(ae_minrealnumber, _state)) )
+        {
+            scalefactor = t;
+        }
+        for(j=1; j<=n-1; j++)
+        {
+            d->ptr.p_double[j] = d->ptr.p_double[j]/scalefactor;
+            e->ptr.p_double[j] = e->ptr.p_double[j]/scalefactor;
+        }
+        d->ptr.p_double[n] = d->ptr.p_double[n]/scalefactor;
+    }
+    
+    /*
+     * Compute Splitting Points
+     */
+    *nsplit = 1;
+    work.ptr.p_double[n] = 0;
+    pivmin = 1;
+    for(j=2; j<=n; j++)
+    {
+        tmp1 = ae_sqr(e->ptr.p_double[j-1], _state);
+        if( ae_fp_greater(ae_fabs(d->ptr.p_double[j]*d->ptr.p_double[j-1], _state)*ae_sqr(ulp, _state)+safemn,tmp1) )
+        {
+            isplit->ptr.p_int[*nsplit] = j-1;
+            *nsplit = *nsplit+1;
+            work.ptr.p_double[j-1] = 0;
+        }
+        else
+        {
+            work.ptr.p_double[j-1] = tmp1;
+            pivmin = ae_maxreal(pivmin, tmp1, _state);
+        }
+    }
+    isplit->ptr.p_int[*nsplit] = n;
+    pivmin = pivmin*safemn;
+    
+    /*
+     * Compute Interval and ATOLI
+     */
+    if( irange==3 )
+    {
+        
+        /*
+         * RANGE='I': Compute the interval containing eigenvalues
+         *     IL through IU.
+         *
+         * Compute Gershgorin interval for entire (split) matrix
+         * and use it as the initial interval
+         */
+        gu = d->ptr.p_double[1];
+        gl = d->ptr.p_double[1];
+        tmp1 = 0;
+        for(j=1; j<=n-1; j++)
+        {
+            tmp2 = ae_sqrt(work.ptr.p_double[j], _state);
+            gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state);
+            gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state);
+            tmp1 = tmp2;
+        }
+        gu = ae_maxreal(gu, d->ptr.p_double[n]+tmp1, _state);
+        gl = ae_minreal(gl, d->ptr.p_double[n]-tmp1, _state);
+        tnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
+        gl = gl-fudge*tnorm*ulp*n-fudge*2*pivmin;
+        gu = gu+fudge*tnorm*ulp*n+fudge*pivmin;
+        
+        /*
+         * Compute Iteration parameters
+         */
+        itmax = ae_iceil((ae_log(tnorm+pivmin, _state)-ae_log(pivmin, _state))/ae_log(2, _state), _state)+2;
+        if( ae_fp_less_eq(abstol,0) )
+        {
+            atoli = ulp*tnorm;
+        }
+        else
+        {
+            atoli = abstol;
+        }
+        work.ptr.p_double[n+1] = gl;
+        work.ptr.p_double[n+2] = gl;
+        work.ptr.p_double[n+3] = gu;
+        work.ptr.p_double[n+4] = gu;
+        work.ptr.p_double[n+5] = gl;
+        work.ptr.p_double[n+6] = gu;
+        iwork.ptr.p_int[1] = -1;
+        iwork.ptr.p_int[2] = -1;
+        iwork.ptr.p_int[3] = n+1;
+        iwork.ptr.p_int[4] = n+1;
+        iwork.ptr.p_int[5] = il-1;
+        iwork.ptr.p_int[6] = iu;
+        
+        /*
+         * Calling DLAEBZ
+         *
+         * DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E,
+         *    WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT,
+         *    IWORK, W, IBLOCK, IINFO )
+         */
+        ia1s2.ptr.p_int[1] = iwork.ptr.p_int[5];
+        ia1s2.ptr.p_int[2] = iwork.ptr.p_int[6];
+        ra1s2.ptr.p_double[1] = work.ptr.p_double[n+5];
+        ra1s2.ptr.p_double[2] = work.ptr.p_double[n+6];
+        ra1s2x2.ptr.pp_double[1][1] = work.ptr.p_double[n+1];
+        ra1s2x2.ptr.pp_double[2][1] = work.ptr.p_double[n+2];
+        ra1s2x2.ptr.pp_double[1][2] = work.ptr.p_double[n+3];
+        ra1s2x2.ptr.pp_double[2][2] = work.ptr.p_double[n+4];
+        ia1s2x2.ptr.pp_int[1][1] = iwork.ptr.p_int[1];
+        ia1s2x2.ptr.pp_int[2][1] = iwork.ptr.p_int[2];
+        ia1s2x2.ptr.pp_int[1][2] = iwork.ptr.p_int[3];
+        ia1s2x2.ptr.pp_int[2][2] = iwork.ptr.p_int[4];
+        evd_internaldlaebz(3, itmax, n, 2, 2, atoli, rtoli, pivmin, d, e, &work, &ia1s2, &ra1s2x2, &ra1s2, &iout, &ia1s2x2, w, iblock, &iinfo, _state);
+        iwork.ptr.p_int[5] = ia1s2.ptr.p_int[1];
+        iwork.ptr.p_int[6] = ia1s2.ptr.p_int[2];
+        work.ptr.p_double[n+5] = ra1s2.ptr.p_double[1];
+        work.ptr.p_double[n+6] = ra1s2.ptr.p_double[2];
+        work.ptr.p_double[n+1] = ra1s2x2.ptr.pp_double[1][1];
+        work.ptr.p_double[n+2] = ra1s2x2.ptr.pp_double[2][1];
+        work.ptr.p_double[n+3] = ra1s2x2.ptr.pp_double[1][2];
+        work.ptr.p_double[n+4] = ra1s2x2.ptr.pp_double[2][2];
+        iwork.ptr.p_int[1] = ia1s2x2.ptr.pp_int[1][1];
+        iwork.ptr.p_int[2] = ia1s2x2.ptr.pp_int[2][1];
+        iwork.ptr.p_int[3] = ia1s2x2.ptr.pp_int[1][2];
+        iwork.ptr.p_int[4] = ia1s2x2.ptr.pp_int[2][2];
+        if( iwork.ptr.p_int[6]==iu )
+        {
+            wl = work.ptr.p_double[n+1];
+            wlu = work.ptr.p_double[n+3];
+            nwl = iwork.ptr.p_int[1];
+            wu = work.ptr.p_double[n+4];
+            wul = work.ptr.p_double[n+2];
+            nwu = iwork.ptr.p_int[4];
+        }
+        else
+        {
+            wl = work.ptr.p_double[n+2];
+            wlu = work.ptr.p_double[n+4];
+            nwl = iwork.ptr.p_int[2];
+            wu = work.ptr.p_double[n+3];
+            wul = work.ptr.p_double[n+1];
+            nwu = iwork.ptr.p_int[3];
+        }
+        if( ((nwl<0||nwl>=n)||nwu<1)||nwu>n )
+        {
+            *errorcode = 4;
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+    }
+    else
+    {
+        
+        /*
+         * RANGE='A' or 'V' -- Set ATOLI
+         */
+        tnorm = ae_maxreal(ae_fabs(d->ptr.p_double[1], _state)+ae_fabs(e->ptr.p_double[1], _state), ae_fabs(d->ptr.p_double[n], _state)+ae_fabs(e->ptr.p_double[n-1], _state), _state);
+        for(j=2; j<=n-1; j++)
+        {
+            tnorm = ae_maxreal(tnorm, ae_fabs(d->ptr.p_double[j], _state)+ae_fabs(e->ptr.p_double[j-1], _state)+ae_fabs(e->ptr.p_double[j], _state), _state);
+        }
+        if( ae_fp_less_eq(abstol,0) )
+        {
+            atoli = ulp*tnorm;
+        }
+        else
+        {
+            atoli = abstol;
+        }
+        if( irange==2 )
+        {
+            wl = vl;
+            wu = vu;
+        }
+        else
+        {
+            wl = 0;
+            wu = 0;
+        }
+    }
+    
+    /*
+     * Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU.
+     * NWL accumulates the number of eigenvalues .le. WL,
+     * NWU accumulates the number of eigenvalues .le. WU
+     */
+    *m = 0;
+    iend = 0;
+    *errorcode = 0;
+    nwl = 0;
+    nwu = 0;
+    for(jb=1; jb<=*nsplit; jb++)
+    {
+        ioff = iend;
+        ibegin = ioff+1;
+        iend = isplit->ptr.p_int[jb];
+        iin = iend-ioff;
+        if( iin==1 )
+        {
+            
+            /*
+             * Special Case -- IIN=1
+             */
+            if( irange==1||ae_fp_greater_eq(wl,d->ptr.p_double[ibegin]-pivmin) )
+            {
+                nwl = nwl+1;
+            }
+            if( irange==1||ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin) )
+            {
+                nwu = nwu+1;
+            }
+            if( irange==1||(ae_fp_less(wl,d->ptr.p_double[ibegin]-pivmin)&&ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin)) )
+            {
+                *m = *m+1;
+                w->ptr.p_double[*m] = d->ptr.p_double[ibegin];
+                iblock->ptr.p_int[*m] = jb;
+            }
+        }
+        else
+        {
+            
+            /*
+             * General Case -- IIN > 1
+             *
+             * Compute Gershgorin Interval
+             * and use it as the initial interval
+             */
+            gu = d->ptr.p_double[ibegin];
+            gl = d->ptr.p_double[ibegin];
+            tmp1 = 0;
+            for(j=ibegin; j<=iend-1; j++)
+            {
+                tmp2 = ae_fabs(e->ptr.p_double[j], _state);
+                gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state);
+                gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state);
+                tmp1 = tmp2;
+            }
+            gu = ae_maxreal(gu, d->ptr.p_double[iend]+tmp1, _state);
+            gl = ae_minreal(gl, d->ptr.p_double[iend]-tmp1, _state);
+            bnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
+            gl = gl-fudge*bnorm*ulp*iin-fudge*pivmin;
+            gu = gu+fudge*bnorm*ulp*iin+fudge*pivmin;
+            
+            /*
+             * Compute ATOLI for the current submatrix
+             */
+            if( ae_fp_less_eq(abstol,0) )
+            {
+                atoli = ulp*ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
+            }
+            else
+            {
+                atoli = abstol;
+            }
+            if( irange>1 )
+            {
+                if( ae_fp_less(gu,wl) )
+                {
+                    nwl = nwl+iin;
+                    nwu = nwu+iin;
+                    continue;
+                }
+                gl = ae_maxreal(gl, wl, _state);
+                gu = ae_minreal(gu, wu, _state);
+                if( ae_fp_greater_eq(gl,gu) )
+                {
+                    continue;
+                }
+            }
+            
+            /*
+             * Set Up Initial Interval
+             */
+            work.ptr.p_double[n+1] = gl;
+            work.ptr.p_double[n+iin+1] = gu;
+            
+            /*
+             * Calling DLAEBZ
+             *
+             * CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
+             *    D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
+             *    IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM,
+             *    IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
+             */
+            for(tmpi=1; tmpi<=iin; tmpi++)
+            {
+                ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi];
+                if( ibegin-1+tmpi<n )
+                {
+                    ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi];
+                }
+                ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi];
+                ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi];
+                ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin];
+                ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi];
+                rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi];
+                iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi];
+                ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi];
+                ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin];
+            }
+            evd_internaldlaebz(1, 0, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &im, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state);
+            for(tmpi=1; tmpi<=iin; tmpi++)
+            {
+                work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1];
+                work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2];
+                work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi];
+                w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi];
+                iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi];
+                iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1];
+                iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2];
+            }
+            nwl = nwl+iwork.ptr.p_int[1];
+            nwu = nwu+iwork.ptr.p_int[iin+1];
+            iwoff = *m-iwork.ptr.p_int[1];
+            
+            /*
+             * Compute Eigenvalues
+             */
+            itmax = ae_iceil((ae_log(gu-gl+pivmin, _state)-ae_log(pivmin, _state))/ae_log(2, _state), _state)+2;
+            
+            /*
+             * Calling DLAEBZ
+             *
+             *CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
+             *    D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
+             *    IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT,
+             *    IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
+             */
+            for(tmpi=1; tmpi<=iin; tmpi++)
+            {
+                ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi];
+                if( ibegin-1+tmpi<n )
+                {
+                    ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi];
+                }
+                ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi];
+                ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi];
+                ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin];
+                ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi];
+                rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi];
+                iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi];
+                ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi];
+                ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin];
+            }
+            evd_internaldlaebz(2, itmax, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &iout, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state);
+            for(tmpi=1; tmpi<=iin; tmpi++)
+            {
+                work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1];
+                work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2];
+                work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi];
+                w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi];
+                iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi];
+                iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1];
+                iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2];
+            }
+            
+            /*
+             * Copy Eigenvalues Into W and IBLOCK
+             * Use -JB for block number for unconverged eigenvalues.
+             */
+            for(j=1; j<=iout; j++)
+            {
+                tmp1 = 0.5*(work.ptr.p_double[j+n]+work.ptr.p_double[j+iin+n]);
+                
+                /*
+                 * Flag non-convergence.
+                 */
+                if( j>iout-iinfo )
+                {
+                    ncnvrg = ae_true;
+                    ib = -jb;
+                }
+                else
+                {
+                    ib = jb;
+                }
+                for(je=iwork.ptr.p_int[j]+1+iwoff; je<=iwork.ptr.p_int[j+iin]+iwoff; je++)
+                {
+                    w->ptr.p_double[je] = tmp1;
+                    iblock->ptr.p_int[je] = ib;
+                }
+            }
+            *m = *m+im;
+        }
+    }
+    
+    /*
+     * If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU
+     * If NWL+1 < IL or NWU > IU, discard extra eigenvalues.
+     */
+    if( irange==3 )
+    {
+        im = 0;
+        idiscl = il-1-nwl;
+        idiscu = nwu-iu;
+        if( idiscl>0||idiscu>0 )
+        {
+            for(je=1; je<=*m; je++)
+            {
+                if( ae_fp_less_eq(w->ptr.p_double[je],wlu)&&idiscl>0 )
+                {
+                    idiscl = idiscl-1;
+                }
+                else
+                {
+                    if( ae_fp_greater_eq(w->ptr.p_double[je],wul)&&idiscu>0 )
+                    {
+                        idiscu = idiscu-1;
+                    }
+                    else
+                    {
+                        im = im+1;
+                        w->ptr.p_double[im] = w->ptr.p_double[je];
+                        iblock->ptr.p_int[im] = iblock->ptr.p_int[je];
+                    }
+                }
+            }
+            *m = im;
+        }
+        if( idiscl>0||idiscu>0 )
+        {
+            
+            /*
+             * Code to deal with effects of bad arithmetic:
+             * Some low eigenvalues to be discarded are not in (WL,WLU],
+             * or high eigenvalues to be discarded are not in (WUL,WU]
+             * so just kill off the smallest IDISCL/largest IDISCU
+             * eigenvalues, by simply finding the smallest/largest
+             * eigenvalue(s).
+             *
+             * (If N(w) is monotone non-decreasing, this should never
+             *  happen.)
+             */
+            if( idiscl>0 )
+            {
+                wkill = wu;
+                for(jdisc=1; jdisc<=idiscl; jdisc++)
+                {
+                    iw = 0;
+                    for(je=1; je<=*m; je++)
+                    {
+                        if( iblock->ptr.p_int[je]!=0&&(ae_fp_less(w->ptr.p_double[je],wkill)||iw==0) )
+                        {
+                            iw = je;
+                            wkill = w->ptr.p_double[je];
+                        }
+                    }
+                    iblock->ptr.p_int[iw] = 0;
+                }
+            }
+            if( idiscu>0 )
+            {
+                wkill = wl;
+                for(jdisc=1; jdisc<=idiscu; jdisc++)
+                {
+                    iw = 0;
+                    for(je=1; je<=*m; je++)
+                    {
+                        if( iblock->ptr.p_int[je]!=0&&(ae_fp_greater(w->ptr.p_double[je],wkill)||iw==0) )
+                        {
+                            iw = je;
+                            wkill = w->ptr.p_double[je];
+                        }
+                    }
+                    iblock->ptr.p_int[iw] = 0;
+                }
+            }
+            im = 0;
+            for(je=1; je<=*m; je++)
+            {
+                if( iblock->ptr.p_int[je]!=0 )
+                {
+                    im = im+1;
+                    w->ptr.p_double[im] = w->ptr.p_double[je];
+                    iblock->ptr.p_int[im] = iblock->ptr.p_int[je];
+                }
+            }
+            *m = im;
+        }
+        if( idiscl<0||idiscu<0 )
+        {
+            toofew = ae_true;
+        }
+    }
+    
+    /*
+     * If ORDER='B', do nothing -- the eigenvalues are already sorted
+     *    by block.
+     * If ORDER='E', sort the eigenvalues from smallest to largest
+     */
+    if( iorder==1&&*nsplit>1 )
+    {
+        for(je=1; je<=*m-1; je++)
+        {
+            ie = 0;
+            tmp1 = w->ptr.p_double[je];
+            for(j=je+1; j<=*m; j++)
+            {
+                if( ae_fp_less(w->ptr.p_double[j],tmp1) )
+                {
+                    ie = j;
+                    tmp1 = w->ptr.p_double[j];
+                }
+            }
+            if( ie!=0 )
+            {
+                itmp1 = iblock->ptr.p_int[ie];
+                w->ptr.p_double[ie] = w->ptr.p_double[je];
+                iblock->ptr.p_int[ie] = iblock->ptr.p_int[je];
+                w->ptr.p_double[je] = tmp1;
+                iblock->ptr.p_int[je] = itmp1;
+            }
+        }
+    }
+    for(j=1; j<=*m; j++)
+    {
+        w->ptr.p_double[j] = w->ptr.p_double[j]*scalefactor;
+    }
+    *errorcode = 0;
+    if( ncnvrg )
+    {
+        *errorcode = *errorcode+1;
+    }
+    if( toofew )
+    {
+        *errorcode = *errorcode+2;
+    }
+    result = *errorcode==0;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+static void evd_internaldstein(ae_int_t n,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t m,
+     /* Real    */ ae_vector* w,
+     /* Integer */ ae_vector* iblock,
+     /* Integer */ ae_vector* isplit,
+     /* Real    */ ae_matrix* z,
+     /* Integer */ ae_vector* ifail,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _e;
+    ae_vector _w;
+    ae_int_t maxits;
+    ae_int_t extra;
+    ae_int_t b1;
+    ae_int_t blksiz;
+    ae_int_t bn;
+    ae_int_t gpind;
+    ae_int_t i;
+    ae_int_t iinfo;
+    ae_int_t its;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t jblk;
+    ae_int_t jmax;
+    ae_int_t nblk;
+    ae_int_t nrmchk;
+    double dtpcrt;
+    double eps;
+    double eps1;
+    double nrm;
+    double onenrm;
+    double ortol;
+    double pertol;
+    double scl;
+    double sep;
+    double tol;
+    double xj;
+    double xjm;
+    double ztr;
+    ae_vector work1;
+    ae_vector work2;
+    ae_vector work3;
+    ae_vector work4;
+    ae_vector work5;
+    ae_vector iwork;
+    ae_bool tmpcriterion;
+    ae_int_t ti;
+    ae_int_t i1;
+    ae_int_t i2;
+    double v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+    ae_vector_init_copy(&_w, w, _state, ae_true);
+    w = &_w;
+    ae_matrix_clear(z);
+    ae_vector_clear(ifail);
+    *info = 0;
+    ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work3, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work4, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work5, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
+
+    maxits = 5;
+    extra = 2;
+    ae_vector_set_length(&work1, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(&work2, ae_maxint(n-1, 1, _state)+1, _state);
+    ae_vector_set_length(&work3, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(&work4, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(&work5, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(&iwork, ae_maxint(n, 1, _state)+1, _state);
+    ae_vector_set_length(ifail, ae_maxint(m, 1, _state)+1, _state);
+    ae_matrix_set_length(z, ae_maxint(n, 1, _state)+1, ae_maxint(m, 1, _state)+1, _state);
+    
+    /*
+     * these initializers are not really necessary,
+     * but without them compiler complains about uninitialized locals
+     */
+    gpind = 0;
+    onenrm = 0;
+    ortol = 0;
+    dtpcrt = 0;
+    xjm = 0;
+    
+    /*
+     * Test the input parameters.
+     */
+    *info = 0;
+    for(i=1; i<=m; i++)
+    {
+        ifail->ptr.p_int[i] = 0;
+    }
+    if( n<0 )
+    {
+        *info = -1;
+        ae_frame_leave(_state);
+        return;
+    }
+    if( m<0||m>n )
+    {
+        *info = -4;
+        ae_frame_leave(_state);
+        return;
+    }
+    for(j=2; j<=m; j++)
+    {
+        if( iblock->ptr.p_int[j]<iblock->ptr.p_int[j-1] )
+        {
+            *info = -6;
+            break;
+        }
+        if( iblock->ptr.p_int[j]==iblock->ptr.p_int[j-1]&&ae_fp_less(w->ptr.p_double[j],w->ptr.p_double[j-1]) )
+        {
+            *info = -5;
+            break;
+        }
+    }
+    if( *info!=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Quick return if possible
+     */
+    if( n==0||m==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        z->ptr.pp_double[1][1] = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Some preparations
+     */
+    ti = n-1;
+    ae_v_move(&work1.ptr.p_double[1], 1, &e->ptr.p_double[1], 1, ae_v_len(1,ti));
+    ae_vector_set_length(e, n+1, _state);
+    ae_v_move(&e->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,ti));
+    ae_v_move(&work1.ptr.p_double[1], 1, &w->ptr.p_double[1], 1, ae_v_len(1,m));
+    ae_vector_set_length(w, n+1, _state);
+    ae_v_move(&w->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,m));
+    
+    /*
+     * Get machine constants.
+     */
+    eps = ae_machineepsilon;
+    
+    /*
+     * Compute eigenvectors of matrix blocks.
+     */
+    j1 = 1;
+    for(nblk=1; nblk<=iblock->ptr.p_int[m]; nblk++)
+    {
+        
+        /*
+         * Find starting and ending indices of block nblk.
+         */
+        if( nblk==1 )
+        {
+            b1 = 1;
+        }
+        else
+        {
+            b1 = isplit->ptr.p_int[nblk-1]+1;
+        }
+        bn = isplit->ptr.p_int[nblk];
+        blksiz = bn-b1+1;
+        if( blksiz!=1 )
+        {
+            
+            /*
+             * Compute reorthogonalization criterion and stopping criterion.
+             */
+            gpind = b1;
+            onenrm = ae_fabs(d->ptr.p_double[b1], _state)+ae_fabs(e->ptr.p_double[b1], _state);
+            onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[bn], _state)+ae_fabs(e->ptr.p_double[bn-1], _state), _state);
+            for(i=b1+1; i<=bn-1; i++)
+            {
+                onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state)+ae_fabs(e->ptr.p_double[i], _state), _state);
+            }
+            ortol = 0.001*onenrm;
+            dtpcrt = ae_sqrt(0.1/blksiz, _state);
+        }
+        
+        /*
+         * Loop through eigenvalues of block nblk.
+         */
+        jblk = 0;
+        for(j=j1; j<=m; j++)
+        {
+            if( iblock->ptr.p_int[j]!=nblk )
+            {
+                j1 = j;
+                break;
+            }
+            jblk = jblk+1;
+            xj = w->ptr.p_double[j];
+            if( blksiz==1 )
+            {
+                
+                /*
+                 * Skip all the work if the block size is one.
+                 */
+                work1.ptr.p_double[1] = 1;
+            }
+            else
+            {
+                
+                /*
+                 * If eigenvalues j and j-1 are too close, add a relatively
+                 * small perturbation.
+                 */
+                if( jblk>1 )
+                {
+                    eps1 = ae_fabs(eps*xj, _state);
+                    pertol = 10*eps1;
+                    sep = xj-xjm;
+                    if( ae_fp_less(sep,pertol) )
+                    {
+                        xj = xjm+pertol;
+                    }
+                }
+                its = 0;
+                nrmchk = 0;
+                
+                /*
+                 * Get random starting vector.
+                 */
+                for(ti=1; ti<=blksiz; ti++)
+                {
+                    work1.ptr.p_double[ti] = 2*ae_randomreal(_state)-1;
+                }
+                
+                /*
+                 * Copy the matrix T so it won't be destroyed in factorization.
+                 */
+                for(ti=1; ti<=blksiz-1; ti++)
+                {
+                    work2.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1];
+                    work3.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1];
+                    work4.ptr.p_double[ti] = d->ptr.p_double[b1+ti-1];
+                }
+                work4.ptr.p_double[blksiz] = d->ptr.p_double[b1+blksiz-1];
+                
+                /*
+                 * Compute LU factors with partial pivoting  ( PT = LU )
+                 */
+                tol = 0;
+                evd_tdininternaldlagtf(blksiz, &work4, xj, &work2, &work3, tol, &work5, &iwork, &iinfo, _state);
+                
+                /*
+                 * Update iteration count.
+                 */
+                do
+                {
+                    its = its+1;
+                    if( its>maxits )
+                    {
+                        
+                        /*
+                         * If stopping criterion was not satisfied, update info and
+                         * store eigenvector number in array ifail.
+                         */
+                        *info = *info+1;
+                        ifail->ptr.p_int[*info] = j;
+                        break;
+                    }
+                    
+                    /*
+                     * Normalize and scale the righthand side vector Pb.
+                     */
+                    v = 0;
+                    for(ti=1; ti<=blksiz; ti++)
+                    {
+                        v = v+ae_fabs(work1.ptr.p_double[ti], _state);
+                    }
+                    scl = blksiz*onenrm*ae_maxreal(eps, ae_fabs(work4.ptr.p_double[blksiz], _state), _state)/v;
+                    ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl);
+                    
+                    /*
+                     * Solve the system LU = Pb.
+                     */
+                    evd_tdininternaldlagts(blksiz, &work4, &work2, &work3, &work5, &iwork, &work1, &tol, &iinfo, _state);
+                    
+                    /*
+                     * Reorthogonalize by modified Gram-Schmidt if eigenvalues are
+                     * close enough.
+                     */
+                    if( jblk!=1 )
+                    {
+                        if( ae_fp_greater(ae_fabs(xj-xjm, _state),ortol) )
+                        {
+                            gpind = j;
+                        }
+                        if( gpind!=j )
+                        {
+                            for(i=gpind; i<=j-1; i++)
+                            {
+                                i1 = b1;
+                                i2 = b1+blksiz-1;
+                                ztr = ae_v_dotproduct(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz));
+                                ae_v_subd(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz), ztr);
+                            }
+                        }
+                    }
+                    
+                    /*
+                     * Check the infinity norm of the iterate.
+                     */
+                    jmax = vectoridxabsmax(&work1, 1, blksiz, _state);
+                    nrm = ae_fabs(work1.ptr.p_double[jmax], _state);
+                    
+                    /*
+                     * Continue for additional iterations after norm reaches
+                     * stopping criterion.
+                     */
+                    tmpcriterion = ae_false;
+                    if( ae_fp_less(nrm,dtpcrt) )
+                    {
+                        tmpcriterion = ae_true;
+                    }
+                    else
+                    {
+                        nrmchk = nrmchk+1;
+                        if( nrmchk<extra+1 )
+                        {
+                            tmpcriterion = ae_true;
+                        }
+                    }
+                }
+                while(tmpcriterion);
+                
+                /*
+                 * Accept iterate as jth eigenvector.
+                 */
+                scl = 1/vectornorm2(&work1, 1, blksiz, _state);
+                jmax = vectoridxabsmax(&work1, 1, blksiz, _state);
+                if( ae_fp_less(work1.ptr.p_double[jmax],0) )
+                {
+                    scl = -scl;
+                }
+                ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl);
+            }
+            for(i=1; i<=n; i++)
+            {
+                z->ptr.pp_double[i][j] = 0;
+            }
+            for(i=1; i<=blksiz; i++)
+            {
+                z->ptr.pp_double[b1+i-1][j] = work1.ptr.p_double[i];
+            }
+            
+            /*
+             * Save the shift to check eigenvalue spacing at next
+             * iteration.
+             */
+            xjm = xj;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+static void evd_tdininternaldlagtf(ae_int_t n,
+     /* Real    */ ae_vector* a,
+     double lambdav,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* c,
+     double tol,
+     /* Real    */ ae_vector* d,
+     /* Integer */ ae_vector* iin,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_int_t k;
+    double eps;
+    double mult;
+    double piv1;
+    double piv2;
+    double scale1;
+    double scale2;
+    double temp;
+    double tl;
+
+    *info = 0;
+
+    *info = 0;
+    if( n<0 )
+    {
+        *info = -1;
+        return;
+    }
+    if( n==0 )
+    {
+        return;
+    }
+    a->ptr.p_double[1] = a->ptr.p_double[1]-lambdav;
+    iin->ptr.p_int[n] = 0;
+    if( n==1 )
+    {
+        if( ae_fp_eq(a->ptr.p_double[1],0) )
+        {
+            iin->ptr.p_int[1] = 1;
+        }
+        return;
+    }
+    eps = ae_machineepsilon;
+    tl = ae_maxreal(tol, eps, _state);
+    scale1 = ae_fabs(a->ptr.p_double[1], _state)+ae_fabs(b->ptr.p_double[1], _state);
+    for(k=1; k<=n-1; k++)
+    {
+        a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-lambdav;
+        scale2 = ae_fabs(c->ptr.p_double[k], _state)+ae_fabs(a->ptr.p_double[k+1], _state);
+        if( k<n-1 )
+        {
+            scale2 = scale2+ae_fabs(b->ptr.p_double[k+1], _state);
+        }
+        if( ae_fp_eq(a->ptr.p_double[k],0) )
+        {
+            piv1 = 0;
+        }
+        else
+        {
+            piv1 = ae_fabs(a->ptr.p_double[k], _state)/scale1;
+        }
+        if( ae_fp_eq(c->ptr.p_double[k],0) )
+        {
+            iin->ptr.p_int[k] = 0;
+            piv2 = 0;
+            scale1 = scale2;
+            if( k<n-1 )
+            {
+                d->ptr.p_double[k] = 0;
+            }
+        }
+        else
+        {
+            piv2 = ae_fabs(c->ptr.p_double[k], _state)/scale2;
+            if( ae_fp_less_eq(piv2,piv1) )
+            {
+                iin->ptr.p_int[k] = 0;
+                scale1 = scale2;
+                c->ptr.p_double[k] = c->ptr.p_double[k]/a->ptr.p_double[k];
+                a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-c->ptr.p_double[k]*b->ptr.p_double[k];
+                if( k<n-1 )
+                {
+                    d->ptr.p_double[k] = 0;
+                }
+            }
+            else
+            {
+                iin->ptr.p_int[k] = 1;
+                mult = a->ptr.p_double[k]/c->ptr.p_double[k];
+                a->ptr.p_double[k] = c->ptr.p_double[k];
+                temp = a->ptr.p_double[k+1];
+                a->ptr.p_double[k+1] = b->ptr.p_double[k]-mult*temp;
+                if( k<n-1 )
+                {
+                    d->ptr.p_double[k] = b->ptr.p_double[k+1];
+                    b->ptr.p_double[k+1] = -mult*d->ptr.p_double[k];
+                }
+                b->ptr.p_double[k] = temp;
+                c->ptr.p_double[k] = mult;
+            }
+        }
+        if( ae_fp_less_eq(ae_maxreal(piv1, piv2, _state),tl)&&iin->ptr.p_int[n]==0 )
+        {
+            iin->ptr.p_int[n] = k;
+        }
+    }
+    if( ae_fp_less_eq(ae_fabs(a->ptr.p_double[n], _state),scale1*tl)&&iin->ptr.p_int[n]==0 )
+    {
+        iin->ptr.p_int[n] = n;
+    }
+}
+
+
+static void evd_tdininternaldlagts(ae_int_t n,
+     /* Real    */ ae_vector* a,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* c,
+     /* Real    */ ae_vector* d,
+     /* Integer */ ae_vector* iin,
+     /* Real    */ ae_vector* y,
+     double* tol,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_int_t k;
+    double absak;
+    double ak;
+    double bignum;
+    double eps;
+    double pert;
+    double sfmin;
+    double temp;
+
+    *info = 0;
+
+    *info = 0;
+    if( n<0 )
+    {
+        *info = -1;
+        return;
+    }
+    if( n==0 )
+    {
+        return;
+    }
+    eps = ae_machineepsilon;
+    sfmin = ae_minrealnumber;
+    bignum = 1/sfmin;
+    if( ae_fp_less_eq(*tol,0) )
+    {
+        *tol = ae_fabs(a->ptr.p_double[1], _state);
+        if( n>1 )
+        {
+            *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[2], _state), ae_fabs(b->ptr.p_double[1], _state), _state), _state);
+        }
+        for(k=3; k<=n; k++)
+        {
+            *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[k], _state), ae_maxreal(ae_fabs(b->ptr.p_double[k-1], _state), ae_fabs(d->ptr.p_double[k-2], _state), _state), _state), _state);
+        }
+        *tol = *tol*eps;
+        if( ae_fp_eq(*tol,0) )
+        {
+            *tol = eps;
+        }
+    }
+    for(k=2; k<=n; k++)
+    {
+        if( iin->ptr.p_int[k-1]==0 )
+        {
+            y->ptr.p_double[k] = y->ptr.p_double[k]-c->ptr.p_double[k-1]*y->ptr.p_double[k-1];
+        }
+        else
+        {
+            temp = y->ptr.p_double[k-1];
+            y->ptr.p_double[k-1] = y->ptr.p_double[k];
+            y->ptr.p_double[k] = temp-c->ptr.p_double[k-1]*y->ptr.p_double[k];
+        }
+    }
+    for(k=n; k>=1; k--)
+    {
+        if( k<=n-2 )
+        {
+            temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1]-d->ptr.p_double[k]*y->ptr.p_double[k+2];
+        }
+        else
+        {
+            if( k==n-1 )
+            {
+                temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1];
+            }
+            else
+            {
+                temp = y->ptr.p_double[k];
+            }
+        }
+        ak = a->ptr.p_double[k];
+        pert = ae_fabs(*tol, _state);
+        if( ae_fp_less(ak,0) )
+        {
+            pert = -pert;
+        }
+        for(;;)
+        {
+            absak = ae_fabs(ak, _state);
+            if( ae_fp_less(absak,1) )
+            {
+                if( ae_fp_less(absak,sfmin) )
+                {
+                    if( ae_fp_eq(absak,0)||ae_fp_greater(ae_fabs(temp, _state)*sfmin,absak) )
+                    {
+                        ak = ak+pert;
+                        pert = 2*pert;
+                        continue;
+                    }
+                    else
+                    {
+                        temp = temp*bignum;
+                        ak = ak*bignum;
+                    }
+                }
+                else
+                {
+                    if( ae_fp_greater(ae_fabs(temp, _state),absak*bignum) )
+                    {
+                        ak = ak+pert;
+                        pert = 2*pert;
+                        continue;
+                    }
+                }
+            }
+            break;
+        }
+        y->ptr.p_double[k] = temp/ak;
+    }
+}
+
+
+static void evd_internaldlaebz(ae_int_t ijob,
+     ae_int_t nitmax,
+     ae_int_t n,
+     ae_int_t mmax,
+     ae_int_t minp,
+     double abstol,
+     double reltol,
+     double pivmin,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     /* Real    */ ae_vector* e2,
+     /* Integer */ ae_vector* nval,
+     /* Real    */ ae_matrix* ab,
+     /* Real    */ ae_vector* c,
+     ae_int_t* mout,
+     /* Integer */ ae_matrix* nab,
+     /* Real    */ ae_vector* work,
+     /* Integer */ ae_vector* iwork,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_int_t itmp1;
+    ae_int_t itmp2;
+    ae_int_t j;
+    ae_int_t ji;
+    ae_int_t jit;
+    ae_int_t jp;
+    ae_int_t kf;
+    ae_int_t kfnew;
+    ae_int_t kl;
+    ae_int_t klnew;
+    double tmp1;
+    double tmp2;
+
+    *mout = 0;
+    *info = 0;
+
+    *info = 0;
+    if( ijob<1||ijob>3 )
+    {
+        *info = -1;
+        return;
+    }
+    
+    /*
+     * Initialize NAB
+     */
+    if( ijob==1 )
+    {
+        
+        /*
+         * Compute the number of eigenvalues in the initial intervals.
+         */
+        *mout = 0;
+        
+        /*
+         *DIR$ NOVECTOR
+         */
+        for(ji=1; ji<=minp; ji++)
+        {
+            for(jp=1; jp<=2; jp++)
+            {
+                tmp1 = d->ptr.p_double[1]-ab->ptr.pp_double[ji][jp];
+                if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) )
+                {
+                    tmp1 = -pivmin;
+                }
+                nab->ptr.pp_int[ji][jp] = 0;
+                if( ae_fp_less_eq(tmp1,0) )
+                {
+                    nab->ptr.pp_int[ji][jp] = 1;
+                }
+                for(j=2; j<=n; j++)
+                {
+                    tmp1 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp1-ab->ptr.pp_double[ji][jp];
+                    if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) )
+                    {
+                        tmp1 = -pivmin;
+                    }
+                    if( ae_fp_less_eq(tmp1,0) )
+                    {
+                        nab->ptr.pp_int[ji][jp] = nab->ptr.pp_int[ji][jp]+1;
+                    }
+                }
+            }
+            *mout = *mout+nab->ptr.pp_int[ji][2]-nab->ptr.pp_int[ji][1];
+        }
+        return;
+    }
+    
+    /*
+     * Initialize for loop
+     *
+     * KF and KL have the following meaning:
+     *   Intervals 1,...,KF-1 have converged.
+     *   Intervals KF,...,KL  still need to be refined.
+     */
+    kf = 1;
+    kl = minp;
+    
+    /*
+     * If IJOB=2, initialize C.
+     * If IJOB=3, use the user-supplied starting point.
+     */
+    if( ijob==2 )
+    {
+        for(ji=1; ji<=minp; ji++)
+        {
+            c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]);
+        }
+    }
+    
+    /*
+     * Iteration loop
+     */
+    for(jit=1; jit<=nitmax; jit++)
+    {
+        
+        /*
+         * Loop over intervals
+         *
+         *
+         * Serial Version of the loop
+         */
+        klnew = kl;
+        for(ji=kf; ji<=kl; ji++)
+        {
+            
+            /*
+             * Compute N(w), the number of eigenvalues less than w
+             */
+            tmp1 = c->ptr.p_double[ji];
+            tmp2 = d->ptr.p_double[1]-tmp1;
+            itmp1 = 0;
+            if( ae_fp_less_eq(tmp2,pivmin) )
+            {
+                itmp1 = 1;
+                tmp2 = ae_minreal(tmp2, -pivmin, _state);
+            }
+            
+            /*
+             * A series of compiler directives to defeat vectorization
+             * for the next loop
+             *
+             **$PL$ CMCHAR=' '
+             *CDIR$          NEXTSCALAR
+             *C$DIR          SCALAR
+             *CDIR$          NEXT SCALAR
+             *CVD$L          NOVECTOR
+             *CDEC$          NOVECTOR
+             *CVD$           NOVECTOR
+             **VDIR          NOVECTOR
+             **VOCL          LOOP,SCALAR
+             *CIBM           PREFER SCALAR
+             **$PL$ CMCHAR='*'
+             */
+            for(j=2; j<=n; j++)
+            {
+                tmp2 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp2-tmp1;
+                if( ae_fp_less_eq(tmp2,pivmin) )
+                {
+                    itmp1 = itmp1+1;
+                    tmp2 = ae_minreal(tmp2, -pivmin, _state);
+                }
+            }
+            if( ijob<=2 )
+            {
+                
+                /*
+                 * IJOB=2: Choose all intervals containing eigenvalues.
+                 *
+                 * Insure that N(w) is monotone
+                 */
+                itmp1 = ae_minint(nab->ptr.pp_int[ji][2], ae_maxint(nab->ptr.pp_int[ji][1], itmp1, _state), _state);
+                
+                /*
+                 * Update the Queue -- add intervals if both halves
+                 * contain eigenvalues.
+                 */
+                if( itmp1==nab->ptr.pp_int[ji][2] )
+                {
+                    
+                    /*
+                     * No eigenvalue in the upper interval:
+                     * just use the lower interval.
+                     */
+                    ab->ptr.pp_double[ji][2] = tmp1;
+                }
+                else
+                {
+                    if( itmp1==nab->ptr.pp_int[ji][1] )
+                    {
+                        
+                        /*
+                         * No eigenvalue in the lower interval:
+                         * just use the upper interval.
+                         */
+                        ab->ptr.pp_double[ji][1] = tmp1;
+                    }
+                    else
+                    {
+                        if( klnew<mmax )
+                        {
+                            
+                            /*
+                             * Eigenvalue in both intervals -- add upper to queue.
+                             */
+                            klnew = klnew+1;
+                            ab->ptr.pp_double[klnew][2] = ab->ptr.pp_double[ji][2];
+                            nab->ptr.pp_int[klnew][2] = nab->ptr.pp_int[ji][2];
+                            ab->ptr.pp_double[klnew][1] = tmp1;
+                            nab->ptr.pp_int[klnew][1] = itmp1;
+                            ab->ptr.pp_double[ji][2] = tmp1;
+                            nab->ptr.pp_int[ji][2] = itmp1;
+                        }
+                        else
+                        {
+                            *info = mmax+1;
+                            return;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                
+                /*
+                 * IJOB=3: Binary search.  Keep only the interval
+                 * containing  w  s.t. N(w) = NVAL
+                 */
+                if( itmp1<=nval->ptr.p_int[ji] )
+                {
+                    ab->ptr.pp_double[ji][1] = tmp1;
+                    nab->ptr.pp_int[ji][1] = itmp1;
+                }
+                if( itmp1>=nval->ptr.p_int[ji] )
+                {
+                    ab->ptr.pp_double[ji][2] = tmp1;
+                    nab->ptr.pp_int[ji][2] = itmp1;
+                }
+            }
+        }
+        kl = klnew;
+        
+        /*
+         * Check for convergence
+         */
+        kfnew = kf;
+        for(ji=kf; ji<=kl; ji++)
+        {
+            tmp1 = ae_fabs(ab->ptr.pp_double[ji][2]-ab->ptr.pp_double[ji][1], _state);
+            tmp2 = ae_maxreal(ae_fabs(ab->ptr.pp_double[ji][2], _state), ae_fabs(ab->ptr.pp_double[ji][1], _state), _state);
+            if( ae_fp_less(tmp1,ae_maxreal(abstol, ae_maxreal(pivmin, reltol*tmp2, _state), _state))||nab->ptr.pp_int[ji][1]>=nab->ptr.pp_int[ji][2] )
+            {
+                
+                /*
+                 * Converged -- Swap with position KFNEW,
+                 * then increment KFNEW
+                 */
+                if( ji>kfnew )
+                {
+                    tmp1 = ab->ptr.pp_double[ji][1];
+                    tmp2 = ab->ptr.pp_double[ji][2];
+                    itmp1 = nab->ptr.pp_int[ji][1];
+                    itmp2 = nab->ptr.pp_int[ji][2];
+                    ab->ptr.pp_double[ji][1] = ab->ptr.pp_double[kfnew][1];
+                    ab->ptr.pp_double[ji][2] = ab->ptr.pp_double[kfnew][2];
+                    nab->ptr.pp_int[ji][1] = nab->ptr.pp_int[kfnew][1];
+                    nab->ptr.pp_int[ji][2] = nab->ptr.pp_int[kfnew][2];
+                    ab->ptr.pp_double[kfnew][1] = tmp1;
+                    ab->ptr.pp_double[kfnew][2] = tmp2;
+                    nab->ptr.pp_int[kfnew][1] = itmp1;
+                    nab->ptr.pp_int[kfnew][2] = itmp2;
+                    if( ijob==3 )
+                    {
+                        itmp1 = nval->ptr.p_int[ji];
+                        nval->ptr.p_int[ji] = nval->ptr.p_int[kfnew];
+                        nval->ptr.p_int[kfnew] = itmp1;
+                    }
+                }
+                kfnew = kfnew+1;
+            }
+        }
+        kf = kfnew;
+        
+        /*
+         * Choose Midpoints
+         */
+        for(ji=kf; ji<=kl; ji++)
+        {
+            c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]);
+        }
+        
+        /*
+         * If no more intervals to refine, quit.
+         */
+        if( kf>kl )
+        {
+            break;
+        }
+    }
+    
+    /*
+     * Converged
+     */
+    *info = ae_maxint(kl+1-kf, 0, _state);
+    *mout = kl;
+}
+
+
+/*************************************************************************
+Internal subroutine
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     June 30, 1999
+*************************************************************************/
+static void evd_internaltrevc(/* Real    */ ae_matrix* t,
+     ae_int_t n,
+     ae_int_t side,
+     ae_int_t howmny,
+     /* Boolean */ ae_vector* vselect,
+     /* Real    */ ae_matrix* vl,
+     /* Real    */ ae_matrix* vr,
+     ae_int_t* m,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _vselect;
+    ae_bool allv;
+    ae_bool bothv;
+    ae_bool leftv;
+    ae_bool over;
+    ae_bool pair;
+    ae_bool rightv;
+    ae_bool somev;
+    ae_int_t i;
+    ae_int_t ierr;
+    ae_int_t ii;
+    ae_int_t ip;
+    ae_int_t iis;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t j2;
+    ae_int_t jnxt;
+    ae_int_t k;
+    ae_int_t ki;
+    ae_int_t n2;
+    double beta;
+    double bignum;
+    double emax;
+    double ovfl;
+    double rec;
+    double remax;
+    double scl;
+    double smin;
+    double smlnum;
+    double ulp;
+    double unfl;
+    double vcrit;
+    double vmax;
+    double wi;
+    double wr;
+    double xnorm;
+    ae_matrix x;
+    ae_vector work;
+    ae_vector temp;
+    ae_matrix temp11;
+    ae_matrix temp22;
+    ae_matrix temp11b;
+    ae_matrix temp21b;
+    ae_matrix temp12b;
+    ae_matrix temp22b;
+    ae_bool skipflag;
+    ae_int_t k1;
+    ae_int_t k2;
+    ae_int_t k3;
+    ae_int_t k4;
+    double vt;
+    ae_vector rswap4;
+    ae_vector zswap4;
+    ae_matrix ipivot44;
+    ae_vector civ4;
+    ae_vector crv4;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_vselect, vselect, _state, ae_true);
+    vselect = &_vselect;
+    *m = 0;
+    *info = 0;
+    ae_matrix_init(&x, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&temp, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&temp11, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&temp22, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&temp11b, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&temp21b, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&temp12b, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&temp22b, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&rswap4, 0, DT_BOOL, _state, ae_true);
+    ae_vector_init(&zswap4, 0, DT_BOOL, _state, ae_true);
+    ae_matrix_init(&ipivot44, 0, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&civ4, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&crv4, 0, DT_REAL, _state, ae_true);
+
+    ae_matrix_set_length(&x, 2+1, 2+1, _state);
+    ae_matrix_set_length(&temp11, 1+1, 1+1, _state);
+    ae_matrix_set_length(&temp11b, 1+1, 1+1, _state);
+    ae_matrix_set_length(&temp21b, 2+1, 1+1, _state);
+    ae_matrix_set_length(&temp12b, 1+1, 2+1, _state);
+    ae_matrix_set_length(&temp22b, 2+1, 2+1, _state);
+    ae_matrix_set_length(&temp22, 2+1, 2+1, _state);
+    ae_vector_set_length(&work, 3*n+1, _state);
+    ae_vector_set_length(&temp, n+1, _state);
+    ae_vector_set_length(&rswap4, 4+1, _state);
+    ae_vector_set_length(&zswap4, 4+1, _state);
+    ae_matrix_set_length(&ipivot44, 4+1, 4+1, _state);
+    ae_vector_set_length(&civ4, 4+1, _state);
+    ae_vector_set_length(&crv4, 4+1, _state);
+    if( howmny!=1 )
+    {
+        if( side==1||side==3 )
+        {
+            ae_matrix_set_length(vr, n+1, n+1, _state);
+        }
+        if( side==2||side==3 )
+        {
+            ae_matrix_set_length(vl, n+1, n+1, _state);
+        }
+    }
+    
+    /*
+     * Decode and test the input parameters
+     */
+    bothv = side==3;
+    rightv = side==1||bothv;
+    leftv = side==2||bothv;
+    allv = howmny==2;
+    over = howmny==1;
+    somev = howmny==3;
+    *info = 0;
+    if( n<0 )
+    {
+        *info = -2;
+        ae_frame_leave(_state);
+        return;
+    }
+    if( !rightv&&!leftv )
+    {
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    if( (!allv&&!over)&&!somev )
+    {
+        *info = -4;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Set M to the number of columns required to store the selected
+     * eigenvectors, standardize the array SELECT if necessary, and
+     * test MM.
+     */
+    if( somev )
+    {
+        *m = 0;
+        pair = ae_false;
+        for(j=1; j<=n; j++)
+        {
+            if( pair )
+            {
+                pair = ae_false;
+                vselect->ptr.p_bool[j] = ae_false;
+            }
+            else
+            {
+                if( j<n )
+                {
+                    if( ae_fp_eq(t->ptr.pp_double[j+1][j],0) )
+                    {
+                        if( vselect->ptr.p_bool[j] )
+                        {
+                            *m = *m+1;
+                        }
+                    }
+                    else
+                    {
+                        pair = ae_true;
+                        if( vselect->ptr.p_bool[j]||vselect->ptr.p_bool[j+1] )
+                        {
+                            vselect->ptr.p_bool[j] = ae_true;
+                            *m = *m+2;
+                        }
+                    }
+                }
+                else
+                {
+                    if( vselect->ptr.p_bool[n] )
+                    {
+                        *m = *m+1;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        *m = n;
+    }
+    
+    /*
+     * Quick return if possible.
+     */
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Set the constants to control overflow.
+     */
+    unfl = ae_minrealnumber;
+    ovfl = 1/unfl;
+    ulp = ae_machineepsilon;
+    smlnum = unfl*(n/ulp);
+    bignum = (1-ulp)/smlnum;
+    
+    /*
+     * Compute 1-norm of each column of strictly upper triangular
+     * part of T to control overflow in triangular solver.
+     */
+    work.ptr.p_double[1] = 0;
+    for(j=2; j<=n; j++)
+    {
+        work.ptr.p_double[j] = 0;
+        for(i=1; i<=j-1; i++)
+        {
+            work.ptr.p_double[j] = work.ptr.p_double[j]+ae_fabs(t->ptr.pp_double[i][j], _state);
+        }
+    }
+    
+    /*
+     * Index IP is used to specify the real or complex eigenvalue:
+     * IP = 0, real eigenvalue,
+     *      1, first of conjugate complex pair: (wr,wi)
+     *     -1, second of conjugate complex pair: (wr,wi)
+     */
+    n2 = 2*n;
+    if( rightv )
+    {
+        
+        /*
+         * Compute right eigenvectors.
+         */
+        ip = 0;
+        iis = *m;
+        for(ki=n; ki>=1; ki--)
+        {
+            skipflag = ae_false;
+            if( ip==1 )
+            {
+                skipflag = ae_true;
+            }
+            else
+            {
+                if( ki!=1 )
+                {
+                    if( ae_fp_neq(t->ptr.pp_double[ki][ki-1],0) )
+                    {
+                        ip = -1;
+                    }
+                }
+                if( somev )
+                {
+                    if( ip==0 )
+                    {
+                        if( !vselect->ptr.p_bool[ki] )
+                        {
+                            skipflag = ae_true;
+                        }
+                    }
+                    else
+                    {
+                        if( !vselect->ptr.p_bool[ki-1] )
+                        {
+                            skipflag = ae_true;
+                        }
+                    }
+                }
+            }
+            if( !skipflag )
+            {
+                
+                /*
+                 * Compute the KI-th eigenvalue (WR,WI).
+                 */
+                wr = t->ptr.pp_double[ki][ki];
+                wi = 0;
+                if( ip!=0 )
+                {
+                    wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki-1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki-1][ki], _state), _state);
+                }
+                smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state);
+                if( ip==0 )
+                {
+                    
+                    /*
+                     * Real right eigenvector
+                     */
+                    work.ptr.p_double[ki+n] = 1;
+                    
+                    /*
+                     * Form right-hand side
+                     */
+                    for(k=1; k<=ki-1; k++)
+                    {
+                        work.ptr.p_double[k+n] = -t->ptr.pp_double[k][ki];
+                    }
+                    
+                    /*
+                     * Solve the upper quasi-triangular system:
+                     *   (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
+                     */
+                    jnxt = ki-1;
+                    for(j=ki-1; j>=1; j--)
+                    {
+                        if( j>jnxt )
+                        {
+                            continue;
+                        }
+                        j1 = j;
+                        j2 = j;
+                        jnxt = j-1;
+                        if( j>1 )
+                        {
+                            if( ae_fp_neq(t->ptr.pp_double[j][j-1],0) )
+                            {
+                                j1 = j-1;
+                                jnxt = j-2;
+                            }
+                        }
+                        if( j1==j2 )
+                        {
+                            
+                            /*
+                             * 1-by-1 diagonal block
+                             */
+                            temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
+                            temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
+                            evd_internalhsevdlaln2(ae_false, 1, 1, smin, 1, &temp11, 1.0, 1.0, &temp11b, wr, 0.0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale X(1,1) to avoid overflow when updating
+                             * the right-hand side.
+                             */
+                            if( ae_fp_greater(xnorm,1) )
+                            {
+                                if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) )
+                                {
+                                    x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
+                                    scl = scl/xnorm;
+                                }
+                            }
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                k1 = n+1;
+                                k2 = n+ki;
+                                ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
+                            }
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
+                            
+                            /*
+                             * Update right-hand side
+                             */
+                            k1 = 1+n;
+                            k2 = j-1+n;
+                            k3 = j-1;
+                            vt = -x.ptr.pp_double[1][1];
+                            ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt);
+                        }
+                        else
+                        {
+                            
+                            /*
+                             * 2-by-2 diagonal block
+                             */
+                            temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1];
+                            temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j];
+                            temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1];
+                            temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j];
+                            temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n];
+                            temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+n];
+                            evd_internalhsevdlaln2(ae_false, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, 0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale X(1,1) and X(2,1) to avoid overflow when
+                             * updating the right-hand side.
+                             */
+                            if( ae_fp_greater(xnorm,1) )
+                            {
+                                beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state);
+                                if( ae_fp_greater(beta,bignum/xnorm) )
+                                {
+                                    x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
+                                    x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]/xnorm;
+                                    scl = scl/xnorm;
+                                }
+                            }
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                k1 = 1+n;
+                                k2 = ki+n;
+                                ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
+                            }
+                            work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1];
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[2][1];
+                            
+                            /*
+                             * Update right-hand side
+                             */
+                            k1 = 1+n;
+                            k2 = j-2+n;
+                            k3 = j-2;
+                            k4 = j-1;
+                            vt = -x.ptr.pp_double[1][1];
+                            ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][k4], t->stride, ae_v_len(k1,k2), vt);
+                            vt = -x.ptr.pp_double[2][1];
+                            ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt);
+                        }
+                    }
+                    
+                    /*
+                     * Copy the vector x or Q*x to VR and normalize.
+                     */
+                    if( !over )
+                    {
+                        k1 = 1+n;
+                        k2 = ki+n;
+                        ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[k1], 1, ae_v_len(1,ki));
+                        ii = columnidxabsmax(vr, 1, ki, iis, _state);
+                        remax = 1/ae_fabs(vr->ptr.pp_double[ii][iis], _state);
+                        ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax);
+                        for(k=ki+1; k<=n; k++)
+                        {
+                            vr->ptr.pp_double[k][iis] = 0;
+                        }
+                    }
+                    else
+                    {
+                        if( ki>1 )
+                        {
+                            ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n));
+                            matrixvectormultiply(vr, 1, n, 1, ki-1, ae_false, &work, 1+n, ki-1+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
+                            ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
+                        }
+                        ii = columnidxabsmax(vr, 1, n, ki, _state);
+                        remax = 1/ae_fabs(vr->ptr.pp_double[ii][ki], _state);
+                        ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax);
+                    }
+                }
+                else
+                {
+                    
+                    /*
+                     * Complex right eigenvector.
+                     *
+                     * Initial solve
+                     *     [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
+                     *     [ (T(KI,KI-1)   T(KI,KI)   )               ]
+                     */
+                    if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki-1][ki], _state),ae_fabs(t->ptr.pp_double[ki][ki-1], _state)) )
+                    {
+                        work.ptr.p_double[ki-1+n] = 1;
+                        work.ptr.p_double[ki+n2] = wi/t->ptr.pp_double[ki-1][ki];
+                    }
+                    else
+                    {
+                        work.ptr.p_double[ki-1+n] = -wi/t->ptr.pp_double[ki][ki-1];
+                        work.ptr.p_double[ki+n2] = 1;
+                    }
+                    work.ptr.p_double[ki+n] = 0;
+                    work.ptr.p_double[ki-1+n2] = 0;
+                    
+                    /*
+                     * Form right-hand side
+                     */
+                    for(k=1; k<=ki-2; k++)
+                    {
+                        work.ptr.p_double[k+n] = -work.ptr.p_double[ki-1+n]*t->ptr.pp_double[k][ki-1];
+                        work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+n2]*t->ptr.pp_double[k][ki];
+                    }
+                    
+                    /*
+                     * Solve upper quasi-triangular system:
+                     * (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
+                     */
+                    jnxt = ki-2;
+                    for(j=ki-2; j>=1; j--)
+                    {
+                        if( j>jnxt )
+                        {
+                            continue;
+                        }
+                        j1 = j;
+                        j2 = j;
+                        jnxt = j-1;
+                        if( j>1 )
+                        {
+                            if( ae_fp_neq(t->ptr.pp_double[j][j-1],0) )
+                            {
+                                j1 = j-1;
+                                jnxt = j-2;
+                            }
+                        }
+                        if( j1==j2 )
+                        {
+                            
+                            /*
+                             * 1-by-1 diagonal block
+                             */
+                            temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
+                            temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
+                            temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
+                            evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale X(1,1) and X(1,2) to avoid overflow when
+                             * updating the right-hand side.
+                             */
+                            if( ae_fp_greater(xnorm,1) )
+                            {
+                                if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) )
+                                {
+                                    x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
+                                    x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]/xnorm;
+                                    scl = scl/xnorm;
+                                }
+                            }
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                k1 = 1+n;
+                                k2 = ki+n;
+                                ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
+                                k1 = 1+n2;
+                                k2 = ki+n2;
+                                ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
+                            }
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
+                            work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
+                            
+                            /*
+                             * Update the right-hand side
+                             */
+                            k1 = 1+n;
+                            k2 = j-1+n;
+                            k3 = 1;
+                            k4 = j-1;
+                            vt = -x.ptr.pp_double[1][1];
+                            ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt);
+                            k1 = 1+n2;
+                            k2 = j-1+n2;
+                            k3 = 1;
+                            k4 = j-1;
+                            vt = -x.ptr.pp_double[1][2];
+                            ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt);
+                        }
+                        else
+                        {
+                            
+                            /*
+                             * 2-by-2 diagonal block
+                             */
+                            temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1];
+                            temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j];
+                            temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1];
+                            temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j];
+                            temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n];
+                            temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j-1+n+n];
+                            temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+n];
+                            temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+n+n];
+                            evd_internalhsevdlaln2(ae_false, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale X to avoid overflow when updating
+                             * the right-hand side.
+                             */
+                            if( ae_fp_greater(xnorm,1) )
+                            {
+                                beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state);
+                                if( ae_fp_greater(beta,bignum/xnorm) )
+                                {
+                                    rec = 1/xnorm;
+                                    x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]*rec;
+                                    x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]*rec;
+                                    x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]*rec;
+                                    x.ptr.pp_double[2][2] = x.ptr.pp_double[2][2]*rec;
+                                    scl = scl*rec;
+                                }
+                            }
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                ae_v_muld(&work.ptr.p_double[1+n], 1, ae_v_len(1+n,ki+n), scl);
+                                ae_v_muld(&work.ptr.p_double[1+n2], 1, ae_v_len(1+n2,ki+n2), scl);
+                            }
+                            work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1];
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[2][1];
+                            work.ptr.p_double[j-1+n2] = x.ptr.pp_double[1][2];
+                            work.ptr.p_double[j+n2] = x.ptr.pp_double[2][2];
+                            
+                            /*
+                             * Update the right-hand side
+                             */
+                            vt = -x.ptr.pp_double[1][1];
+                            ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n+1,n+j-2), vt);
+                            vt = -x.ptr.pp_double[2][1];
+                            ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n+1,n+j-2), vt);
+                            vt = -x.ptr.pp_double[1][2];
+                            ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n2+1,n2+j-2), vt);
+                            vt = -x.ptr.pp_double[2][2];
+                            ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n2+1,n2+j-2), vt);
+                        }
+                    }
+                    
+                    /*
+                     * Copy the vector x or Q*x to VR and normalize.
+                     */
+                    if( !over )
+                    {
+                        ae_v_move(&vr->ptr.pp_double[1][iis-1], vr->stride, &work.ptr.p_double[n+1], 1, ae_v_len(1,ki));
+                        ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[n2+1], 1, ae_v_len(1,ki));
+                        emax = 0;
+                        for(k=1; k<=ki; k++)
+                        {
+                            emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][iis-1], _state)+ae_fabs(vr->ptr.pp_double[k][iis], _state), _state);
+                        }
+                        remax = 1/emax;
+                        ae_v_muld(&vr->ptr.pp_double[1][iis-1], vr->stride, ae_v_len(1,ki), remax);
+                        ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax);
+                        for(k=ki+1; k<=n; k++)
+                        {
+                            vr->ptr.pp_double[k][iis-1] = 0;
+                            vr->ptr.pp_double[k][iis] = 0;
+                        }
+                    }
+                    else
+                    {
+                        if( ki>2 )
+                        {
+                            ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n));
+                            matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n, ki-2+n, 1.0, &temp, 1, n, work.ptr.p_double[ki-1+n], _state);
+                            ae_v_move(&vr->ptr.pp_double[1][ki-1], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
+                            ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n));
+                            matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n2, ki-2+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+n2], _state);
+                            ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
+                        }
+                        else
+                        {
+                            vt = work.ptr.p_double[ki-1+n];
+                            ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), vt);
+                            vt = work.ptr.p_double[ki+n2];
+                            ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), vt);
+                        }
+                        emax = 0;
+                        for(k=1; k<=n; k++)
+                        {
+                            emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][ki-1], _state)+ae_fabs(vr->ptr.pp_double[k][ki], _state), _state);
+                        }
+                        remax = 1/emax;
+                        ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), remax);
+                        ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax);
+                    }
+                }
+                iis = iis-1;
+                if( ip!=0 )
+                {
+                    iis = iis-1;
+                }
+            }
+            if( ip==1 )
+            {
+                ip = 0;
+            }
+            if( ip==-1 )
+            {
+                ip = 1;
+            }
+        }
+    }
+    if( leftv )
+    {
+        
+        /*
+         * Compute left eigenvectors.
+         */
+        ip = 0;
+        iis = 1;
+        for(ki=1; ki<=n; ki++)
+        {
+            skipflag = ae_false;
+            if( ip==-1 )
+            {
+                skipflag = ae_true;
+            }
+            else
+            {
+                if( ki!=n )
+                {
+                    if( ae_fp_neq(t->ptr.pp_double[ki+1][ki],0) )
+                    {
+                        ip = 1;
+                    }
+                }
+                if( somev )
+                {
+                    if( !vselect->ptr.p_bool[ki] )
+                    {
+                        skipflag = ae_true;
+                    }
+                }
+            }
+            if( !skipflag )
+            {
+                
+                /*
+                 * Compute the KI-th eigenvalue (WR,WI).
+                 */
+                wr = t->ptr.pp_double[ki][ki];
+                wi = 0;
+                if( ip!=0 )
+                {
+                    wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki+1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki+1][ki], _state), _state);
+                }
+                smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state);
+                if( ip==0 )
+                {
+                    
+                    /*
+                     * Real left eigenvector.
+                     */
+                    work.ptr.p_double[ki+n] = 1;
+                    
+                    /*
+                     * Form right-hand side
+                     */
+                    for(k=ki+1; k<=n; k++)
+                    {
+                        work.ptr.p_double[k+n] = -t->ptr.pp_double[ki][k];
+                    }
+                    
+                    /*
+                     * Solve the quasi-triangular system:
+                     * (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
+                     */
+                    vmax = 1;
+                    vcrit = bignum;
+                    jnxt = ki+1;
+                    for(j=ki+1; j<=n; j++)
+                    {
+                        if( j<jnxt )
+                        {
+                            continue;
+                        }
+                        j1 = j;
+                        j2 = j;
+                        jnxt = j+1;
+                        if( j<n )
+                        {
+                            if( ae_fp_neq(t->ptr.pp_double[j+1][j],0) )
+                            {
+                                j2 = j+1;
+                                jnxt = j+2;
+                            }
+                        }
+                        if( j1==j2 )
+                        {
+                            
+                            /*
+                             * 1-by-1 diagonal block
+                             *
+                             * Scale if necessary to avoid overflow when forming
+                             * the right-hand side.
+                             */
+                            if( ae_fp_greater(work.ptr.p_double[j],vcrit) )
+                            {
+                                rec = 1/vmax;
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
+                                vmax = 1;
+                                vcrit = bignum;
+                            }
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
+                            work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
+                            
+                            /*
+                             * Solve (T(J,J)-WR)'*X = WORK
+                             */
+                            temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
+                            temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
+                            evd_internalhsevdlaln2(ae_false, 1, 1, smin, 1.0, &temp11, 1.0, 1.0, &temp11b, wr, 0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
+                            }
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
+                            vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), vmax, _state);
+                            vcrit = bignum/vmax;
+                        }
+                        else
+                        {
+                            
+                            /*
+                             * 2-by-2 diagonal block
+                             *
+                             * Scale if necessary to avoid overflow when forming
+                             * the right-hand side.
+                             */
+                            beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state);
+                            if( ae_fp_greater(beta,vcrit) )
+                            {
+                                rec = 1/vmax;
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
+                                vmax = 1;
+                                vcrit = bignum;
+                            }
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
+                            work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j+1], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
+                            work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt;
+                            
+                            /*
+                             * Solve
+                             *    [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
+                             *    [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
+                             */
+                            temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
+                            temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1];
+                            temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j];
+                            temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1];
+                            temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
+                            temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n];
+                            evd_internalhsevdlaln2(ae_true, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, 0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
+                            }
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
+                            work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1];
+                            vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+1+n], _state), vmax, _state), _state);
+                            vcrit = bignum/vmax;
+                        }
+                    }
+                    
+                    /*
+                     * Copy the vector x or Q*x to VL and normalize.
+                     */
+                    if( !over )
+                    {
+                        ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n));
+                        ii = columnidxabsmax(vl, ki, n, iis, _state);
+                        remax = 1/ae_fabs(vl->ptr.pp_double[ii][iis], _state);
+                        ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax);
+                        for(k=1; k<=ki-1; k++)
+                        {
+                            vl->ptr.pp_double[k][iis] = 0;
+                        }
+                    }
+                    else
+                    {
+                        if( ki<n )
+                        {
+                            ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n));
+                            matrixvectormultiply(vl, 1, n, ki+1, n, ae_false, &work, ki+1+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
+                            ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
+                        }
+                        ii = columnidxabsmax(vl, 1, n, ki, _state);
+                        remax = 1/ae_fabs(vl->ptr.pp_double[ii][ki], _state);
+                        ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax);
+                    }
+                }
+                else
+                {
+                    
+                    /*
+                     * Complex left eigenvector.
+                     *
+                     * Initial solve:
+                     *   ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
+                     *   ((T(KI+1,KI) T(KI+1,KI+1))                )
+                     */
+                    if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki][ki+1], _state),ae_fabs(t->ptr.pp_double[ki+1][ki], _state)) )
+                    {
+                        work.ptr.p_double[ki+n] = wi/t->ptr.pp_double[ki][ki+1];
+                        work.ptr.p_double[ki+1+n2] = 1;
+                    }
+                    else
+                    {
+                        work.ptr.p_double[ki+n] = 1;
+                        work.ptr.p_double[ki+1+n2] = -wi/t->ptr.pp_double[ki+1][ki];
+                    }
+                    work.ptr.p_double[ki+1+n] = 0;
+                    work.ptr.p_double[ki+n2] = 0;
+                    
+                    /*
+                     * Form right-hand side
+                     */
+                    for(k=ki+2; k<=n; k++)
+                    {
+                        work.ptr.p_double[k+n] = -work.ptr.p_double[ki+n]*t->ptr.pp_double[ki][k];
+                        work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+1+n2]*t->ptr.pp_double[ki+1][k];
+                    }
+                    
+                    /*
+                     * Solve complex quasi-triangular system:
+                     * ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
+                     */
+                    vmax = 1;
+                    vcrit = bignum;
+                    jnxt = ki+2;
+                    for(j=ki+2; j<=n; j++)
+                    {
+                        if( j<jnxt )
+                        {
+                            continue;
+                        }
+                        j1 = j;
+                        j2 = j;
+                        jnxt = j+1;
+                        if( j<n )
+                        {
+                            if( ae_fp_neq(t->ptr.pp_double[j+1][j],0) )
+                            {
+                                j2 = j+1;
+                                jnxt = j+2;
+                            }
+                        }
+                        if( j1==j2 )
+                        {
+                            
+                            /*
+                             * 1-by-1 diagonal block
+                             *
+                             * Scale if necessary to avoid overflow when
+                             * forming the right-hand side elements.
+                             */
+                            if( ae_fp_greater(work.ptr.p_double[j],vcrit) )
+                            {
+                                rec = 1/vmax;
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
+                                ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec);
+                                vmax = 1;
+                                vcrit = bignum;
+                            }
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
+                            work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
+                            work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt;
+                            
+                            /*
+                             * Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
+                             */
+                            temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
+                            temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
+                            temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
+                            evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
+                                ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl);
+                            }
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
+                            work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
+                            vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+n2], _state), vmax, _state), _state);
+                            vcrit = bignum/vmax;
+                        }
+                        else
+                        {
+                            
+                            /*
+                             * 2-by-2 diagonal block
+                             *
+                             * Scale if necessary to avoid overflow when forming
+                             * the right-hand side elements.
+                             */
+                            beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state);
+                            if( ae_fp_greater(beta,vcrit) )
+                            {
+                                rec = 1/vmax;
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
+                                ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec);
+                                vmax = 1;
+                                vcrit = bignum;
+                            }
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
+                            work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
+                            work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt;
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
+                            work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt;
+                            vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
+                            work.ptr.p_double[j+1+n2] = work.ptr.p_double[j+1+n2]-vt;
+                            
+                            /*
+                             * Solve 2-by-2 complex linear equation
+                             *   ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
+                             *   ([T(j+1,j) T(j+1,j+1)]             )
+                             */
+                            temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
+                            temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1];
+                            temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j];
+                            temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1];
+                            temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
+                            temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
+                            temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n];
+                            temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+1+n+n];
+                            evd_internalhsevdlaln2(ae_true, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
+                            
+                            /*
+                             * Scale if necessary
+                             */
+                            if( ae_fp_neq(scl,1) )
+                            {
+                                ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
+                                ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl);
+                            }
+                            work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
+                            work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
+                            work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1];
+                            work.ptr.p_double[j+1+n2] = x.ptr.pp_double[2][2];
+                            vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][1], _state), vmax, _state);
+                            vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][2], _state), vmax, _state);
+                            vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][1], _state), vmax, _state);
+                            vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][2], _state), vmax, _state);
+                            vcrit = bignum/vmax;
+                        }
+                    }
+                    
+                    /*
+                     * Copy the vector x or Q*x to VL and normalize.
+                     */
+                    if( !over )
+                    {
+                        ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n));
+                        ae_v_move(&vl->ptr.pp_double[ki][iis+1], vl->stride, &work.ptr.p_double[ki+n2], 1, ae_v_len(ki,n));
+                        emax = 0;
+                        for(k=ki; k<=n; k++)
+                        {
+                            emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][iis], _state)+ae_fabs(vl->ptr.pp_double[k][iis+1], _state), _state);
+                        }
+                        remax = 1/emax;
+                        ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax);
+                        ae_v_muld(&vl->ptr.pp_double[ki][iis+1], vl->stride, ae_v_len(ki,n), remax);
+                        for(k=1; k<=ki-1; k++)
+                        {
+                            vl->ptr.pp_double[k][iis] = 0;
+                            vl->ptr.pp_double[k][iis+1] = 0;
+                        }
+                    }
+                    else
+                    {
+                        if( ki<n-1 )
+                        {
+                            ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n));
+                            matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
+                            ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
+                            ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n));
+                            matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n2, n+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+1+n2], _state);
+                            ae_v_move(&vl->ptr.pp_double[1][ki+1], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
+                        }
+                        else
+                        {
+                            vt = work.ptr.p_double[ki+n];
+                            ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), vt);
+                            vt = work.ptr.p_double[ki+1+n2];
+                            ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), vt);
+                        }
+                        emax = 0;
+                        for(k=1; k<=n; k++)
+                        {
+                            emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][ki], _state)+ae_fabs(vl->ptr.pp_double[k][ki+1], _state), _state);
+                        }
+                        remax = 1/emax;
+                        ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax);
+                        ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), remax);
+                    }
+                }
+                iis = iis+1;
+                if( ip!=0 )
+                {
+                    iis = iis+1;
+                }
+            }
+            if( ip==-1 )
+            {
+                ip = 0;
+            }
+            if( ip==1 )
+            {
+                ip = -1;
+            }
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+DLALN2 solves a system of the form  (ca A - w D ) X = s B
+or (ca A' - w D) X = s B   with possible scaling ("s") and
+perturbation of A.  (A' means A-transpose.)
+
+A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
+real diagonal matrix, w is a real or complex value, and X and B are
+NA x 1 matrices -- real if w is real, complex if w is complex.  NA
+may be 1 or 2.
+
+If w is complex, X and B are represented as NA x 2 matrices,
+the first column of each being the real part and the second
+being the imaginary part.
+
+"s" is a scaling factor (.LE. 1), computed by DLALN2, which is
+so chosen that X can be computed without overflow.  X is further
+scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
+than overflow.
+
+If both singular values of (ca A - w D) are less than SMIN,
+SMIN*identity will be used instead of (ca A - w D).  If only one
+singular value is less than SMIN, one element of (ca A - w D) will be
+perturbed enough to make the smallest singular value roughly SMIN.
+If both singular values are at least SMIN, (ca A - w D) will not be
+perturbed.  In any case, the perturbation will be at most some small
+multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
+are computed by infinity-norm approximations, and thus will only be
+correct to a factor of 2 or so.
+
+Note: all input quantities are assumed to be smaller than overflow
+by a reasonable factor.  (See BIGNUM.)
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+static void evd_internalhsevdlaln2(ae_bool ltrans,
+     ae_int_t na,
+     ae_int_t nw,
+     double smin,
+     double ca,
+     /* Real    */ ae_matrix* a,
+     double d1,
+     double d2,
+     /* Real    */ ae_matrix* b,
+     double wr,
+     double wi,
+     /* Boolean */ ae_vector* rswap4,
+     /* Boolean */ ae_vector* zswap4,
+     /* Integer */ ae_matrix* ipivot44,
+     /* Real    */ ae_vector* civ4,
+     /* Real    */ ae_vector* crv4,
+     /* Real    */ ae_matrix* x,
+     double* scl,
+     double* xnorm,
+     ae_int_t* info,
+     ae_state *_state)
+{
+    ae_int_t icmax;
+    ae_int_t j;
+    double bbnd;
+    double bi1;
+    double bi2;
+    double bignum;
+    double bnorm;
+    double br1;
+    double br2;
+    double ci21;
+    double ci22;
+    double cmax;
+    double cnorm;
+    double cr21;
+    double cr22;
+    double csi;
+    double csr;
+    double li21;
+    double lr21;
+    double smini;
+    double smlnum;
+    double temp;
+    double u22abs;
+    double ui11;
+    double ui11r;
+    double ui12;
+    double ui12s;
+    double ui22;
+    double ur11;
+    double ur11r;
+    double ur12;
+    double ur12s;
+    double ur22;
+    double xi1;
+    double xi2;
+    double xr1;
+    double xr2;
+    double tmp1;
+    double tmp2;
+
+    *scl = 0;
+    *xnorm = 0;
+    *info = 0;
+
+    zswap4->ptr.p_bool[1] = ae_false;
+    zswap4->ptr.p_bool[2] = ae_false;
+    zswap4->ptr.p_bool[3] = ae_true;
+    zswap4->ptr.p_bool[4] = ae_true;
+    rswap4->ptr.p_bool[1] = ae_false;
+    rswap4->ptr.p_bool[2] = ae_true;
+    rswap4->ptr.p_bool[3] = ae_false;
+    rswap4->ptr.p_bool[4] = ae_true;
+    ipivot44->ptr.pp_int[1][1] = 1;
+    ipivot44->ptr.pp_int[2][1] = 2;
+    ipivot44->ptr.pp_int[3][1] = 3;
+    ipivot44->ptr.pp_int[4][1] = 4;
+    ipivot44->ptr.pp_int[1][2] = 2;
+    ipivot44->ptr.pp_int[2][2] = 1;
+    ipivot44->ptr.pp_int[3][2] = 4;
+    ipivot44->ptr.pp_int[4][2] = 3;
+    ipivot44->ptr.pp_int[1][3] = 3;
+    ipivot44->ptr.pp_int[2][3] = 4;
+    ipivot44->ptr.pp_int[3][3] = 1;
+    ipivot44->ptr.pp_int[4][3] = 2;
+    ipivot44->ptr.pp_int[1][4] = 4;
+    ipivot44->ptr.pp_int[2][4] = 3;
+    ipivot44->ptr.pp_int[3][4] = 2;
+    ipivot44->ptr.pp_int[4][4] = 1;
+    smlnum = 2*ae_minrealnumber;
+    bignum = 1/smlnum;
+    smini = ae_maxreal(smin, smlnum, _state);
+    
+    /*
+     * Don't check for input errors
+     */
+    *info = 0;
+    
+    /*
+     * Standard Initializations
+     */
+    *scl = 1;
+    if( na==1 )
+    {
+        
+        /*
+         * 1 x 1  (i.e., scalar) system   C X = B
+         */
+        if( nw==1 )
+        {
+            
+            /*
+             * Real 1x1 system.
+             *
+             * C = ca A - w D
+             */
+            csr = ca*a->ptr.pp_double[1][1]-wr*d1;
+            cnorm = ae_fabs(csr, _state);
+            
+            /*
+             * If | C | < SMINI, use C = SMINI
+             */
+            if( ae_fp_less(cnorm,smini) )
+            {
+                csr = smini;
+                cnorm = smini;
+                *info = 1;
+            }
+            
+            /*
+             * Check scaling for  X = B / C
+             */
+            bnorm = ae_fabs(b->ptr.pp_double[1][1], _state);
+            if( ae_fp_less(cnorm,1)&&ae_fp_greater(bnorm,1) )
+            {
+                if( ae_fp_greater(bnorm,bignum*cnorm) )
+                {
+                    *scl = 1/bnorm;
+                }
+            }
+            
+            /*
+             * Compute X
+             */
+            x->ptr.pp_double[1][1] = b->ptr.pp_double[1][1]*(*scl)/csr;
+            *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state);
+        }
+        else
+        {
+            
+            /*
+             * Complex 1x1 system (w is complex)
+             *
+             * C = ca A - w D
+             */
+            csr = ca*a->ptr.pp_double[1][1]-wr*d1;
+            csi = -wi*d1;
+            cnorm = ae_fabs(csr, _state)+ae_fabs(csi, _state);
+            
+            /*
+             * If | C | < SMINI, use C = SMINI
+             */
+            if( ae_fp_less(cnorm,smini) )
+            {
+                csr = smini;
+                csi = 0;
+                cnorm = smini;
+                *info = 1;
+            }
+            
+            /*
+             * Check scaling for  X = B / C
+             */
+            bnorm = ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state);
+            if( ae_fp_less(cnorm,1)&&ae_fp_greater(bnorm,1) )
+            {
+                if( ae_fp_greater(bnorm,bignum*cnorm) )
+                {
+                    *scl = 1/bnorm;
+                }
+            }
+            
+            /*
+             * Compute X
+             */
+            evd_internalhsevdladiv(*scl*b->ptr.pp_double[1][1], *scl*b->ptr.pp_double[1][2], csr, csi, &tmp1, &tmp2, _state);
+            x->ptr.pp_double[1][1] = tmp1;
+            x->ptr.pp_double[1][2] = tmp2;
+            *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state)+ae_fabs(x->ptr.pp_double[1][2], _state);
+        }
+    }
+    else
+    {
+        
+        /*
+         * 2x2 System
+         *
+         * Compute the real part of  C = ca A - w D  (or  ca A' - w D )
+         */
+        crv4->ptr.p_double[1+0] = ca*a->ptr.pp_double[1][1]-wr*d1;
+        crv4->ptr.p_double[2+2] = ca*a->ptr.pp_double[2][2]-wr*d2;
+        if( ltrans )
+        {
+            crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[2][1];
+            crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[1][2];
+        }
+        else
+        {
+            crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[2][1];
+            crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[1][2];
+        }
+        if( nw==1 )
+        {
+            
+            /*
+             * Real 2x2 system  (w is real)
+             *
+             * Find the largest element in C
+             */
+            cmax = 0;
+            icmax = 0;
+            for(j=1; j<=4; j++)
+            {
+                if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state),cmax) )
+                {
+                    cmax = ae_fabs(crv4->ptr.p_double[j], _state);
+                    icmax = j;
+                }
+            }
+            
+            /*
+             * If norm(C) < SMINI, use SMINI*identity.
+             */
+            if( ae_fp_less(cmax,smini) )
+            {
+                bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state), ae_fabs(b->ptr.pp_double[2][1], _state), _state);
+                if( ae_fp_less(smini,1)&&ae_fp_greater(bnorm,1) )
+                {
+                    if( ae_fp_greater(bnorm,bignum*smini) )
+                    {
+                        *scl = 1/bnorm;
+                    }
+                }
+                temp = *scl/smini;
+                x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1];
+                x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1];
+                *xnorm = temp*bnorm;
+                *info = 1;
+                return;
+            }
+            
+            /*
+             * Gaussian elimination with complete pivoting.
+             */
+            ur11 = crv4->ptr.p_double[icmax];
+            cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
+            ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
+            cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
+            ur11r = 1/ur11;
+            lr21 = ur11r*cr21;
+            ur22 = cr22-ur12*lr21;
+            
+            /*
+             * If smaller pivot < SMINI, use SMINI
+             */
+            if( ae_fp_less(ae_fabs(ur22, _state),smini) )
+            {
+                ur22 = smini;
+                *info = 1;
+            }
+            if( rswap4->ptr.p_bool[icmax] )
+            {
+                br1 = b->ptr.pp_double[2][1];
+                br2 = b->ptr.pp_double[1][1];
+            }
+            else
+            {
+                br1 = b->ptr.pp_double[1][1];
+                br2 = b->ptr.pp_double[2][1];
+            }
+            br2 = br2-lr21*br1;
+            bbnd = ae_maxreal(ae_fabs(br1*(ur22*ur11r), _state), ae_fabs(br2, _state), _state);
+            if( ae_fp_greater(bbnd,1)&&ae_fp_less(ae_fabs(ur22, _state),1) )
+            {
+                if( ae_fp_greater_eq(bbnd,bignum*ae_fabs(ur22, _state)) )
+                {
+                    *scl = 1/bbnd;
+                }
+            }
+            xr2 = br2*(*scl)/ur22;
+            xr1 = *scl*br1*ur11r-xr2*(ur11r*ur12);
+            if( zswap4->ptr.p_bool[icmax] )
+            {
+                x->ptr.pp_double[1][1] = xr2;
+                x->ptr.pp_double[2][1] = xr1;
+            }
+            else
+            {
+                x->ptr.pp_double[1][1] = xr1;
+                x->ptr.pp_double[2][1] = xr2;
+            }
+            *xnorm = ae_maxreal(ae_fabs(xr1, _state), ae_fabs(xr2, _state), _state);
+            
+            /*
+             * Further scaling if  norm(A) norm(X) > overflow
+             */
+            if( ae_fp_greater(*xnorm,1)&&ae_fp_greater(cmax,1) )
+            {
+                if( ae_fp_greater(*xnorm,bignum/cmax) )
+                {
+                    temp = cmax/bignum;
+                    x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1];
+                    x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1];
+                    *xnorm = temp*(*xnorm);
+                    *scl = temp*(*scl);
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Complex 2x2 system  (w is complex)
+             *
+             * Find the largest element in C
+             */
+            civ4->ptr.p_double[1+0] = -wi*d1;
+            civ4->ptr.p_double[2+0] = 0;
+            civ4->ptr.p_double[1+2] = 0;
+            civ4->ptr.p_double[2+2] = -wi*d2;
+            cmax = 0;
+            icmax = 0;
+            for(j=1; j<=4; j++)
+            {
+                if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state),cmax) )
+                {
+                    cmax = ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state);
+                    icmax = j;
+                }
+            }
+            
+            /*
+             * If norm(C) < SMINI, use SMINI*identity.
+             */
+            if( ae_fp_less(cmax,smini) )
+            {
+                bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state), ae_fabs(b->ptr.pp_double[2][1], _state)+ae_fabs(b->ptr.pp_double[2][2], _state), _state);
+                if( ae_fp_less(smini,1)&&ae_fp_greater(bnorm,1) )
+                {
+                    if( ae_fp_greater(bnorm,bignum*smini) )
+                    {
+                        *scl = 1/bnorm;
+                    }
+                }
+                temp = *scl/smini;
+                x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1];
+                x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1];
+                x->ptr.pp_double[1][2] = temp*b->ptr.pp_double[1][2];
+                x->ptr.pp_double[2][2] = temp*b->ptr.pp_double[2][2];
+                *xnorm = temp*bnorm;
+                *info = 1;
+                return;
+            }
+            
+            /*
+             * Gaussian elimination with complete pivoting.
+             */
+            ur11 = crv4->ptr.p_double[icmax];
+            ui11 = civ4->ptr.p_double[icmax];
+            cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
+            ci21 = civ4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
+            ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
+            ui12 = civ4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
+            cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
+            ci22 = civ4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
+            if( icmax==1||icmax==4 )
+            {
+                
+                /*
+                 * Code when off-diagonals of pivoted C are real
+                 */
+                if( ae_fp_greater(ae_fabs(ur11, _state),ae_fabs(ui11, _state)) )
+                {
+                    temp = ui11/ur11;
+                    ur11r = 1/(ur11*(1+ae_sqr(temp, _state)));
+                    ui11r = -temp*ur11r;
+                }
+                else
+                {
+                    temp = ur11/ui11;
+                    ui11r = -1/(ui11*(1+ae_sqr(temp, _state)));
+                    ur11r = -temp*ui11r;
+                }
+                lr21 = cr21*ur11r;
+                li21 = cr21*ui11r;
+                ur12s = ur12*ur11r;
+                ui12s = ur12*ui11r;
+                ur22 = cr22-ur12*lr21;
+                ui22 = ci22-ur12*li21;
+            }
+            else
+            {
+                
+                /*
+                 * Code when diagonals of pivoted C are real
+                 */
+                ur11r = 1/ur11;
+                ui11r = 0;
+                lr21 = cr21*ur11r;
+                li21 = ci21*ur11r;
+                ur12s = ur12*ur11r;
+                ui12s = ui12*ur11r;
+                ur22 = cr22-ur12*lr21+ui12*li21;
+                ui22 = -ur12*li21-ui12*lr21;
+            }
+            u22abs = ae_fabs(ur22, _state)+ae_fabs(ui22, _state);
+            
+            /*
+             * If smaller pivot < SMINI, use SMINI
+             */
+            if( ae_fp_less(u22abs,smini) )
+            {
+                ur22 = smini;
+                ui22 = 0;
+                *info = 1;
+            }
+            if( rswap4->ptr.p_bool[icmax] )
+            {
+                br2 = b->ptr.pp_double[1][1];
+                br1 = b->ptr.pp_double[2][1];
+                bi2 = b->ptr.pp_double[1][2];
+                bi1 = b->ptr.pp_double[2][2];
+            }
+            else
+            {
+                br1 = b->ptr.pp_double[1][1];
+                br2 = b->ptr.pp_double[2][1];
+                bi1 = b->ptr.pp_double[1][2];
+                bi2 = b->ptr.pp_double[2][2];
+            }
+            br2 = br2-lr21*br1+li21*bi1;
+            bi2 = bi2-li21*br1-lr21*bi1;
+            bbnd = ae_maxreal((ae_fabs(br1, _state)+ae_fabs(bi1, _state))*(u22abs*(ae_fabs(ur11r, _state)+ae_fabs(ui11r, _state))), ae_fabs(br2, _state)+ae_fabs(bi2, _state), _state);
+            if( ae_fp_greater(bbnd,1)&&ae_fp_less(u22abs,1) )
+            {
+                if( ae_fp_greater_eq(bbnd,bignum*u22abs) )
+                {
+                    *scl = 1/bbnd;
+                    br1 = *scl*br1;
+                    bi1 = *scl*bi1;
+                    br2 = *scl*br2;
+                    bi2 = *scl*bi2;
+                }
+            }
+            evd_internalhsevdladiv(br2, bi2, ur22, ui22, &xr2, &xi2, _state);
+            xr1 = ur11r*br1-ui11r*bi1-ur12s*xr2+ui12s*xi2;
+            xi1 = ui11r*br1+ur11r*bi1-ui12s*xr2-ur12s*xi2;
+            if( zswap4->ptr.p_bool[icmax] )
+            {
+                x->ptr.pp_double[1][1] = xr2;
+                x->ptr.pp_double[2][1] = xr1;
+                x->ptr.pp_double[1][2] = xi2;
+                x->ptr.pp_double[2][2] = xi1;
+            }
+            else
+            {
+                x->ptr.pp_double[1][1] = xr1;
+                x->ptr.pp_double[2][1] = xr2;
+                x->ptr.pp_double[1][2] = xi1;
+                x->ptr.pp_double[2][2] = xi2;
+            }
+            *xnorm = ae_maxreal(ae_fabs(xr1, _state)+ae_fabs(xi1, _state), ae_fabs(xr2, _state)+ae_fabs(xi2, _state), _state);
+            
+            /*
+             * Further scaling if  norm(A) norm(X) > overflow
+             */
+            if( ae_fp_greater(*xnorm,1)&&ae_fp_greater(cmax,1) )
+            {
+                if( ae_fp_greater(*xnorm,bignum/cmax) )
+                {
+                    temp = cmax/bignum;
+                    x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1];
+                    x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1];
+                    x->ptr.pp_double[1][2] = temp*x->ptr.pp_double[1][2];
+                    x->ptr.pp_double[2][2] = temp*x->ptr.pp_double[2][2];
+                    *xnorm = temp*(*xnorm);
+                    *scl = temp*(*scl);
+                }
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+performs complex division in  real arithmetic
+
+                        a + i*b
+             p + i*q = ---------
+                        c + i*d
+
+The algorithm is due to Robert L. Smith and can be found
+in D. Knuth, The art of Computer Programming, Vol.2, p.195
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+static void evd_internalhsevdladiv(double a,
+     double b,
+     double c,
+     double d,
+     double* p,
+     double* q,
+     ae_state *_state)
+{
+    double e;
+    double f;
+
+    *p = 0;
+    *q = 0;
+
+    if( ae_fp_less(ae_fabs(d, _state),ae_fabs(c, _state)) )
+    {
+        e = d/c;
+        f = c+d*e;
+        *p = (a+b*e)/f;
+        *q = (b-a*e)/f;
+    }
+    else
+    {
+        e = c/d;
+        f = d+c*e;
+        *p = (b+a*e)/f;
+        *q = (-a+b*e)/f;
+    }
+}
+
+
+static ae_bool evd_nonsymmetricevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t vneeded,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     /* Real    */ ae_matrix* vl,
+     /* Real    */ ae_matrix* vr,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_matrix s;
+    ae_vector tau;
+    ae_vector sel;
+    ae_int_t i;
+    ae_int_t info;
+    ae_int_t m;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(wr);
+    ae_vector_clear(wi);
+    ae_matrix_clear(vl);
+    ae_matrix_clear(vr);
+    ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&sel, 0, DT_BOOL, _state, ae_true);
+
+    ae_assert(vneeded>=0&&vneeded<=3, "NonSymmetricEVD: incorrect VNeeded!", _state);
+    if( vneeded==0 )
+    {
+        
+        /*
+         * Eigen values only
+         */
+        evd_toupperhessenberg(a, n, &tau, _state);
+        internalschurdecomposition(a, n, 0, 0, wr, wi, &s, &info, _state);
+        result = info==0;
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Eigen values and vectors
+     */
+    evd_toupperhessenberg(a, n, &tau, _state);
+    evd_unpackqfromupperhessenberg(a, n, &tau, &s, _state);
+    internalschurdecomposition(a, n, 1, 1, wr, wi, &s, &info, _state);
+    result = info==0;
+    if( !result )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( vneeded==1||vneeded==3 )
+    {
+        ae_matrix_set_length(vr, n+1, n+1, _state);
+        for(i=1; i<=n; i++)
+        {
+            ae_v_move(&vr->ptr.pp_double[i][1], 1, &s.ptr.pp_double[i][1], 1, ae_v_len(1,n));
+        }
+    }
+    if( vneeded==2||vneeded==3 )
+    {
+        ae_matrix_set_length(vl, n+1, n+1, _state);
+        for(i=1; i<=n; i++)
+        {
+            ae_v_move(&vl->ptr.pp_double[i][1], 1, &s.ptr.pp_double[i][1], 1, ae_v_len(1,n));
+        }
+    }
+    evd_internaltrevc(a, n, vneeded, 1, &sel, vl, vr, &m, &info, _state);
+    result = info==0;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+static void evd_toupperhessenberg(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t ip1;
+    ae_int_t nmi;
+    double v;
+    ae_vector t;
+    ae_vector work;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(tau);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>=0, "ToUpperHessenberg: incorrect N!", _state);
+    
+    /*
+     * Quick return if possible
+     */
+    if( n<=1 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_vector_set_length(tau, n-1+1, _state);
+    ae_vector_set_length(&t, n+1, _state);
+    ae_vector_set_length(&work, n+1, _state);
+    for(i=1; i<=n-1; i++)
+    {
+        
+        /*
+         * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i)
+         */
+        ip1 = i+1;
+        nmi = n-i;
+        ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[ip1][i], a->stride, ae_v_len(1,nmi));
+        generatereflection(&t, nmi, &v, _state);
+        ae_v_move(&a->ptr.pp_double[ip1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(ip1,n));
+        tau->ptr.p_double[i] = v;
+        t.ptr.p_double[1] = 1;
+        
+        /*
+         * Apply H(i) to A(1:ihi,i+1:ihi) from the right
+         */
+        applyreflectionfromtheright(a, v, &t, 1, n, i+1, n, &work, _state);
+        
+        /*
+         * Apply H(i) to A(i+1:ihi,i+1:n) from the left
+         */
+        applyreflectionfromtheleft(a, v, &t, i+1, n, i+1, n, &work, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+static void evd_unpackqfromupperhessenberg(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector v;
+    ae_vector work;
+    ae_int_t ip1;
+    ae_int_t nmi;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(q);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * init
+     */
+    ae_matrix_set_length(q, n+1, n+1, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    ae_vector_set_length(&work, n+1, _state);
+    for(i=1; i<=n; i++)
+    {
+        for(j=1; j<=n; j++)
+        {
+            if( i==j )
+            {
+                q->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                q->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    
+    /*
+     * unpack Q
+     */
+    for(i=1; i<=n-1; i++)
+    {
+        
+        /*
+         * Apply H(i)
+         */
+        ip1 = i+1;
+        nmi = n-i;
+        ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[ip1][i], a->stride, ae_v_len(1,nmi));
+        v.ptr.p_double[1] = 1;
+        applyreflectionfromtheright(q, tau->ptr.p_double[i], &v, 1, n, i+1, n, &work, _state);
+    }
+    ae_frame_leave(_state);
+}
+
+
+
+
+/*************************************************************************
+Generation of a random uniformly distributed (Haar) orthogonal matrix
+
+INPUT PARAMETERS:
+    N   -   matrix size, N>=1
+    
+OUTPUT PARAMETERS:
+    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonal(ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_matrix_clear(a);
+
+    ae_assert(n>=1, "RMatrixRndOrthogonal: N<1!", _state);
+    ae_matrix_set_length(a, n, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                a->ptr.pp_double[i][j] = 1;
+            }
+            else
+            {
+                a->ptr.pp_double[i][j] = 0;
+            }
+        }
+    }
+    rmatrixrndorthogonalfromtheright(a, n, n, _state);
+}
+
+
+/*************************************************************************
+Generation of random NxN matrix with given condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndcond(ae_int_t n,
+     double c,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double l1;
+    double l2;
+
+    ae_matrix_clear(a);
+
+    ae_assert(n>=1&&ae_fp_greater_eq(c,1), "RMatrixRndCond: N<1 or C<1!", _state);
+    ae_matrix_set_length(a, n, n, _state);
+    if( n==1 )
+    {
+        
+        /*
+         * special case
+         */
+        a->ptr.pp_double[0][0] = 2*ae_randominteger(2, _state)-1;
+        return;
+    }
+    l1 = 0;
+    l2 = ae_log(1/c, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_double[i][j] = 0;
+        }
+    }
+    a->ptr.pp_double[0][0] = ae_exp(l1, _state);
+    for(i=1; i<=n-2; i++)
+    {
+        a->ptr.pp_double[i][i] = ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state);
+    }
+    a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state);
+    rmatrixrndorthogonalfromtheleft(a, n, n, _state);
+    rmatrixrndorthogonalfromtheright(a, n, n, _state);
+}
+
+
+/*************************************************************************
+Generation of a random Haar distributed orthogonal complex matrix
+
+INPUT PARAMETERS:
+    N   -   matrix size, N>=1
+
+OUTPUT PARAMETERS:
+    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonal(ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+
+    ae_matrix_clear(a);
+
+    ae_assert(n>=1, "CMatrixRndOrthogonal: N<1!", _state);
+    ae_matrix_set_length(a, n, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            if( i==j )
+            {
+                a->ptr.pp_complex[i][j] = ae_complex_from_d(1);
+            }
+            else
+            {
+                a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+            }
+        }
+    }
+    cmatrixrndorthogonalfromtheright(a, n, n, _state);
+}
+
+
+/*************************************************************************
+Generation of random NxN complex matrix with given condition number C and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndcond(ae_int_t n,
+     double c,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    double l1;
+    double l2;
+    hqrndstate state;
+    ae_complex v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(a);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    ae_assert(n>=1&&ae_fp_greater_eq(c,1), "CMatrixRndCond: N<1 or C<1!", _state);
+    ae_matrix_set_length(a, n, n, _state);
+    if( n==1 )
+    {
+        
+        /*
+         * special case
+         */
+        hqrndrandomize(&state, _state);
+        hqrndunit2(&state, &v.x, &v.y, _state);
+        a->ptr.pp_complex[0][0] = v;
+        ae_frame_leave(_state);
+        return;
+    }
+    l1 = 0;
+    l2 = ae_log(1/c, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+        }
+    }
+    a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
+    for(i=1; i<=n-2; i++)
+    {
+        a->ptr.pp_complex[i][i] = ae_complex_from_d(ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state));
+    }
+    a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
+    cmatrixrndorthogonalfromtheleft(a, n, n, _state);
+    cmatrixrndorthogonalfromtheright(a, n, n, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Generation of random NxN symmetric matrix with given condition number  and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void smatrixrndcond(ae_int_t n,
+     double c,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double l1;
+    double l2;
+
+    ae_matrix_clear(a);
+
+    ae_assert(n>=1&&ae_fp_greater_eq(c,1), "SMatrixRndCond: N<1 or C<1!", _state);
+    ae_matrix_set_length(a, n, n, _state);
+    if( n==1 )
+    {
+        
+        /*
+         * special case
+         */
+        a->ptr.pp_double[0][0] = 2*ae_randominteger(2, _state)-1;
+        return;
+    }
+    
+    /*
+     * Prepare matrix
+     */
+    l1 = 0;
+    l2 = ae_log(1/c, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_double[i][j] = 0;
+        }
+    }
+    a->ptr.pp_double[0][0] = ae_exp(l1, _state);
+    for(i=1; i<=n-2; i++)
+    {
+        a->ptr.pp_double[i][i] = (2*ae_randominteger(2, _state)-1)*ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state);
+    }
+    a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state);
+    
+    /*
+     * Multiply
+     */
+    smatrixrndmultiply(a, n, _state);
+}
+
+
+/*************************************************************************
+Generation of random NxN symmetric positive definite matrix with given
+condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixrndcond(ae_int_t n,
+     double c,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double l1;
+    double l2;
+
+    ae_matrix_clear(a);
+
+    
+    /*
+     * Special cases
+     */
+    if( n<=0||ae_fp_less(c,1) )
+    {
+        return;
+    }
+    ae_matrix_set_length(a, n, n, _state);
+    if( n==1 )
+    {
+        a->ptr.pp_double[0][0] = 1;
+        return;
+    }
+    
+    /*
+     * Prepare matrix
+     */
+    l1 = 0;
+    l2 = ae_log(1/c, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_double[i][j] = 0;
+        }
+    }
+    a->ptr.pp_double[0][0] = ae_exp(l1, _state);
+    for(i=1; i<=n-2; i++)
+    {
+        a->ptr.pp_double[i][i] = ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state);
+    }
+    a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state);
+    
+    /*
+     * Multiply
+     */
+    smatrixrndmultiply(a, n, _state);
+}
+
+
+/*************************************************************************
+Generation of random NxN Hermitian matrix with given condition number  and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hmatrixrndcond(ae_int_t n,
+     double c,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double l1;
+    double l2;
+
+    ae_matrix_clear(a);
+
+    ae_assert(n>=1&&ae_fp_greater_eq(c,1), "HMatrixRndCond: N<1 or C<1!", _state);
+    ae_matrix_set_length(a, n, n, _state);
+    if( n==1 )
+    {
+        
+        /*
+         * special case
+         */
+        a->ptr.pp_complex[0][0] = ae_complex_from_d(2*ae_randominteger(2, _state)-1);
+        return;
+    }
+    
+    /*
+     * Prepare matrix
+     */
+    l1 = 0;
+    l2 = ae_log(1/c, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+        }
+    }
+    a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
+    for(i=1; i<=n-2; i++)
+    {
+        a->ptr.pp_complex[i][i] = ae_complex_from_d((2*ae_randominteger(2, _state)-1)*ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state));
+    }
+    a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
+    
+    /*
+     * Multiply
+     */
+    hmatrixrndmultiply(a, n, _state);
+    
+    /*
+     * post-process to ensure that matrix diagonal is real
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        a->ptr.pp_complex[i][i].y = 0;
+    }
+}
+
+
+/*************************************************************************
+Generation of random NxN Hermitian positive definite matrix with given
+condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixrndcond(ae_int_t n,
+     double c,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double l1;
+    double l2;
+
+    ae_matrix_clear(a);
+
+    
+    /*
+     * Special cases
+     */
+    if( n<=0||ae_fp_less(c,1) )
+    {
+        return;
+    }
+    ae_matrix_set_length(a, n, n, _state);
+    if( n==1 )
+    {
+        a->ptr.pp_complex[0][0] = ae_complex_from_d(1);
+        return;
+    }
+    
+    /*
+     * Prepare matrix
+     */
+    l1 = 0;
+    l2 = ae_log(1/c, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+        }
+    }
+    a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
+    for(i=1; i<=n-2; i++)
+    {
+        a->ptr.pp_complex[i][i] = ae_complex_from_d(ae_exp(ae_randomreal(_state)*(l2-l1)+l1, _state));
+    }
+    a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
+    
+    /*
+     * Multiply
+     */
+    hmatrixrndmultiply(a, n, _state);
+    
+    /*
+     * post-process to ensure that matrix diagonal is real
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        a->ptr.pp_complex[i][i].y = 0;
+    }
+}
+
+
+/*************************************************************************
+Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonalfromtheright(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    double tau;
+    double lambdav;
+    ae_int_t s;
+    ae_int_t i;
+    double u1;
+    double u2;
+    ae_vector w;
+    ae_vector v;
+    hqrndstate state;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
+    if( n==1 )
+    {
+        
+        /*
+         * Special case
+         */
+        tau = 2*ae_randominteger(2, _state)-1;
+        for(i=0; i<=m-1; i++)
+        {
+            a->ptr.pp_double[i][0] = a->ptr.pp_double[i][0]*tau;
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * General case.
+     * First pass.
+     */
+    ae_vector_set_length(&w, m, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    hqrndrandomize(&state, _state);
+    for(s=2; s<=n; s++)
+    {
+        
+        /*
+         * Prepare random normal v
+         */
+        do
+        {
+            i = 1;
+            while(i<=s)
+            {
+                hqrndnormal2(&state, &u1, &u2, _state);
+                v.ptr.p_double[i] = u1;
+                if( i+1<=s )
+                {
+                    v.ptr.p_double[i+1] = u2;
+                }
+                i = i+2;
+            }
+            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
+        }
+        while(ae_fp_eq(lambdav,0));
+        
+        /*
+         * Prepare and apply reflection
+         */
+        generatereflection(&v, s, &tau, _state);
+        v.ptr.p_double[1] = 1;
+        applyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state);
+    }
+    
+    /*
+     * Second pass.
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        tau = 2*ae_randominteger(2, _state)-1;
+        ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,m-1), tau);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q*A, where Q is random MxM orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonalfromtheleft(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    double tau;
+    double lambdav;
+    ae_int_t s;
+    ae_int_t i;
+    ae_int_t j;
+    double u1;
+    double u2;
+    ae_vector w;
+    ae_vector v;
+    hqrndstate state;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
+    if( m==1 )
+    {
+        
+        /*
+         * special case
+         */
+        tau = 2*ae_randominteger(2, _state)-1;
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_double[0][j] = a->ptr.pp_double[0][j]*tau;
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * General case.
+     * First pass.
+     */
+    ae_vector_set_length(&w, n, _state);
+    ae_vector_set_length(&v, m+1, _state);
+    hqrndrandomize(&state, _state);
+    for(s=2; s<=m; s++)
+    {
+        
+        /*
+         * Prepare random normal v
+         */
+        do
+        {
+            i = 1;
+            while(i<=s)
+            {
+                hqrndnormal2(&state, &u1, &u2, _state);
+                v.ptr.p_double[i] = u1;
+                if( i+1<=s )
+                {
+                    v.ptr.p_double[i+1] = u2;
+                }
+                i = i+2;
+            }
+            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
+        }
+        while(ae_fp_eq(lambdav,0));
+        
+        /*
+         * Prepare and apply reflection
+         */
+        generatereflection(&v, s, &tau, _state);
+        v.ptr.p_double[1] = 1;
+        applyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state);
+    }
+    
+    /*
+     * Second pass.
+     */
+    for(i=0; i<=m-1; i++)
+    {
+        tau = 2*ae_randominteger(2, _state)-1;
+        ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Multiplication of MxN complex matrix by NxN random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonalfromtheright(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_complex lambdav;
+    ae_complex tau;
+    ae_int_t s;
+    ae_int_t i;
+    ae_vector w;
+    ae_vector v;
+    hqrndstate state;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
+    if( n==1 )
+    {
+        
+        /*
+         * Special case
+         */
+        hqrndrandomize(&state, _state);
+        hqrndunit2(&state, &tau.x, &tau.y, _state);
+        for(i=0; i<=m-1; i++)
+        {
+            a->ptr.pp_complex[i][0] = ae_c_mul(a->ptr.pp_complex[i][0],tau);
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * General case.
+     * First pass.
+     */
+    ae_vector_set_length(&w, m, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    hqrndrandomize(&state, _state);
+    for(s=2; s<=n; s++)
+    {
+        
+        /*
+         * Prepare random normal v
+         */
+        do
+        {
+            for(i=1; i<=s; i++)
+            {
+                hqrndnormal2(&state, &tau.x, &tau.y, _state);
+                v.ptr.p_complex[i] = tau;
+            }
+            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
+        }
+        while(ae_c_eq_d(lambdav,0));
+        
+        /*
+         * Prepare and apply reflection
+         */
+        complexgeneratereflection(&v, s, &tau, _state);
+        v.ptr.p_complex[1] = ae_complex_from_d(1);
+        complexapplyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state);
+    }
+    
+    /*
+     * Second pass.
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        hqrndunit2(&state, &tau.x, &tau.y, _state);
+        ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,m-1), tau);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Multiplication of MxN complex matrix by MxM random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q*A, where Q is random MxM orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonalfromtheleft(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_complex tau;
+    ae_complex lambdav;
+    ae_int_t s;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector w;
+    ae_vector v;
+    hqrndstate state;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
+    if( m==1 )
+    {
+        
+        /*
+         * special case
+         */
+        hqrndrandomize(&state, _state);
+        hqrndunit2(&state, &tau.x, &tau.y, _state);
+        for(j=0; j<=n-1; j++)
+        {
+            a->ptr.pp_complex[0][j] = ae_c_mul(a->ptr.pp_complex[0][j],tau);
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * General case.
+     * First pass.
+     */
+    ae_vector_set_length(&w, n, _state);
+    ae_vector_set_length(&v, m+1, _state);
+    hqrndrandomize(&state, _state);
+    for(s=2; s<=m; s++)
+    {
+        
+        /*
+         * Prepare random normal v
+         */
+        do
+        {
+            for(i=1; i<=s; i++)
+            {
+                hqrndnormal2(&state, &tau.x, &tau.y, _state);
+                v.ptr.p_complex[i] = tau;
+            }
+            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
+        }
+        while(ae_c_eq_d(lambdav,0));
+        
+        /*
+         * Prepare and apply reflection
+         */
+        complexgeneratereflection(&v, s, &tau, _state);
+        v.ptr.p_complex[1] = ae_complex_from_d(1);
+        complexapplyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state);
+    }
+    
+    /*
+     * Second pass.
+     */
+    for(i=0; i<=m-1; i++)
+    {
+        hqrndunit2(&state, &tau.x, &tau.y, _state);
+        ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Symmetric multiplication of NxN matrix by random Haar distributed
+orthogonal  matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..N-1, 0..N-1]
+    N   -   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void smatrixrndmultiply(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    double tau;
+    double lambdav;
+    ae_int_t s;
+    ae_int_t i;
+    double u1;
+    double u2;
+    ae_vector w;
+    ae_vector v;
+    hqrndstate state;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    
+    /*
+     * General case.
+     */
+    ae_vector_set_length(&w, n, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    hqrndrandomize(&state, _state);
+    for(s=2; s<=n; s++)
+    {
+        
+        /*
+         * Prepare random normal v
+         */
+        do
+        {
+            i = 1;
+            while(i<=s)
+            {
+                hqrndnormal2(&state, &u1, &u2, _state);
+                v.ptr.p_double[i] = u1;
+                if( i+1<=s )
+                {
+                    v.ptr.p_double[i+1] = u2;
+                }
+                i = i+2;
+            }
+            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
+        }
+        while(ae_fp_eq(lambdav,0));
+        
+        /*
+         * Prepare and apply reflection
+         */
+        generatereflection(&v, s, &tau, _state);
+        v.ptr.p_double[1] = 1;
+        applyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state);
+        applyreflectionfromtheleft(a, tau, &v, n-s, n-1, 0, n-1, &w, _state);
+    }
+    
+    /*
+     * Second pass.
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        tau = 2*ae_randominteger(2, _state)-1;
+        ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,n-1), tau);
+        ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Hermitian multiplication of NxN matrix by random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..N-1, 0..N-1]
+    N   -   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hmatrixrndmultiply(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_complex tau;
+    ae_complex lambdav;
+    ae_int_t s;
+    ae_int_t i;
+    ae_vector w;
+    ae_vector v;
+    hqrndstate state;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
+    _hqrndstate_init(&state, _state, ae_true);
+
+    
+    /*
+     * General case.
+     */
+    ae_vector_set_length(&w, n, _state);
+    ae_vector_set_length(&v, n+1, _state);
+    hqrndrandomize(&state, _state);
+    for(s=2; s<=n; s++)
+    {
+        
+        /*
+         * Prepare random normal v
+         */
+        do
+        {
+            for(i=1; i<=s; i++)
+            {
+                hqrndnormal2(&state, &tau.x, &tau.y, _state);
+                v.ptr.p_complex[i] = tau;
+            }
+            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
+        }
+        while(ae_c_eq_d(lambdav,0));
+        
+        /*
+         * Prepare and apply reflection
+         */
+        complexgeneratereflection(&v, s, &tau, _state);
+        v.ptr.p_complex[1] = ae_complex_from_d(1);
+        complexapplyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state);
+        complexapplyreflectionfromtheleft(a, ae_c_conj(tau, _state), &v, n-s, n-1, 0, n-1, &w, _state);
+    }
+    
+    /*
+     * Second pass.
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        hqrndunit2(&state, &tau.x, &tau.y, _state);
+        ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,n-1), tau);
+        tau = ae_c_conj(tau, _state);
+        ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau);
+    }
+    ae_frame_leave(_state);
+}
+
+
+
+
+/*************************************************************************
+LU decomposition of a general real matrix with row pivoting
+
+A is represented as A = P*L*U, where:
+* L is lower unitriangular matrix
+* U is upper triangular matrix
+* P = P0*P1*...*PK, K=min(M,N)-1,
+  Pi - permutation matrix for I and Pivots[I]
+
+This is cache-oblivous implementation of LU decomposition.
+It is optimized for square matrices. As for rectangular matrices:
+* best case - M>>N
+* worst case - N>>M, small M, large N, matrix does not fit in CPU cache
+
+INPUT PARAMETERS:
+    A       -   array[0..M-1, 0..N-1].
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+
+OUTPUT PARAMETERS:
+    A       -   matrices L and U in compact form:
+                * L is stored under main diagonal
+                * U is stored on and above main diagonal
+    Pivots  -   permutation matrix in compact form.
+                array[0..Min(M-1,N-1)].
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlu(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state)
+{
+
+    ae_vector_clear(pivots);
+
+    ae_assert(m>0, "RMatrixLU: incorrect M!", _state);
+    ae_assert(n>0, "RMatrixLU: incorrect N!", _state);
+    rmatrixplu(a, m, n, pivots, _state);
+}
+
+
+/*************************************************************************
+LU decomposition of a general complex matrix with row pivoting
+
+A is represented as A = P*L*U, where:
+* L is lower unitriangular matrix
+* U is upper triangular matrix
+* P = P0*P1*...*PK, K=min(M,N)-1,
+  Pi - permutation matrix for I and Pivots[I]
+
+This is cache-oblivous implementation of LU decomposition. It is optimized
+for square matrices. As for rectangular matrices:
+* best case - M>>N
+* worst case - N>>M, small M, large N, matrix does not fit in CPU cache
+
+INPUT PARAMETERS:
+    A       -   array[0..M-1, 0..N-1].
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+
+OUTPUT PARAMETERS:
+    A       -   matrices L and U in compact form:
+                * L is stored under main diagonal
+                * U is stored on and above main diagonal
+    Pivots  -   permutation matrix in compact form.
+                array[0..Min(M-1,N-1)].
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlu(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state)
+{
+
+    ae_vector_clear(pivots);
+
+    ae_assert(m>0, "CMatrixLU: incorrect M!", _state);
+    ae_assert(n>0, "CMatrixLU: incorrect N!", _state);
+    cmatrixplu(a, m, n, pivots, _state);
+}
+
+
+/*************************************************************************
+Cache-oblivious Cholesky decomposition
+
+The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
+definite matrix. The result of an algorithm is a representation  of  A  as
+A=U'*U  or A=L*L' (here X' detones conj(X^T)).
+
+INPUT PARAMETERS:
+    A       -   upper or lower triangle of a factorized matrix.
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   if IsUpper=True, then A contains an upper triangle of
+                a symmetric matrix, otherwise A contains a lower one.
+
+OUTPUT PARAMETERS:
+    A       -   the result of factorization. If IsUpper=True, then
+                the upper triangle contains matrix U, so that A = U'*U,
+                and the elements below the main diagonal are not modified.
+                Similarly, if IsUpper = False.
+
+RESULT:
+    If  the  matrix  is  positive-definite,  the  function  returns  True.
+    Otherwise, the function returns False. Contents of A is not determined
+    in such case.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool hpdmatrixcholesky(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    if( n<1 )
+    {
+        result = ae_false;
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = trfac_hpdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Cache-oblivious Cholesky decomposition
+
+The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
+definite matrix. The result of an algorithm is a representation  of  A  as
+A=U^T*U  or A=L*L^T
+
+INPUT PARAMETERS:
+    A       -   upper or lower triangle of a factorized matrix.
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   if IsUpper=True, then A contains an upper triangle of
+                a symmetric matrix, otherwise A contains a lower one.
+
+OUTPUT PARAMETERS:
+    A       -   the result of factorization. If IsUpper=True, then
+                the upper triangle contains matrix U, so that A = U^T*U,
+                and the elements below the main diagonal are not modified.
+                Similarly, if IsUpper = False.
+
+RESULT:
+    If  the  matrix  is  positive-definite,  the  function  returns  True.
+    Otherwise, the function returns False. Contents of A is not determined
+    in such case.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool spdmatrixcholesky(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    if( n<1 )
+    {
+        result = ae_false;
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = spdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+void rmatrixlup(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_int_t i;
+    ae_int_t j;
+    double mx;
+    double v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(pivots);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Internal LU decomposition subroutine.
+     * Never call it directly.
+     */
+    ae_assert(m>0, "RMatrixLUP: incorrect M!", _state);
+    ae_assert(n>0, "RMatrixLUP: incorrect N!", _state);
+    
+    /*
+     * Scale matrix to avoid overflows,
+     * decompose it, then scale back.
+     */
+    mx = 0;
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
+        }
+    }
+    if( ae_fp_neq(mx,0) )
+    {
+        v = 1/mx;
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        }
+    }
+    ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
+    ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
+    trfac_rmatrixluprec(a, 0, m, n, pivots, &tmp, _state);
+    if( ae_fp_neq(mx,0) )
+    {
+        v = mx;
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+void cmatrixlup(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_int_t i;
+    ae_int_t j;
+    double mx;
+    double v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(pivots);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    
+    /*
+     * Internal LU decomposition subroutine.
+     * Never call it directly.
+     */
+    ae_assert(m>0, "CMatrixLUP: incorrect M!", _state);
+    ae_assert(n>0, "CMatrixLUP: incorrect N!", _state);
+    
+    /*
+     * Scale matrix to avoid overflows,
+     * decompose it, then scale back.
+     */
+    mx = 0;
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
+        }
+    }
+    if( ae_fp_neq(mx,0) )
+    {
+        v = 1/mx;
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v);
+        }
+    }
+    ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
+    ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
+    trfac_cmatrixluprec(a, 0, m, n, pivots, &tmp, _state);
+    if( ae_fp_neq(mx,0) )
+    {
+        v = mx;
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+void rmatrixplu(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_int_t i;
+    ae_int_t j;
+    double mx;
+    double v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(pivots);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Internal LU decomposition subroutine.
+     * Never call it directly.
+     */
+    ae_assert(m>0, "RMatrixPLU: incorrect M!", _state);
+    ae_assert(n>0, "RMatrixPLU: incorrect N!", _state);
+    ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
+    ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
+    
+    /*
+     * Scale matrix to avoid overflows,
+     * decompose it, then scale back.
+     */
+    mx = 0;
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
+        }
+    }
+    if( ae_fp_neq(mx,0) )
+    {
+        v = 1/mx;
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        }
+    }
+    trfac_rmatrixplurec(a, 0, m, n, pivots, &tmp, _state);
+    if( ae_fp_neq(mx,0) )
+    {
+        v = mx;
+        for(i=0; i<=ae_minint(m, n, _state)-1; i++)
+        {
+            ae_v_muld(&a->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+void cmatrixplu(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tmp;
+    ae_int_t i;
+    ae_int_t j;
+    double mx;
+    ae_complex v;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_clear(pivots);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    
+    /*
+     * Internal LU decomposition subroutine.
+     * Never call it directly.
+     */
+    ae_assert(m>0, "CMatrixPLU: incorrect M!", _state);
+    ae_assert(n>0, "CMatrixPLU: incorrect N!", _state);
+    ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
+    ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
+    
+    /*
+     * Scale matrix to avoid overflows,
+     * decompose it, then scale back.
+     */
+    mx = 0;
+    for(i=0; i<=m-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
+        }
+    }
+    if( ae_fp_neq(mx,0) )
+    {
+        v = ae_complex_from_d(1/mx);
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v);
+        }
+    }
+    trfac_cmatrixplurec(a, 0, m, n, pivots, &tmp, _state);
+    if( ae_fp_neq(mx,0) )
+    {
+        v = ae_complex_from_d(mx);
+        for(i=0; i<=ae_minint(m, n, _state)-1; i++)
+        {
+            ae_v_cmulc(&a->ptr.pp_complex[i][i], 1, ae_v_len(i,n-1), v);
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Recursive computational subroutine for SPDMatrixCholesky.
+
+INPUT PARAMETERS:
+    A       -   matrix given by upper or lower triangle
+    Offs    -   offset of diagonal block to decompose
+    N       -   diagonal block size
+    IsUpper -   what half is given
+    Tmp     -   temporary array; allocated by function, if its size is too
+                small; can be reused on subsequent calls.
+                
+OUTPUT PARAMETERS:
+    A       -   upper (or lower) triangle contains Cholesky decomposition
+
+RESULT:
+    True, on success
+    False, on failure
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+ae_bool spdmatrixcholeskyrec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_bool result;
+
+
+    
+    /*
+     * check N
+     */
+    if( n<1 )
+    {
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Prepare buffer
+     */
+    if( tmp->cnt<2*n )
+    {
+        ae_vector_set_length(tmp, 2*n, _state);
+    }
+    
+    /*
+     * special cases
+     */
+    if( n==1 )
+    {
+        if( ae_fp_greater(a->ptr.pp_double[offs][offs],0) )
+        {
+            a->ptr.pp_double[offs][offs] = ae_sqrt(a->ptr.pp_double[offs][offs], _state);
+            result = ae_true;
+        }
+        else
+        {
+            result = ae_false;
+        }
+        return result;
+    }
+    if( n<=ablasblocksize(a, _state) )
+    {
+        result = trfac_spdmatrixcholesky2(a, offs, n, isupper, tmp, _state);
+        return result;
+    }
+    
+    /*
+     * general case: split task in cache-oblivious manner
+     */
+    result = ae_true;
+    ablassplitlength(a, n, &n1, &n2, _state);
+    result = spdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state);
+    if( !result )
+    {
+        return result;
+    }
+    if( n2>0 )
+    {
+        if( isupper )
+        {
+            rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 1, a, offs, offs+n1, _state);
+            rmatrixsyrk(n2, n1, -1.0, a, offs, offs+n1, 1, 1.0, a, offs+n1, offs+n1, isupper, _state);
+        }
+        else
+        {
+            rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 1, a, offs+n1, offs, _state);
+            rmatrixsyrk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state);
+        }
+        result = spdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state);
+        if( !result )
+        {
+            return result;
+        }
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Recurrent complex LU subroutine.
+Never call it directly.
+
+  -- ALGLIB routine --
+     04.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void trfac_cmatrixluprec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t m1;
+    ae_int_t m2;
+
+
+    
+    /*
+     * Kernel case
+     */
+    if( ae_minint(m, n, _state)<=ablascomplexblocksize(a, _state) )
+    {
+        trfac_cmatrixlup2(a, offs, m, n, pivots, tmp, _state);
+        return;
+    }
+    
+    /*
+     * Preliminary step, make N>=M
+     *
+     *     ( A1 )
+     * A = (    ), where A1 is square
+     *     ( A2 )
+     *
+     * Factorize A1, update A2
+     */
+    if( m>n )
+    {
+        trfac_cmatrixluprec(a, offs, n, n, pivots, tmp, _state);
+        for(i=0; i<=n-1; i++)
+        {
+            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n][offs+i], a->stride, "N", ae_v_len(0,m-n-1));
+            ae_v_cmove(&a->ptr.pp_complex[offs+n][offs+i], a->stride, &a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+n,offs+m-1));
+            ae_v_cmove(&a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n,offs+m-1));
+        }
+        cmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state);
+        return;
+    }
+    
+    /*
+     * Non-kernel case
+     */
+    ablascomplexsplitlength(a, m, &m1, &m2, _state);
+    trfac_cmatrixluprec(a, offs, m1, n, pivots, tmp, _state);
+    if( m2>0 )
+    {
+        for(i=0; i<=m1-1; i++)
+        {
+            if( offs+i!=pivots->ptr.p_int[offs+i] )
+            {
+                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+m1][offs+i], a->stride, "N", ae_v_len(0,m2-1));
+                ae_v_cmove(&a->ptr.pp_complex[offs+m1][offs+i], a->stride, &a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+m1,offs+m-1));
+                ae_v_cmove(&a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m1,offs+m-1));
+            }
+        }
+        cmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state);
+        cmatrixgemm(m-m1, n-m1, m1, ae_complex_from_d(-1.0), a, offs+m1, offs, 0, a, offs, offs+m1, 0, ae_complex_from_d(1.0), a, offs+m1, offs+m1, _state);
+        trfac_cmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state);
+        for(i=0; i<=m2-1; i++)
+        {
+            if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] )
+            {
+                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+m1+i], a->stride, "N", ae_v_len(0,m1-1));
+                ae_v_cmove(&a->ptr.pp_complex[offs][offs+m1+i], a->stride, &a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, "N", ae_v_len(offs,offs+m1-1));
+                ae_v_cmove(&a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m1-1));
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Recurrent real LU subroutine.
+Never call it directly.
+
+  -- ALGLIB routine --
+     04.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void trfac_rmatrixluprec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t m1;
+    ae_int_t m2;
+
+
+    
+    /*
+     * Kernel case
+     */
+    if( ae_minint(m, n, _state)<=ablasblocksize(a, _state) )
+    {
+        trfac_rmatrixlup2(a, offs, m, n, pivots, tmp, _state);
+        return;
+    }
+    
+    /*
+     * Preliminary step, make N>=M
+     *
+     *     ( A1 )
+     * A = (    ), where A1 is square
+     *     ( A2 )
+     *
+     * Factorize A1, update A2
+     */
+    if( m>n )
+    {
+        trfac_rmatrixluprec(a, offs, n, n, pivots, tmp, _state);
+        for(i=0; i<=n-1; i++)
+        {
+            if( offs+i!=pivots->ptr.p_int[offs+i] )
+            {
+                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n][offs+i], a->stride, ae_v_len(0,m-n-1));
+                ae_v_move(&a->ptr.pp_double[offs+n][offs+i], a->stride, &a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+n,offs+m-1));
+                ae_v_move(&a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n,offs+m-1));
+            }
+        }
+        rmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state);
+        return;
+    }
+    
+    /*
+     * Non-kernel case
+     */
+    ablassplitlength(a, m, &m1, &m2, _state);
+    trfac_rmatrixluprec(a, offs, m1, n, pivots, tmp, _state);
+    if( m2>0 )
+    {
+        for(i=0; i<=m1-1; i++)
+        {
+            if( offs+i!=pivots->ptr.p_int[offs+i] )
+            {
+                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+m1][offs+i], a->stride, ae_v_len(0,m2-1));
+                ae_v_move(&a->ptr.pp_double[offs+m1][offs+i], a->stride, &a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+m1,offs+m-1));
+                ae_v_move(&a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m1,offs+m-1));
+            }
+        }
+        rmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state);
+        rmatrixgemm(m-m1, n-m1, m1, -1.0, a, offs+m1, offs, 0, a, offs, offs+m1, 0, 1.0, a, offs+m1, offs+m1, _state);
+        trfac_rmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state);
+        for(i=0; i<=m2-1; i++)
+        {
+            if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] )
+            {
+                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+m1+i], a->stride, ae_v_len(0,m1-1));
+                ae_v_move(&a->ptr.pp_double[offs][offs+m1+i], a->stride, &a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, ae_v_len(offs,offs+m1-1));
+                ae_v_move(&a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m1-1));
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Recurrent complex LU subroutine.
+Never call it directly.
+
+  -- ALGLIB routine --
+     04.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void trfac_cmatrixplurec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t n1;
+    ae_int_t n2;
+
+
+    
+    /*
+     * Kernel case
+     */
+    if( ae_minint(m, n, _state)<=ablascomplexblocksize(a, _state) )
+    {
+        trfac_cmatrixplu2(a, offs, m, n, pivots, tmp, _state);
+        return;
+    }
+    
+    /*
+     * Preliminary step, make M>=N.
+     *
+     * A = (A1 A2), where A1 is square
+     * Factorize A1, update A2
+     */
+    if( n>m )
+    {
+        trfac_cmatrixplurec(a, offs, m, m, pivots, tmp, _state);
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+m], 1, "N", ae_v_len(0,n-m-1));
+            ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+m], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, "N", ae_v_len(offs+m,offs+n-1));
+            ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m,offs+n-1));
+        }
+        cmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state);
+        return;
+    }
+    
+    /*
+     * Non-kernel case
+     */
+    ablascomplexsplitlength(a, n, &n1, &n2, _state);
+    trfac_cmatrixplurec(a, offs, m, n1, pivots, tmp, _state);
+    if( n2>0 )
+    {
+        for(i=0; i<=n1-1; i++)
+        {
+            if( offs+i!=pivots->ptr.p_int[offs+i] )
+            {
+                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+n1], 1, "N", ae_v_len(0,n2-1));
+                ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+n1], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, "N", ae_v_len(offs+n1,offs+n-1));
+                ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n1,offs+n-1));
+            }
+        }
+        cmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state);
+        cmatrixgemm(m-n1, n-n1, n1, ae_complex_from_d(-1.0), a, offs+n1, offs, 0, a, offs, offs+n1, 0, ae_complex_from_d(1.0), a, offs+n1, offs+n1, _state);
+        trfac_cmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state);
+        for(i=0; i<=n2-1; i++)
+        {
+            if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] )
+            {
+                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n1+i][offs], 1, "N", ae_v_len(0,n1-1));
+                ae_v_cmove(&a->ptr.pp_complex[offs+n1+i][offs], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, "N", ae_v_len(offs,offs+n1-1));
+                ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+n1-1));
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Recurrent real LU subroutine.
+Never call it directly.
+
+  -- ALGLIB routine --
+     04.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void trfac_rmatrixplurec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t n1;
+    ae_int_t n2;
+
+
+    
+    /*
+     * Kernel case
+     */
+    if( ae_minint(m, n, _state)<=ablasblocksize(a, _state) )
+    {
+        trfac_rmatrixplu2(a, offs, m, n, pivots, tmp, _state);
+        return;
+    }
+    
+    /*
+     * Preliminary step, make M>=N.
+     *
+     * A = (A1 A2), where A1 is square
+     * Factorize A1, update A2
+     */
+    if( n>m )
+    {
+        trfac_rmatrixplurec(a, offs, m, m, pivots, tmp, _state);
+        for(i=0; i<=m-1; i++)
+        {
+            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+m], 1, ae_v_len(0,n-m-1));
+            ae_v_move(&a->ptr.pp_double[offs+i][offs+m], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, ae_v_len(offs+m,offs+n-1));
+            ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m,offs+n-1));
+        }
+        rmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state);
+        return;
+    }
+    
+    /*
+     * Non-kernel case
+     */
+    ablassplitlength(a, n, &n1, &n2, _state);
+    trfac_rmatrixplurec(a, offs, m, n1, pivots, tmp, _state);
+    if( n2>0 )
+    {
+        for(i=0; i<=n1-1; i++)
+        {
+            if( offs+i!=pivots->ptr.p_int[offs+i] )
+            {
+                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(0,n2-1));
+                ae_v_move(&a->ptr.pp_double[offs+i][offs+n1], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, ae_v_len(offs+n1,offs+n-1));
+                ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n1,offs+n-1));
+            }
+        }
+        rmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state);
+        rmatrixgemm(m-n1, n-n1, n1, -1.0, a, offs+n1, offs, 0, a, offs, offs+n1, 0, 1.0, a, offs+n1, offs+n1, _state);
+        trfac_rmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state);
+        for(i=0; i<=n2-1; i++)
+        {
+            if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] )
+            {
+                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(0,n1-1));
+                ae_v_move(&a->ptr.pp_double[offs+n1+i][offs], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, ae_v_len(offs,offs+n1-1));
+                ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+n1-1));
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Complex LUP kernel
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void trfac_cmatrixlup2(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t jp;
+    ae_complex s;
+
+
+    
+    /*
+     * Quick return if possible
+     */
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    
+    /*
+     * main cycle
+     */
+    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
+    {
+        
+        /*
+         * Find pivot, swap columns
+         */
+        jp = j;
+        for(i=j+1; i<=n-1; i++)
+        {
+            if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+j][offs+i], _state),ae_c_abs(a->ptr.pp_complex[offs+j][offs+jp], _state)) )
+            {
+                jp = i;
+            }
+        }
+        pivots->ptr.p_int[offs+j] = offs+jp;
+        if( jp!=j )
+        {
+            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+j], a->stride, "N", ae_v_len(0,m-1));
+            ae_v_cmove(&a->ptr.pp_complex[offs][offs+j], a->stride, &a->ptr.pp_complex[offs][offs+jp], a->stride, "N", ae_v_len(offs,offs+m-1));
+            ae_v_cmove(&a->ptr.pp_complex[offs][offs+jp], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m-1));
+        }
+        
+        /*
+         * LU decomposition of 1x(N-J) matrix
+         */
+        if( ae_c_neq_d(a->ptr.pp_complex[offs+j][offs+j],0)&&j+1<=n-1 )
+        {
+            s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
+            ae_v_cmulc(&a->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s);
+        }
+        
+        /*
+         * Update trailing (M-J-1)x(N-J-1) matrix
+         */
+        if( j<ae_minint(m-1, n-1, _state) )
+        {
+            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2));
+            ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2));
+            cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Real LUP kernel
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void trfac_rmatrixlup2(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t jp;
+    double s;
+
+
+    
+    /*
+     * Quick return if possible
+     */
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    
+    /*
+     * main cycle
+     */
+    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
+    {
+        
+        /*
+         * Find pivot, swap columns
+         */
+        jp = j;
+        for(i=j+1; i<=n-1; i++)
+        {
+            if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+j][offs+i], _state),ae_fabs(a->ptr.pp_double[offs+j][offs+jp], _state)) )
+            {
+                jp = i;
+            }
+        }
+        pivots->ptr.p_int[offs+j] = offs+jp;
+        if( jp!=j )
+        {
+            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+j], a->stride, ae_v_len(0,m-1));
+            ae_v_move(&a->ptr.pp_double[offs][offs+j], a->stride, &a->ptr.pp_double[offs][offs+jp], a->stride, ae_v_len(offs,offs+m-1));
+            ae_v_move(&a->ptr.pp_double[offs][offs+jp], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m-1));
+        }
+        
+        /*
+         * LU decomposition of 1x(N-J) matrix
+         */
+        if( ae_fp_neq(a->ptr.pp_double[offs+j][offs+j],0)&&j+1<=n-1 )
+        {
+            s = 1/a->ptr.pp_double[offs+j][offs+j];
+            ae_v_muld(&a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s);
+        }
+        
+        /*
+         * Update trailing (M-J-1)x(N-J-1) matrix
+         */
+        if( j<ae_minint(m-1, n-1, _state) )
+        {
+            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2));
+            ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2));
+            rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Complex PLU kernel
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     June 30, 1992
+*************************************************************************/
+static void trfac_cmatrixplu2(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t jp;
+    ae_complex s;
+
+
+    
+    /*
+     * Quick return if possible
+     */
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
+    {
+        
+        /*
+         * Find pivot and test for singularity.
+         */
+        jp = j;
+        for(i=j+1; i<=m-1; i++)
+        {
+            if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+i][offs+j], _state),ae_c_abs(a->ptr.pp_complex[offs+jp][offs+j], _state)) )
+            {
+                jp = i;
+            }
+        }
+        pivots->ptr.p_int[offs+j] = offs+jp;
+        if( ae_c_neq_d(a->ptr.pp_complex[offs+jp][offs+j],0) )
+        {
+            
+            /*
+             *Apply the interchange to rows
+             */
+            if( jp!=j )
+            {
+                for(i=0; i<=n-1; i++)
+                {
+                    s = a->ptr.pp_complex[offs+j][offs+i];
+                    a->ptr.pp_complex[offs+j][offs+i] = a->ptr.pp_complex[offs+jp][offs+i];
+                    a->ptr.pp_complex[offs+jp][offs+i] = s;
+                }
+            }
+            
+            /*
+             *Compute elements J+1:M of J-th column.
+             */
+            if( j+1<=m-1 )
+            {
+                s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
+                ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s);
+            }
+        }
+        if( j<ae_minint(m, n, _state)-1 )
+        {
+            
+            /*
+             *Update trailing submatrix.
+             */
+            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2));
+            ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2));
+            cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Real PLU kernel
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     June 30, 1992
+*************************************************************************/
+static void trfac_rmatrixplu2(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t jp;
+    double s;
+
+
+    
+    /*
+     * Quick return if possible
+     */
+    if( m==0||n==0 )
+    {
+        return;
+    }
+    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
+    {
+        
+        /*
+         * Find pivot and test for singularity.
+         */
+        jp = j;
+        for(i=j+1; i<=m-1; i++)
+        {
+            if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+i][offs+j], _state),ae_fabs(a->ptr.pp_double[offs+jp][offs+j], _state)) )
+            {
+                jp = i;
+            }
+        }
+        pivots->ptr.p_int[offs+j] = offs+jp;
+        if( ae_fp_neq(a->ptr.pp_double[offs+jp][offs+j],0) )
+        {
+            
+            /*
+             *Apply the interchange to rows
+             */
+            if( jp!=j )
+            {
+                for(i=0; i<=n-1; i++)
+                {
+                    s = a->ptr.pp_double[offs+j][offs+i];
+                    a->ptr.pp_double[offs+j][offs+i] = a->ptr.pp_double[offs+jp][offs+i];
+                    a->ptr.pp_double[offs+jp][offs+i] = s;
+                }
+            }
+            
+            /*
+             *Compute elements J+1:M of J-th column.
+             */
+            if( j+1<=m-1 )
+            {
+                s = 1/a->ptr.pp_double[offs+j][offs+j];
+                ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s);
+            }
+        }
+        if( j<ae_minint(m, n, _state)-1 )
+        {
+            
+            /*
+             *Update trailing submatrix.
+             */
+            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2));
+            ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2));
+            rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
+        }
+    }
+}
+
+
+/*************************************************************************
+Recursive computational subroutine for HPDMatrixCholesky
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_bool result;
+
+
+    
+    /*
+     * check N
+     */
+    if( n<1 )
+    {
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Prepare buffer
+     */
+    if( tmp->cnt<2*n )
+    {
+        ae_vector_set_length(tmp, 2*n, _state);
+    }
+    
+    /*
+     * special cases
+     */
+    if( n==1 )
+    {
+        if( ae_fp_greater(a->ptr.pp_complex[offs][offs].x,0) )
+        {
+            a->ptr.pp_complex[offs][offs] = ae_complex_from_d(ae_sqrt(a->ptr.pp_complex[offs][offs].x, _state));
+            result = ae_true;
+        }
+        else
+        {
+            result = ae_false;
+        }
+        return result;
+    }
+    if( n<=ablascomplexblocksize(a, _state) )
+    {
+        result = trfac_hpdmatrixcholesky2(a, offs, n, isupper, tmp, _state);
+        return result;
+    }
+    
+    /*
+     * general case: split task in cache-oblivious manner
+     */
+    result = ae_true;
+    ablascomplexsplitlength(a, n, &n1, &n2, _state);
+    result = trfac_hpdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state);
+    if( !result )
+    {
+        return result;
+    }
+    if( n2>0 )
+    {
+        if( isupper )
+        {
+            cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 2, a, offs, offs+n1, _state);
+            cmatrixsyrk(n2, n1, -1.0, a, offs, offs+n1, 2, 1.0, a, offs+n1, offs+n1, isupper, _state);
+        }
+        else
+        {
+            cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 2, a, offs+n1, offs, _state);
+            cmatrixsyrk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state);
+        }
+        result = trfac_hpdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state);
+        if( !result )
+        {
+            return result;
+        }
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Level-2 Hermitian Cholesky subroutine.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double ajj;
+    ae_complex v;
+    double r;
+    ae_bool result;
+
+
+    result = ae_true;
+    if( n<0 )
+    {
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Quick return if possible
+     */
+    if( n==0 )
+    {
+        return result;
+    }
+    if( isupper )
+    {
+        
+        /*
+         * Compute the Cholesky factorization A = U'*U.
+         */
+        for(j=0; j<=n-1; j++)
+        {
+            
+            /*
+             * Compute U(J,J) and test for non-positive-definiteness.
+             */
+            v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "N", ae_v_len(offs,offs+j-1));
+            ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x;
+            if( ae_fp_less_eq(ajj,0) )
+            {
+                aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
+                result = ae_false;
+                return result;
+            }
+            ajj = ae_sqrt(ajj, _state);
+            aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
+            
+            /*
+             * Compute elements J+1:N-1 of row J.
+             */
+            if( j<n-1 )
+            {
+                if( j>0 )
+                {
+                    ae_v_cmoveneg(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", ae_v_len(0,j-1));
+                    cmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state);
+                    ae_v_cadd(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, &tmp->ptr.p_complex[n], 1, "N", ae_v_len(offs+j+1,offs+n-1));
+                }
+                r = 1/ajj;
+                ae_v_cmuld(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
+            }
+        }
+    }
+    else
+    {
+        
+        /*
+         * Compute the Cholesky factorization A = L*L'.
+         */
+        for(j=0; j<=n-1; j++)
+        {
+            
+            /*
+             * Compute L(J+1,J+1) and test for non-positive-definiteness.
+             */
+            v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", &aaa->ptr.pp_complex[offs+j][offs], 1, "N", ae_v_len(offs,offs+j-1));
+            ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x;
+            if( ae_fp_less_eq(ajj,0) )
+            {
+                aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
+                result = ae_false;
+                return result;
+            }
+            ajj = ae_sqrt(ajj, _state);
+            aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
+            
+            /*
+             * Compute elements J+1:N of column J.
+             */
+            if( j<n-1 )
+            {
+                if( j>0 )
+                {
+                    ae_v_cmove(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", ae_v_len(0,j-1));
+                    cmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state);
+                    for(i=0; i<=n-j-2; i++)
+                    {
+                        aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_div_d(ae_c_sub(aaa->ptr.pp_complex[offs+j+1+i][offs+j],tmp->ptr.p_complex[n+i]),ajj);
+                    }
+                }
+                else
+                {
+                    for(i=0; i<=n-j-2; i++)
+                    {
+                        aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_div_d(aaa->ptr.pp_complex[offs+j+1+i][offs+j],ajj);
+                    }
+                }
+            }
+        }
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Level-2 Cholesky subroutine
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static ae_bool trfac_spdmatrixcholesky2(/* Real    */ ae_matrix* aaa,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double ajj;
+    double v;
+    double r;
+    ae_bool result;
+
+
+    result = ae_true;
+    if( n<0 )
+    {
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Quick return if possible
+     */
+    if( n==0 )
+    {
+        return result;
+    }
+    if( isupper )
+    {
+        
+        /*
+         * Compute the Cholesky factorization A = U'*U.
+         */
+        for(j=0; j<=n-1; j++)
+        {
+            
+            /*
+             * Compute U(J,J) and test for non-positive-definiteness.
+             */
+            v = ae_v_dotproduct(&aaa->ptr.pp_double[offs][offs+j], aaa->stride, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(offs,offs+j-1));
+            ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
+            if( ae_fp_less_eq(ajj,0) )
+            {
+                aaa->ptr.pp_double[offs+j][offs+j] = ajj;
+                result = ae_false;
+                return result;
+            }
+            ajj = ae_sqrt(ajj, _state);
+            aaa->ptr.pp_double[offs+j][offs+j] = ajj;
+            
+            /*
+             * Compute elements J+1:N-1 of row J.
+             */
+            if( j<n-1 )
+            {
+                if( j>0 )
+                {
+                    ae_v_moveneg(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(0,j-1));
+                    rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state);
+                    ae_v_add(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, &tmp->ptr.p_double[n], 1, ae_v_len(offs+j+1,offs+n-1));
+                }
+                r = 1/ajj;
+                ae_v_muld(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
+            }
+        }
+    }
+    else
+    {
+        
+        /*
+         * Compute the Cholesky factorization A = L*L'.
+         */
+        for(j=0; j<=n-1; j++)
+        {
+            
+            /*
+             * Compute L(J+1,J+1) and test for non-positive-definiteness.
+             */
+            v = ae_v_dotproduct(&aaa->ptr.pp_double[offs+j][offs], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(offs,offs+j-1));
+            ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
+            if( ae_fp_less_eq(ajj,0) )
+            {
+                aaa->ptr.pp_double[offs+j][offs+j] = ajj;
+                result = ae_false;
+                return result;
+            }
+            ajj = ae_sqrt(ajj, _state);
+            aaa->ptr.pp_double[offs+j][offs+j] = ajj;
+            
+            /*
+             * Compute elements J+1:N of column J.
+             */
+            if( j<n-1 )
+            {
+                if( j>0 )
+                {
+                    ae_v_move(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(0,j-1));
+                    rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state);
+                    for(i=0; i<=n-j-2; i++)
+                    {
+                        aaa->ptr.pp_double[offs+j+1+i][offs+j] = (aaa->ptr.pp_double[offs+j+1+i][offs+j]-tmp->ptr.p_double[n+i])/ajj;
+                    }
+                }
+                else
+                {
+                    for(i=0; i<=n-j-2; i++)
+                    {
+                        aaa->ptr.pp_double[offs+j+1+i][offs+j] = aaa->ptr.pp_double[offs+j+1+i][offs+j]/ajj;
+                    }
+                }
+            }
+        }
+    }
+    return result;
+}
+
+
+
+
+/*************************************************************************
+Estimate of a matrix condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixrcond1(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    ae_vector t;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixRCond1: N<1!", _state);
+    ae_vector_set_length(&t, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        t.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
+        }
+    }
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
+    }
+    rmatrixlu(a, n, n, &pivots, _state);
+    rcond_rmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixrcondinf(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixRCondInf: N<1!", _state);
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = 0;
+        for(j=0; j<=n-1; j++)
+        {
+            v = v+ae_fabs(a->ptr.pp_double[i][j], _state);
+        }
+        nrm = ae_maxreal(nrm, v, _state);
+    }
+    rmatrixlu(a, n, n, &pivots, _state);
+    rcond_rmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Condition number estimate of a symmetric positive definite matrix.
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm of condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    A       -   symmetric positive definite matrix which is given by its
+                upper or lower triangle depending on the value of
+                IsUpper. Array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+
+Result:
+    1/LowerBound(cond(A)), if matrix A is positive definite,
+   -1, if matrix A is not positive definite, and its condition number
+    could not be found by this algorithm.
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double spdmatrixrcond(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t j2;
+    double v;
+    double nrm;
+    ae_vector t;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    ae_vector_set_length(&t, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        t.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            if( i==j )
+            {
+                t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][i], _state);
+            }
+            else
+            {
+                t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][j], _state);
+                t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
+            }
+        }
+    }
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
+    }
+    if( spdmatrixcholesky(a, n, isupper, _state) )
+    {
+        rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state);
+        result = v;
+    }
+    else
+    {
+        result = -1;
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Triangular matrix: estimate of a condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A       -   matrix. Array[0..N-1, 0..N-1].
+    N       -   size of A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixtrrcond1(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    ae_vector t;
+    ae_int_t j1;
+    ae_int_t j2;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state);
+    ae_vector_set_length(&t, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        t.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i+1;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i-1;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
+        }
+        if( isunit )
+        {
+            t.ptr.p_double[i] = t.ptr.p_double[i]+1;
+        }
+        else
+        {
+            t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][i], _state);
+        }
+    }
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
+    }
+    rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Triangular matrix: estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixtrrcondinf(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    ae_int_t j1;
+    ae_int_t j2;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state);
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i+1;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i-1;
+        }
+        v = 0;
+        for(j=j1; j<=j2; j++)
+        {
+            v = v+ae_fabs(a->ptr.pp_double[i][j], _state);
+        }
+        if( isunit )
+        {
+            v = v+1;
+        }
+        else
+        {
+            v = v+ae_fabs(a->ptr.pp_double[i][i], _state);
+        }
+        nrm = ae_maxreal(nrm, v, _state);
+    }
+    rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Condition number estimate of a Hermitian positive definite matrix.
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm of condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    A       -   Hermitian positive definite matrix which is given by its
+                upper or lower triangle depending on the value of
+                IsUpper. Array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+
+Result:
+    1/LowerBound(cond(A)), if matrix A is positive definite,
+   -1, if matrix A is not positive definite, and its condition number
+    could not be found by this algorithm.
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double hpdmatrixrcond(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t j2;
+    double v;
+    double nrm;
+    ae_vector t;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    ae_vector_set_length(&t, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        t.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            if( i==j )
+            {
+                t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][i], _state);
+            }
+            else
+            {
+                t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
+                t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
+            }
+        }
+    }
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
+    }
+    if( hpdmatrixcholesky(a, n, isupper, _state) )
+    {
+        rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state);
+        result = v;
+    }
+    else
+    {
+        result = -1;
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of a matrix condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixrcond1(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    ae_vector t;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>=1, "CMatrixRCond1: N<1!", _state);
+    ae_vector_set_length(&t, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        t.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=n-1; j++)
+        {
+            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
+        }
+    }
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
+    }
+    cmatrixlu(a, n, n, &pivots, _state);
+    rcond_cmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixrcondinf(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "CMatrixRCondInf: N<1!", _state);
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = 0;
+        for(j=0; j<=n-1; j++)
+        {
+            v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state);
+        }
+        nrm = ae_maxreal(nrm, v, _state);
+    }
+    cmatrixlu(a, n, n, &pivots, _state);
+    rcond_cmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA         -   LU decomposition of a matrix in compact form. Output of
+                    the RMatrixLU subroutine.
+    N           -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixlurcond1(/* Real    */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state)
+{
+    double v;
+    double result;
+
+
+    rcond_rmatrixrcondluinternal(lua, n, ae_true, ae_false, 0, &v, _state);
+    result = v;
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition
+(infinity norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA     -   LU decomposition of a matrix in compact form. Output of
+                the RMatrixLU subroutine.
+    N       -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixlurcondinf(/* Real    */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state)
+{
+    double v;
+    double result;
+
+
+    rcond_rmatrixrcondluinternal(lua, n, ae_false, ae_false, 0, &v, _state);
+    result = v;
+    return result;
+}
+
+
+/*************************************************************************
+Condition number estimate of a symmetric positive definite matrix given by
+Cholesky decomposition.
+
+The algorithm calculates a lower bound of the condition number. In this
+case, the algorithm does not return a lower bound of the condition number,
+but an inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    CD  - Cholesky decomposition of matrix A,
+          output of SMatrixCholesky subroutine.
+    N   - size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double spdmatrixcholeskyrcond(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    double v;
+    double result;
+
+
+    rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, 0, &v, _state);
+    result = v;
+    return result;
+}
+
+
+/*************************************************************************
+Condition number estimate of a Hermitian positive definite matrix given by
+Cholesky decomposition.
+
+The algorithm calculates a lower bound of the condition number. In this
+case, the algorithm does not return a lower bound of the condition number,
+but an inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    CD  - Cholesky decomposition of matrix A,
+          output of SMatrixCholesky subroutine.
+    N   - size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double hpdmatrixcholeskyrcond(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    double v;
+    double result;
+
+
+    rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, 0, &v, _state);
+    result = v;
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA         -   LU decomposition of a matrix in compact form. Output of
+                    the CMatrixLU subroutine.
+    N           -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixlurcond1(/* Complex */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state)
+{
+    double v;
+    double result;
+
+
+    ae_assert(n>=1, "CMatrixLURCond1: N<1!", _state);
+    rcond_cmatrixrcondluinternal(lua, n, ae_true, ae_false, 0.0, &v, _state);
+    result = v;
+    return result;
+}
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition
+(infinity norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA     -   LU decomposition of a matrix in compact form. Output of
+                the CMatrixLU subroutine.
+    N       -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixlurcondinf(/* Complex */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state)
+{
+    double v;
+    double result;
+
+
+    ae_assert(n>=1, "CMatrixLURCondInf: N<1!", _state);
+    rcond_cmatrixrcondluinternal(lua, n, ae_false, ae_false, 0.0, &v, _state);
+    result = v;
+    return result;
+}
+
+
+/*************************************************************************
+Triangular matrix: estimate of a condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A       -   matrix. Array[0..N-1, 0..N-1].
+    N       -   size of A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixtrrcond1(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    ae_vector t;
+    ae_int_t j1;
+    ae_int_t j2;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state);
+    ae_vector_set_length(&t, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        t.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i+1;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i-1;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
+        }
+        if( isunit )
+        {
+            t.ptr.p_double[i] = t.ptr.p_double[i]+1;
+        }
+        else
+        {
+            t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][i], _state);
+        }
+    }
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
+    }
+    rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Triangular matrix: estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixtrrcondinf(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double nrm;
+    ae_vector pivots;
+    ae_int_t j1;
+    ae_int_t j2;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state);
+    nrm = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i+1;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i-1;
+        }
+        v = 0;
+        for(j=j1; j<=j2; j++)
+        {
+            v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state);
+        }
+        if( isunit )
+        {
+            v = v+1;
+        }
+        else
+        {
+            v = v+ae_c_abs(a->ptr.pp_complex[i][i], _state);
+        }
+        nrm = ae_maxreal(nrm, v, _state);
+    }
+    rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state);
+    result = v;
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Threshold for rcond: matrices with condition number beyond this  threshold
+are considered singular.
+
+Threshold must be far enough from underflow, at least Sqr(Threshold)  must
+be greater than underflow.
+*************************************************************************/
+double rcondthreshold(ae_state *_state)
+{
+    double result;
+
+
+    result = ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state);
+    return result;
+}
+
+
+/*************************************************************************
+Internal subroutine for condition number estimation
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static void rcond_rmatrixrcondtrinternal(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_bool onenorm,
+     double anorm,
+     double* rc,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector ex;
+    ae_vector ev;
+    ae_vector iwork;
+    ae_vector tmp;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t kase;
+    ae_int_t kase1;
+    ae_int_t j1;
+    ae_int_t j2;
+    double ainvnm;
+    double maxgrowth;
+    double s;
+    ae_bool mupper;
+    ae_bool mtrans;
+    ae_bool munit;
+
+    ae_frame_make(_state, &_frame_block);
+    *rc = 0;
+    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * RC=0 if something happens
+     */
+    *rc = 0;
+    
+    /*
+     * init
+     */
+    if( onenorm )
+    {
+        kase1 = 1;
+    }
+    else
+    {
+        kase1 = 2;
+    }
+    mupper = ae_true;
+    mtrans = ae_true;
+    munit = ae_true;
+    ae_vector_set_length(&iwork, n+1, _state);
+    ae_vector_set_length(&tmp, n, _state);
+    
+    /*
+     * prepare parameters for triangular solver
+     */
+    maxgrowth = 1/rcondthreshold(_state);
+    s = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i+1;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i-1;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
+        }
+        if( isunit )
+        {
+            s = ae_maxreal(s, 1, _state);
+        }
+        else
+        {
+            s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][i], _state), _state);
+        }
+    }
+    if( ae_fp_eq(s,0) )
+    {
+        s = 1;
+    }
+    s = 1/s;
+    
+    /*
+     * Scale according to S
+     */
+    anorm = anorm*s;
+    
+    /*
+     * Quick return if possible
+     * We assume that ANORM<>0 after this block
+     */
+    if( ae_fp_eq(anorm,0) )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        *rc = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Estimate the norm of inv(A).
+     */
+    ainvnm = 0;
+    kase = 0;
+    for(;;)
+    {
+        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
+        if( kase==0 )
+        {
+            break;
+        }
+        
+        /*
+         * from 1-based array to 0-based
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
+        }
+        
+        /*
+         * multiply by inv(A) or inv(A')
+         */
+        if( kase==kase1 )
+        {
+            
+            /*
+             * multiply by inv(A)
+             */
+            if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        else
+        {
+            
+            /*
+             * multiply by inv(A')
+             */
+            if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 1, isunit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        
+        /*
+         * from 0-based array to 1-based
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
+        }
+    }
+    
+    /*
+     * Compute the estimate of the reciprocal condition number.
+     */
+    if( ae_fp_neq(ainvnm,0) )
+    {
+        *rc = 1/ainvnm;
+        *rc = *rc/anorm;
+        if( ae_fp_less(*rc,rcondthreshold(_state)) )
+        {
+            *rc = 0;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Condition number estimation
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     March 31, 1993
+*************************************************************************/
+static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_bool onenorm,
+     double anorm,
+     double* rc,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector ex;
+    ae_vector cwork2;
+    ae_vector cwork3;
+    ae_vector cwork4;
+    ae_vector isave;
+    ae_vector rsave;
+    ae_int_t kase;
+    ae_int_t kase1;
+    double ainvnm;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t j1;
+    ae_int_t j2;
+    double s;
+    double maxgrowth;
+
+    ae_frame_make(_state, &_frame_block);
+    *rc = 0;
+    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * RC=0 if something happens
+     */
+    *rc = 0;
+    
+    /*
+     * init
+     */
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==0 )
+    {
+        *rc = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_vector_set_length(&cwork2, n+1, _state);
+    
+    /*
+     * prepare parameters for triangular solver
+     */
+    maxgrowth = 1/rcondthreshold(_state);
+    s = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( isupper )
+        {
+            j1 = i+1;
+            j2 = n-1;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = i-1;
+        }
+        for(j=j1; j<=j2; j++)
+        {
+            s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
+        }
+        if( isunit )
+        {
+            s = ae_maxreal(s, 1, _state);
+        }
+        else
+        {
+            s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][i], _state), _state);
+        }
+    }
+    if( ae_fp_eq(s,0) )
+    {
+        s = 1;
+    }
+    s = 1/s;
+    
+    /*
+     * Scale according to S
+     */
+    anorm = anorm*s;
+    
+    /*
+     * Quick return if possible
+     */
+    if( ae_fp_eq(anorm,0) )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Estimate the norm of inv(A).
+     */
+    ainvnm = 0;
+    if( onenorm )
+    {
+        kase1 = 1;
+    }
+    else
+    {
+        kase1 = 2;
+    }
+    kase = 0;
+    for(;;)
+    {
+        rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state);
+        if( kase==0 )
+        {
+            break;
+        }
+        
+        /*
+         * From 1-based to 0-based
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
+        }
+        
+        /*
+         * multiply by inv(A) or inv(A')
+         */
+        if( kase==kase1 )
+        {
+            
+            /*
+             * multiply by inv(A)
+             */
+            if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        else
+        {
+            
+            /*
+             * multiply by inv(A')
+             */
+            if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 2, isunit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        
+        /*
+         * from 0-based to 1-based
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
+        }
+    }
+    
+    /*
+     * Compute the estimate of the reciprocal condition number.
+     */
+    if( ae_fp_neq(ainvnm,0) )
+    {
+        *rc = 1/ainvnm;
+        *rc = *rc/anorm;
+        if( ae_fp_less(*rc,rcondthreshold(_state)) )
+        {
+            *rc = 0;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Internal subroutine for condition number estimation
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static void rcond_spdmatrixrcondcholeskyinternal(/* Real    */ ae_matrix* cha,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isnormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t kase;
+    double ainvnm;
+    ae_vector ex;
+    ae_vector ev;
+    ae_vector tmp;
+    ae_vector iwork;
+    double sa;
+    double v;
+    double maxgrowth;
+
+    ae_frame_make(_state, &_frame_block);
+    *rc = 0;
+    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "Assertion failed", _state);
+    ae_vector_set_length(&tmp, n, _state);
+    
+    /*
+     * RC=0 if something happens
+     */
+    *rc = 0;
+    
+    /*
+     * prepare parameters for triangular solver
+     */
+    maxgrowth = 1/rcondthreshold(_state);
+    sa = 0;
+    if( isupper )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=i; j<=n-1; j++)
+            {
+                sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state);
+            }
+        }
+    }
+    else
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=i; j++)
+            {
+                sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state);
+            }
+        }
+    }
+    if( ae_fp_eq(sa,0) )
+    {
+        sa = 1;
+    }
+    sa = 1/sa;
+    
+    /*
+     * Estimate the norm of A.
+     */
+    if( !isnormprovided )
+    {
+        kase = 0;
+        anorm = 0;
+        for(;;)
+        {
+            rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state);
+            if( kase==0 )
+            {
+                break;
+            }
+            if( isupper )
+            {
+                
+                /*
+                 * Multiply by U
+                 */
+                for(i=1; i<=n; i++)
+                {
+                    v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
+                    ex.ptr.p_double[i] = v;
+                }
+                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
+                
+                /*
+                 * Multiply by U'
+                 */
+                for(i=0; i<=n-1; i++)
+                {
+                    tmp.ptr.p_double[i] = 0;
+                }
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ex.ptr.p_double[i+1];
+                    ae_v_addd(&tmp.ptr.p_double[i], 1, &cha->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
+                }
+                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
+                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
+            }
+            else
+            {
+                
+                /*
+                 * Multiply by L'
+                 */
+                for(i=0; i<=n-1; i++)
+                {
+                    tmp.ptr.p_double[i] = 0;
+                }
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ex.ptr.p_double[i+1];
+                    ae_v_addd(&tmp.ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i), v);
+                }
+                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
+                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
+                
+                /*
+                 * Multiply by L
+                 */
+                for(i=n; i>=1; i--)
+                {
+                    v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-1));
+                    ex.ptr.p_double[i] = v;
+                }
+                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
+            }
+        }
+    }
+    
+    /*
+     * Quick return if possible
+     */
+    if( ae_fp_eq(anorm,0) )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        *rc = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Estimate the 1-norm of inv(A).
+     */
+    kase = 0;
+    for(;;)
+    {
+        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
+        if( kase==0 )
+        {
+            break;
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
+        }
+        if( isupper )
+        {
+            
+            /*
+             * Multiply by inv(U').
+             */
+            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(U).
+             */
+            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        else
+        {
+            
+            /*
+             * Multiply by inv(L).
+             */
+            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(L').
+             */
+            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        for(i=n-1; i>=0; i--)
+        {
+            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
+        }
+    }
+    
+    /*
+     * Compute the estimate of the reciprocal condition number.
+     */
+    if( ae_fp_neq(ainvnm,0) )
+    {
+        v = 1/ainvnm;
+        *rc = v/anorm;
+        if( ae_fp_less(*rc,rcondthreshold(_state)) )
+        {
+            *rc = 0;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Internal subroutine for condition number estimation
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isnormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector isave;
+    ae_vector rsave;
+    ae_vector ex;
+    ae_vector ev;
+    ae_vector tmp;
+    ae_int_t kase;
+    double ainvnm;
+    ae_complex v;
+    ae_int_t i;
+    ae_int_t j;
+    double sa;
+    double maxgrowth;
+
+    ae_frame_make(_state, &_frame_block);
+    *rc = 0;
+    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&ev, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(n>=1, "Assertion failed", _state);
+    ae_vector_set_length(&tmp, n, _state);
+    
+    /*
+     * RC=0 if something happens
+     */
+    *rc = 0;
+    
+    /*
+     * prepare parameters for triangular solver
+     */
+    maxgrowth = 1/rcondthreshold(_state);
+    sa = 0;
+    if( isupper )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=i; j<=n-1; j++)
+            {
+                sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state);
+            }
+        }
+    }
+    else
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=i; j++)
+            {
+                sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state);
+            }
+        }
+    }
+    if( ae_fp_eq(sa,0) )
+    {
+        sa = 1;
+    }
+    sa = 1/sa;
+    
+    /*
+     * Estimate the norm of A
+     */
+    if( !isnormprovided )
+    {
+        anorm = 0;
+        kase = 0;
+        for(;;)
+        {
+            rcond_cmatrixestimatenorm(n, &ev, &ex, &anorm, &kase, &isave, &rsave, _state);
+            if( kase==0 )
+            {
+                break;
+            }
+            if( isupper )
+            {
+                
+                /*
+                 * Multiply by U
+                 */
+                for(i=1; i<=n; i++)
+                {
+                    v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1));
+                    ex.ptr.p_complex[i] = v;
+                }
+                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
+                
+                /*
+                 * Multiply by U'
+                 */
+                for(i=0; i<=n-1; i++)
+                {
+                    tmp.ptr.p_complex[i] = ae_complex_from_d(0);
+                }
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ex.ptr.p_complex[i+1];
+                    ae_v_caddc(&tmp.ptr.p_complex[i], 1, &cha->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(i,n-1), v);
+                }
+                ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n));
+                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
+            }
+            else
+            {
+                
+                /*
+                 * Multiply by L'
+                 */
+                for(i=0; i<=n-1; i++)
+                {
+                    tmp.ptr.p_complex[i] = ae_complex_from_d(0);
+                }
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ex.ptr.p_complex[i+1];
+                    ae_v_caddc(&tmp.ptr.p_complex[0], 1, &cha->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i), v);
+                }
+                ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n));
+                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
+                
+                /*
+                 * Multiply by L
+                 */
+                for(i=n; i>=1; i--)
+                {
+                    v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-1));
+                    ex.ptr.p_complex[i] = v;
+                }
+                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
+            }
+        }
+    }
+    
+    /*
+     * Quick return if possible
+     * After this block we assume that ANORM<>0
+     */
+    if( ae_fp_eq(anorm,0) )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        *rc = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Estimate the norm of inv(A).
+     */
+    ainvnm = 0;
+    kase = 0;
+    for(;;)
+    {
+        rcond_cmatrixestimatenorm(n, &ev, &ex, &ainvnm, &kase, &isave, &rsave, _state);
+        if( kase==0 )
+        {
+            break;
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
+        }
+        if( isupper )
+        {
+            
+            /*
+             * Multiply by inv(U').
+             */
+            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(U).
+             */
+            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        else
+        {
+            
+            /*
+             * Multiply by inv(L).
+             */
+            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(L').
+             */
+            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        for(i=n-1; i>=0; i--)
+        {
+            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
+        }
+    }
+    
+    /*
+     * Compute the estimate of the reciprocal condition number.
+     */
+    if( ae_fp_neq(ainvnm,0) )
+    {
+        *rc = 1/ainvnm;
+        *rc = *rc/anorm;
+        if( ae_fp_less(*rc,rcondthreshold(_state)) )
+        {
+            *rc = 0;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Internal subroutine for condition number estimation
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static void rcond_rmatrixrcondluinternal(/* Real    */ ae_matrix* lua,
+     ae_int_t n,
+     ae_bool onenorm,
+     ae_bool isanormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector ex;
+    ae_vector ev;
+    ae_vector iwork;
+    ae_vector tmp;
+    double v;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t kase;
+    ae_int_t kase1;
+    double ainvnm;
+    double maxgrowth;
+    double su;
+    double sl;
+    ae_bool mupper;
+    ae_bool mtrans;
+    ae_bool munit;
+
+    ae_frame_make(_state, &_frame_block);
+    *rc = 0;
+    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * RC=0 if something happens
+     */
+    *rc = 0;
+    
+    /*
+     * init
+     */
+    if( onenorm )
+    {
+        kase1 = 1;
+    }
+    else
+    {
+        kase1 = 2;
+    }
+    mupper = ae_true;
+    mtrans = ae_true;
+    munit = ae_true;
+    ae_vector_set_length(&iwork, n+1, _state);
+    ae_vector_set_length(&tmp, n, _state);
+    
+    /*
+     * prepare parameters for triangular solver
+     */
+    maxgrowth = 1/rcondthreshold(_state);
+    su = 0;
+    sl = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=i-1; j++)
+        {
+            sl = ae_maxreal(sl, ae_fabs(lua->ptr.pp_double[i][j], _state), _state);
+        }
+        for(j=i; j<=n-1; j++)
+        {
+            su = ae_maxreal(su, ae_fabs(lua->ptr.pp_double[i][j], _state), _state);
+        }
+    }
+    if( ae_fp_eq(su,0) )
+    {
+        su = 1;
+    }
+    su = 1/su;
+    sl = 1/sl;
+    
+    /*
+     * Estimate the norm of A.
+     */
+    if( !isanormprovided )
+    {
+        kase = 0;
+        anorm = 0;
+        for(;;)
+        {
+            rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state);
+            if( kase==0 )
+            {
+                break;
+            }
+            if( kase==kase1 )
+            {
+                
+                /*
+                 * Multiply by U
+                 */
+                for(i=1; i<=n; i++)
+                {
+                    v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
+                    ex.ptr.p_double[i] = v;
+                }
+                
+                /*
+                 * Multiply by L
+                 */
+                for(i=n; i>=1; i--)
+                {
+                    if( i>1 )
+                    {
+                        v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-2));
+                    }
+                    else
+                    {
+                        v = 0;
+                    }
+                    ex.ptr.p_double[i] = ex.ptr.p_double[i]+v;
+                }
+            }
+            else
+            {
+                
+                /*
+                 * Multiply by L'
+                 */
+                for(i=0; i<=n-1; i++)
+                {
+                    tmp.ptr.p_double[i] = 0;
+                }
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ex.ptr.p_double[i+1];
+                    if( i>=1 )
+                    {
+                        ae_v_addd(&tmp.ptr.p_double[0], 1, &lua->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), v);
+                    }
+                    tmp.ptr.p_double[i] = tmp.ptr.p_double[i]+v;
+                }
+                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
+                
+                /*
+                 * Multiply by U'
+                 */
+                for(i=0; i<=n-1; i++)
+                {
+                    tmp.ptr.p_double[i] = 0;
+                }
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ex.ptr.p_double[i+1];
+                    ae_v_addd(&tmp.ptr.p_double[i], 1, &lua->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
+                }
+                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
+            }
+        }
+    }
+    
+    /*
+     * Scale according to SU/SL
+     */
+    anorm = anorm*su*sl;
+    
+    /*
+     * Quick return if possible
+     * We assume that ANORM<>0 after this block
+     */
+    if( ae_fp_eq(anorm,0) )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    if( n==1 )
+    {
+        *rc = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Estimate the norm of inv(A).
+     */
+    ainvnm = 0;
+    kase = 0;
+    for(;;)
+    {
+        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
+        if( kase==0 )
+        {
+            break;
+        }
+        
+        /*
+         * from 1-based array to 0-based
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
+        }
+        
+        /*
+         * multiply by inv(A) or inv(A')
+         */
+        if( kase==kase1 )
+        {
+            
+            /*
+             * Multiply by inv(L).
+             */
+            if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 0, munit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(U).
+             */
+            if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 0, !munit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        else
+        {
+            
+            /*
+             * Multiply by inv(U').
+             */
+            if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 1, !munit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(L').
+             */
+            if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 1, munit, maxgrowth, _state) )
+            {
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        
+        /*
+         * from 0-based array to 1-based
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
+        }
+    }
+    
+    /*
+     * Compute the estimate of the reciprocal condition number.
+     */
+    if( ae_fp_neq(ainvnm,0) )
+    {
+        *rc = 1/ainvnm;
+        *rc = *rc/anorm;
+        if( ae_fp_less(*rc,rcondthreshold(_state)) )
+        {
+            *rc = 0;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Condition number estimation
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     March 31, 1993
+*************************************************************************/
+static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua,
+     ae_int_t n,
+     ae_bool onenorm,
+     ae_bool isanormprovided,
+     double anorm,
+     double* rc,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector ex;
+    ae_vector cwork2;
+    ae_vector cwork3;
+    ae_vector cwork4;
+    ae_vector isave;
+    ae_vector rsave;
+    ae_int_t kase;
+    ae_int_t kase1;
+    double ainvnm;
+    ae_complex v;
+    ae_int_t i;
+    ae_int_t j;
+    double su;
+    double sl;
+    double maxgrowth;
+
+    ae_frame_make(_state, &_frame_block);
+    *rc = 0;
+    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true);
+    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
+    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
+
+    if( n<=0 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    ae_vector_set_length(&cwork2, n+1, _state);
+    *rc = 0;
+    if( n==0 )
+    {
+        *rc = 1;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * prepare parameters for triangular solver
+     */
+    maxgrowth = 1/rcondthreshold(_state);
+    su = 0;
+    sl = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=0; j<=i-1; j++)
+        {
+            sl = ae_maxreal(sl, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state);
+        }
+        for(j=i; j<=n-1; j++)
+        {
+            su = ae_maxreal(su, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state);
+        }
+    }
+    if( ae_fp_eq(su,0) )
+    {
+        su = 1;
+    }
+    su = 1/su;
+    sl = 1/sl;
+    
+    /*
+     * Estimate the norm of SU*SL*A.
+     */
+    if( !isanormprovided )
+    {
+        anorm = 0;
+        if( onenorm )
+        {
+            kase1 = 1;
+        }
+        else
+        {
+            kase1 = 2;
+        }
+        kase = 0;
+        do
+        {
+            rcond_cmatrixestimatenorm(n, &cwork4, &ex, &anorm, &kase, &isave, &rsave, _state);
+            if( kase!=0 )
+            {
+                if( kase==kase1 )
+                {
+                    
+                    /*
+                     * Multiply by U
+                     */
+                    for(i=1; i<=n; i++)
+                    {
+                        v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1));
+                        ex.ptr.p_complex[i] = v;
+                    }
+                    
+                    /*
+                     * Multiply by L
+                     */
+                    for(i=n; i>=1; i--)
+                    {
+                        v = ae_complex_from_d(0);
+                        if( i>1 )
+                        {
+                            v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-2));
+                        }
+                        ex.ptr.p_complex[i] = ae_c_add(v,ex.ptr.p_complex[i]);
+                    }
+                }
+                else
+                {
+                    
+                    /*
+                     * Multiply by L'
+                     */
+                    for(i=1; i<=n; i++)
+                    {
+                        cwork2.ptr.p_complex[i] = ae_complex_from_d(0);
+                    }
+                    for(i=1; i<=n; i++)
+                    {
+                        v = ex.ptr.p_complex[i];
+                        if( i>1 )
+                        {
+                            ae_v_caddc(&cwork2.ptr.p_complex[1], 1, &lua->ptr.pp_complex[i-1][0], 1, "Conj", ae_v_len(1,i-1), v);
+                        }
+                        cwork2.ptr.p_complex[i] = ae_c_add(cwork2.ptr.p_complex[i],v);
+                    }
+                    
+                    /*
+                     * Multiply by U'
+                     */
+                    for(i=1; i<=n; i++)
+                    {
+                        ex.ptr.p_complex[i] = ae_complex_from_d(0);
+                    }
+                    for(i=1; i<=n; i++)
+                    {
+                        v = cwork2.ptr.p_complex[i];
+                        ae_v_caddc(&ex.ptr.p_complex[i], 1, &lua->ptr.pp_complex[i-1][i-1], 1, "Conj", ae_v_len(i,n), v);
+                    }
+                }
+            }
+        }
+        while(kase!=0);
+    }
+    
+    /*
+     * Scale according to SU/SL
+     */
+    anorm = anorm*su*sl;
+    
+    /*
+     * Quick return if possible
+     */
+    if( ae_fp_eq(anorm,0) )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Estimate the norm of inv(A).
+     */
+    ainvnm = 0;
+    if( onenorm )
+    {
+        kase1 = 1;
+    }
+    else
+    {
+        kase1 = 2;
+    }
+    kase = 0;
+    for(;;)
+    {
+        rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state);
+        if( kase==0 )
+        {
+            break;
+        }
+        
+        /*
+         * From 1-based to 0-based
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
+        }
+        
+        /*
+         * multiply by inv(A) or inv(A')
+         */
+        if( kase==kase1 )
+        {
+            
+            /*
+             * Multiply by inv(L).
+             */
+            if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 0, ae_true, maxgrowth, _state) )
+            {
+                *rc = 0;
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(U).
+             */
+            if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 0, ae_false, maxgrowth, _state) )
+            {
+                *rc = 0;
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        else
+        {
+            
+            /*
+             * Multiply by inv(U').
+             */
+            if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 2, ae_false, maxgrowth, _state) )
+            {
+                *rc = 0;
+                ae_frame_leave(_state);
+                return;
+            }
+            
+            /*
+             * Multiply by inv(L').
+             */
+            if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 2, ae_true, maxgrowth, _state) )
+            {
+                *rc = 0;
+                ae_frame_leave(_state);
+                return;
+            }
+        }
+        
+        /*
+         * from 0-based to 1-based
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
+        }
+    }
+    
+    /*
+     * Compute the estimate of the reciprocal condition number.
+     */
+    if( ae_fp_neq(ainvnm,0) )
+    {
+        *rc = 1/ainvnm;
+        *rc = *rc/anorm;
+        if( ae_fp_less(*rc,rcondthreshold(_state)) )
+        {
+            *rc = 0;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Internal subroutine for matrix norm estimation
+
+  -- LAPACK auxiliary routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992
+*************************************************************************/
+static void rcond_rmatrixestimatenorm(ae_int_t n,
+     /* Real    */ ae_vector* v,
+     /* Real    */ ae_vector* x,
+     /* Integer */ ae_vector* isgn,
+     double* est,
+     ae_int_t* kase,
+     ae_state *_state)
+{
+    ae_int_t itmax;
+    ae_int_t i;
+    double t;
+    ae_bool flg;
+    ae_int_t positer;
+    ae_int_t posj;
+    ae_int_t posjlast;
+    ae_int_t posjump;
+    ae_int_t posaltsgn;
+    ae_int_t posestold;
+    ae_int_t postemp;
+
+
+    itmax = 5;
+    posaltsgn = n+1;
+    posestold = n+2;
+    postemp = n+3;
+    positer = n+1;
+    posj = n+2;
+    posjlast = n+3;
+    posjump = n+4;
+    if( *kase==0 )
+    {
+        ae_vector_set_length(v, n+4, _state);
+        ae_vector_set_length(x, n+1, _state);
+        ae_vector_set_length(isgn, n+5, _state);
+        t = (double)1/(double)n;
+        for(i=1; i<=n; i++)
+        {
+            x->ptr.p_double[i] = t;
+        }
+        *kase = 1;
+        isgn->ptr.p_int[posjump] = 1;
+        return;
+    }
+    
+    /*
+     *     ................ ENTRY   (JUMP = 1)
+     *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
+     */
+    if( isgn->ptr.p_int[posjump]==1 )
+    {
+        if( n==1 )
+        {
+            v->ptr.p_double[1] = x->ptr.p_double[1];
+            *est = ae_fabs(v->ptr.p_double[1], _state);
+            *kase = 0;
+            return;
+        }
+        *est = 0;
+        for(i=1; i<=n; i++)
+        {
+            *est = *est+ae_fabs(x->ptr.p_double[i], _state);
+        }
+        for(i=1; i<=n; i++)
+        {
+            if( ae_fp_greater_eq(x->ptr.p_double[i],0) )
+            {
+                x->ptr.p_double[i] = 1;
+            }
+            else
+            {
+                x->ptr.p_double[i] = -1;
+            }
+            isgn->ptr.p_int[i] = ae_sign(x->ptr.p_double[i], _state);
+        }
+        *kase = 2;
+        isgn->ptr.p_int[posjump] = 2;
+        return;
+    }
+    
+    /*
+     *     ................ ENTRY   (JUMP = 2)
+     *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
+     */
+    if( isgn->ptr.p_int[posjump]==2 )
+    {
+        isgn->ptr.p_int[posj] = 1;
+        for(i=2; i<=n; i++)
+        {
+            if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) )
+            {
+                isgn->ptr.p_int[posj] = i;
+            }
+        }
+        isgn->ptr.p_int[positer] = 2;
+        
+        /*
+         * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+         */
+        for(i=1; i<=n; i++)
+        {
+            x->ptr.p_double[i] = 0;
+        }
+        x->ptr.p_double[isgn->ptr.p_int[posj]] = 1;
+        *kase = 1;
+        isgn->ptr.p_int[posjump] = 3;
+        return;
+    }
+    
+    /*
+     *     ................ ENTRY   (JUMP = 3)
+     *     X HAS BEEN OVERWRITTEN BY A*X.
+     */
+    if( isgn->ptr.p_int[posjump]==3 )
+    {
+        ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n));
+        v->ptr.p_double[posestold] = *est;
+        *est = 0;
+        for(i=1; i<=n; i++)
+        {
+            *est = *est+ae_fabs(v->ptr.p_double[i], _state);
+        }
+        flg = ae_false;
+        for(i=1; i<=n; i++)
+        {
+            if( (ae_fp_greater_eq(x->ptr.p_double[i],0)&&isgn->ptr.p_int[i]<0)||(ae_fp_less(x->ptr.p_double[i],0)&&isgn->ptr.p_int[i]>=0) )
+            {
+                flg = ae_true;
+            }
+        }
+        
+        /*
+         * REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
+         * OR MAY BE CYCLING.
+         */
+        if( !flg||ae_fp_less_eq(*est,v->ptr.p_double[posestold]) )
+        {
+            v->ptr.p_double[posaltsgn] = 1;
+            for(i=1; i<=n; i++)
+            {
+                x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1));
+                v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn];
+            }
+            *kase = 1;
+            isgn->ptr.p_int[posjump] = 5;
+            return;
+        }
+        for(i=1; i<=n; i++)
+        {
+            if( ae_fp_greater_eq(x->ptr.p_double[i],0) )
+            {
+                x->ptr.p_double[i] = 1;
+                isgn->ptr.p_int[i] = 1;
+            }
+            else
+            {
+                x->ptr.p_double[i] = -1;
+                isgn->ptr.p_int[i] = -1;
+            }
+        }
+        *kase = 2;
+        isgn->ptr.p_int[posjump] = 4;
+        return;
+    }
+    
+    /*
+     *     ................ ENTRY   (JUMP = 4)
+     *     X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
+     */
+    if( isgn->ptr.p_int[posjump]==4 )
+    {
+        isgn->ptr.p_int[posjlast] = isgn->ptr.p_int[posj];
+        isgn->ptr.p_int[posj] = 1;
+        for(i=2; i<=n; i++)
+        {
+            if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) )
+            {
+                isgn->ptr.p_int[posj] = i;
+            }
+        }
+        if( ae_fp_neq(x->ptr.p_double[isgn->ptr.p_int[posjlast]],ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state))&&isgn->ptr.p_int[positer]<itmax )
+        {
+            isgn->ptr.p_int[positer] = isgn->ptr.p_int[positer]+1;
+            for(i=1; i<=n; i++)
+            {
+                x->ptr.p_double[i] = 0;
+            }
+            x->ptr.p_double[isgn->ptr.p_int[posj]] = 1;
+            *kase = 1;
+            isgn->ptr.p_int[posjump] = 3;
+            return;
+        }
+        
+        /*
+         * ITERATION COMPLETE.  FINAL STAGE.
+         */
+        v->ptr.p_double[posaltsgn] = 1;
+        for(i=1; i<=n; i++)
+        {
+            x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1));
+            v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn];
+        }
+        *kase = 1;
+        isgn->ptr.p_int[posjump] = 5;
+        return;
+    }
+    
+    /*
+     *     ................ ENTRY   (JUMP = 5)
+     *     X HAS BEEN OVERWRITTEN BY A*X.
+     */
+    if( isgn->ptr.p_int[posjump]==5 )
+    {
+        v->ptr.p_double[postemp] = 0;
+        for(i=1; i<=n; i++)
+        {
+            v->ptr.p_double[postemp] = v->ptr.p_double[postemp]+ae_fabs(x->ptr.p_double[i], _state);
+        }
+        v->ptr.p_double[postemp] = 2*v->ptr.p_double[postemp]/(3*n);
+        if( ae_fp_greater(v->ptr.p_double[postemp],*est) )
+        {
+            ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n));
+            *est = v->ptr.p_double[postemp];
+        }
+        *kase = 0;
+        return;
+    }
+}
+
+
+static void rcond_cmatrixestimatenorm(ae_int_t n,
+     /* Complex */ ae_vector* v,
+     /* Complex */ ae_vector* x,
+     double* est,
+     ae_int_t* kase,
+     /* Integer */ ae_vector* isave,
+     /* Real    */ ae_vector* rsave,
+     ae_state *_state)
+{
+    ae_int_t itmax;
+    ae_int_t i;
+    ae_int_t iter;
+    ae_int_t j;
+    ae_int_t jlast;
+    ae_int_t jump;
+    double absxi;
+    double altsgn;
+    double estold;
+    double safmin;
+    double temp;
+
+
+    
+    /*
+     *Executable Statements ..
+     */
+    itmax = 5;
+    safmin = ae_minrealnumber;
+    if( *kase==0 )
+    {
+        ae_vector_set_length(v, n+1, _state);
+        ae_vector_set_length(x, n+1, _state);
+        ae_vector_set_length(isave, 5, _state);
+        ae_vector_set_length(rsave, 4, _state);
+        for(i=1; i<=n; i++)
+        {
+            x->ptr.p_complex[i] = ae_complex_from_d((double)1/(double)n);
+        }
+        *kase = 1;
+        jump = 1;
+        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+        return;
+    }
+    rcond_internalcomplexrcondloadall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+    
+    /*
+     * ENTRY   (JUMP = 1)
+     * FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
+     */
+    if( jump==1 )
+    {
+        if( n==1 )
+        {
+            v->ptr.p_complex[1] = x->ptr.p_complex[1];
+            *est = ae_c_abs(v->ptr.p_complex[1], _state);
+            *kase = 0;
+            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+            return;
+        }
+        *est = rcond_internalcomplexrcondscsum1(x, n, _state);
+        for(i=1; i<=n; i++)
+        {
+            absxi = ae_c_abs(x->ptr.p_complex[i], _state);
+            if( ae_fp_greater(absxi,safmin) )
+            {
+                x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi);
+            }
+            else
+            {
+                x->ptr.p_complex[i] = ae_complex_from_d(1);
+            }
+        }
+        *kase = 2;
+        jump = 2;
+        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+        return;
+    }
+    
+    /*
+     * ENTRY   (JUMP = 2)
+     * FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
+     */
+    if( jump==2 )
+    {
+        j = rcond_internalcomplexrcondicmax1(x, n, _state);
+        iter = 2;
+        
+        /*
+         * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+         */
+        for(i=1; i<=n; i++)
+        {
+            x->ptr.p_complex[i] = ae_complex_from_d(0);
+        }
+        x->ptr.p_complex[j] = ae_complex_from_d(1);
+        *kase = 1;
+        jump = 3;
+        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+        return;
+    }
+    
+    /*
+     * ENTRY   (JUMP = 3)
+     * X HAS BEEN OVERWRITTEN BY A*X.
+     */
+    if( jump==3 )
+    {
+        ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n));
+        estold = *est;
+        *est = rcond_internalcomplexrcondscsum1(v, n, _state);
+        
+        /*
+         * TEST FOR CYCLING.
+         */
+        if( ae_fp_less_eq(*est,estold) )
+        {
+            
+            /*
+             * ITERATION COMPLETE.  FINAL STAGE.
+             */
+            altsgn = 1;
+            for(i=1; i<=n; i++)
+            {
+                x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1)));
+                altsgn = -altsgn;
+            }
+            *kase = 1;
+            jump = 5;
+            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+            return;
+        }
+        for(i=1; i<=n; i++)
+        {
+            absxi = ae_c_abs(x->ptr.p_complex[i], _state);
+            if( ae_fp_greater(absxi,safmin) )
+            {
+                x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi);
+            }
+            else
+            {
+                x->ptr.p_complex[i] = ae_complex_from_d(1);
+            }
+        }
+        *kase = 2;
+        jump = 4;
+        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+        return;
+    }
+    
+    /*
+     * ENTRY   (JUMP = 4)
+     * X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
+     */
+    if( jump==4 )
+    {
+        jlast = j;
+        j = rcond_internalcomplexrcondicmax1(x, n, _state);
+        if( ae_fp_neq(ae_c_abs(x->ptr.p_complex[jlast], _state),ae_c_abs(x->ptr.p_complex[j], _state))&&iter<itmax )
+        {
+            iter = iter+1;
+            
+            /*
+             * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+             */
+            for(i=1; i<=n; i++)
+            {
+                x->ptr.p_complex[i] = ae_complex_from_d(0);
+            }
+            x->ptr.p_complex[j] = ae_complex_from_d(1);
+            *kase = 1;
+            jump = 3;
+            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+            return;
+        }
+        
+        /*
+         * ITERATION COMPLETE.  FINAL STAGE.
+         */
+        altsgn = 1;
+        for(i=1; i<=n; i++)
+        {
+            x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1)));
+            altsgn = -altsgn;
+        }
+        *kase = 1;
+        jump = 5;
+        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+        return;
+    }
+    
+    /*
+     * ENTRY   (JUMP = 5)
+     * X HAS BEEN OVERWRITTEN BY A*X.
+     */
+    if( jump==5 )
+    {
+        temp = 2*(rcond_internalcomplexrcondscsum1(x, n, _state)/(3*n));
+        if( ae_fp_greater(temp,*est) )
+        {
+            ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n));
+            *est = temp;
+        }
+        *kase = 0;
+        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
+        return;
+    }
+}
+
+
+static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double result;
+
+
+    result = 0;
+    for(i=1; i<=n; i++)
+    {
+        result = result+ae_c_abs(x->ptr.p_complex[i], _state);
+    }
+    return result;
+}
+
+
+static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double m;
+    ae_int_t result;
+
+
+    result = 1;
+    m = ae_c_abs(x->ptr.p_complex[1], _state);
+    for(i=2; i<=n; i++)
+    {
+        if( ae_fp_greater(ae_c_abs(x->ptr.p_complex[i], _state),m) )
+        {
+            result = i;
+            m = ae_c_abs(x->ptr.p_complex[i], _state);
+        }
+    }
+    return result;
+}
+
+
+static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave,
+     /* Real    */ ae_vector* rsave,
+     ae_int_t* i,
+     ae_int_t* iter,
+     ae_int_t* j,
+     ae_int_t* jlast,
+     ae_int_t* jump,
+     double* absxi,
+     double* altsgn,
+     double* estold,
+     double* temp,
+     ae_state *_state)
+{
+
+
+    isave->ptr.p_int[0] = *i;
+    isave->ptr.p_int[1] = *iter;
+    isave->ptr.p_int[2] = *j;
+    isave->ptr.p_int[3] = *jlast;
+    isave->ptr.p_int[4] = *jump;
+    rsave->ptr.p_double[0] = *absxi;
+    rsave->ptr.p_double[1] = *altsgn;
+    rsave->ptr.p_double[2] = *estold;
+    rsave->ptr.p_double[3] = *temp;
+}
+
+
+static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave,
+     /* Real    */ ae_vector* rsave,
+     ae_int_t* i,
+     ae_int_t* iter,
+     ae_int_t* j,
+     ae_int_t* jlast,
+     ae_int_t* jump,
+     double* absxi,
+     double* altsgn,
+     double* estold,
+     double* temp,
+     ae_state *_state)
+{
+
+
+    *i = isave->ptr.p_int[0];
+    *iter = isave->ptr.p_int[1];
+    *j = isave->ptr.p_int[2];
+    *jlast = isave->ptr.p_int[3];
+    *jump = isave->ptr.p_int[4];
+    *absxi = rsave->ptr.p_double[0];
+    *altsgn = rsave->ptr.p_double[1];
+    *estold = rsave->ptr.p_double[2];
+    *temp = rsave->ptr.p_double[3];
+}
+
+
+
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of RMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of RMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code:
+                * -3    A is singular, or VERY close to singular.
+                        it is filled by zeros in such cases.
+                *  1    task is solved (but matrix A may be ill-conditioned,
+                        check R1/RInf parameters for condition numbers).
+    Rep     -   solver report, see below for more info
+    A       -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+SOLVER REPORT
+
+Subroutine sets following fields of the Rep structure:
+* R1        reciprocal of condition number: 1/cond(A), 1-norm.
+* RInf      reciprocal of condition number: 1/cond(A), inf-norm.
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixluinverse(/* Real    */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    double v;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>0, "RMatrixLUInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "RMatrixLUInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "RMatrixLUInverse: rows(A)<N!", _state);
+    ae_assert(pivots->cnt>=n, "RMatrixLUInverse: len(Pivots)<N!", _state);
+    ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i )
+        {
+            *info = -1;
+        }
+    }
+    ae_assert(*info>0, "RMatrixLUInverse: incorrect Pivots array!", _state);
+    
+    /*
+     * calculate condition numbers
+     */
+    rep->r1 = rmatrixlurcond1(a, n, _state);
+    rep->rinf = rmatrixlurcondinf(a, n, _state);
+    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                a->ptr.pp_double[i][j] = 0;
+            }
+        }
+        rep->r1 = 0;
+        rep->rinf = 0;
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Call cache-oblivious code
+     */
+    ae_vector_set_length(&work, n, _state);
+    matinv_rmatrixluinverserec(a, 0, n, &work, info, rep, _state);
+    
+    /*
+     * apply permutations
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=n-2; j>=0; j--)
+        {
+            k = pivots->ptr.p_int[j];
+            v = a->ptr.pp_double[i][j];
+            a->ptr.pp_double[i][j] = a->ptr.pp_double[i][k];
+            a->ptr.pp_double[i][k] = v;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+Result:
+    True, if the matrix is not singular.
+    False, if the matrix is singular.
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector pivots;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>0, "RMatrixInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "RMatrixInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "RMatrixInverse: rows(A)<N!", _state);
+    ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixInverse: A contains infinite or NaN values!", _state);
+    rmatrixlu(a, n, n, &pivots, _state);
+    rmatrixluinverse(a, &pivots, n, info, rep, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of CMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of CMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixluinverse(/* Complex */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector work;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t k;
+    ae_complex v;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(n>0, "CMatrixLUInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "CMatrixLUInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "CMatrixLUInverse: rows(A)<N!", _state);
+    ae_assert(pivots->cnt>=n, "CMatrixLUInverse: len(Pivots)<N!", _state);
+    ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i )
+        {
+            *info = -1;
+        }
+    }
+    ae_assert(*info>0, "CMatrixLUInverse: incorrect Pivots array!", _state);
+    
+    /*
+     * calculate condition numbers
+     */
+    rep->r1 = cmatrixlurcond1(a, n, _state);
+    rep->rinf = cmatrixlurcondinf(a, n, _state);
+    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+            }
+        }
+        rep->r1 = 0;
+        rep->rinf = 0;
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Call cache-oblivious code
+     */
+    ae_vector_set_length(&work, n, _state);
+    matinv_cmatrixluinverserec(a, 0, n, &work, info, rep, _state);
+    
+    /*
+     * apply permutations
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        for(j=n-2; j>=0; j--)
+        {
+            k = pivots->ptr.p_int[j];
+            v = a->ptr.pp_complex[i][j];
+            a->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][k];
+            a->ptr.pp_complex[i][k] = v;
+        }
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector pivots;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>0, "CRMatrixInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "CRMatrixInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "CRMatrixInverse: rows(A)<N!", _state);
+    ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixInverse: A contains infinite or NaN values!", _state);
+    cmatrixlu(a, n, n, &pivots, _state);
+    cmatrixluinverse(a, &pivots, n, info, rep, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  SPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixcholeskyinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector tmp;
+    matinvreport rep2;
+    ae_bool f;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+    _matinvreport_init(&rep2, _state, ae_true);
+
+    ae_assert(n>0, "SPDMatrixCholeskyInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "SPDMatrixCholeskyInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "SPDMatrixCholeskyInverse: rows(A)<N!", _state);
+    *info = 1;
+    f = ae_true;
+    for(i=0; i<=n-1; i++)
+    {
+        f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state);
+    }
+    ae_assert(f, "SPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state);
+    
+    /*
+     * calculate condition numbers
+     */
+    rep->r1 = spdmatrixcholeskyrcond(a, n, isupper, _state);
+    rep->rinf = rep->r1;
+    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
+    {
+        if( isupper )
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=i; j<=n-1; j++)
+                {
+                    a->ptr.pp_double[i][j] = 0;
+                }
+            }
+        }
+        else
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=0; j<=i; j++)
+                {
+                    a->ptr.pp_double[i][j] = 0;
+                }
+            }
+        }
+        rep->r1 = 0;
+        rep->rinf = 0;
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Inverse
+     */
+    ae_vector_set_length(&tmp, n, _state);
+    matinv_spdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix.
+
+Given an upper or lower triangle of a symmetric positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+
+    *info = 0;
+    _matinvreport_clear(rep);
+
+    ae_assert(n>0, "SPDMatrixInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "SPDMatrixInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "SPDMatrixInverse: rows(A)<N!", _state);
+    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    if( spdmatrixcholesky(a, n, isupper, _state) )
+    {
+        spdmatrixcholeskyinverse(a, n, isupper, info, rep, _state);
+    }
+    else
+    {
+        *info = -3;
+    }
+}
+
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  HPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixcholeskyinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    matinvreport rep2;
+    ae_vector tmp;
+    ae_bool f;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    _matinvreport_init(&rep2, _state, ae_true);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(n>0, "HPDMatrixCholeskyInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "HPDMatrixCholeskyInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "HPDMatrixCholeskyInverse: rows(A)<N!", _state);
+    f = ae_true;
+    for(i=0; i<=n-1; i++)
+    {
+        f = (f&&ae_isfinite(a->ptr.pp_complex[i][i].x, _state))&&ae_isfinite(a->ptr.pp_complex[i][i].y, _state);
+    }
+    ae_assert(f, "HPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    
+    /*
+     * calculate condition numbers
+     */
+    rep->r1 = hpdmatrixcholeskyrcond(a, n, isupper, _state);
+    rep->rinf = rep->r1;
+    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
+    {
+        if( isupper )
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=i; j<=n-1; j++)
+                {
+                    a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+                }
+            }
+        }
+        else
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=0; j<=i; j++)
+                {
+                    a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+                }
+            }
+        }
+        rep->r1 = 0;
+        rep->rinf = 0;
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Inverse
+     */
+    ae_vector_set_length(&tmp, n, _state);
+    matinv_hpdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix.
+
+Given an upper or lower triangle of a Hermitian positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+
+    *info = 0;
+    _matinvreport_clear(rep);
+
+    ae_assert(n>0, "HPDMatrixInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "HPDMatrixInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "HPDMatrixInverse: rows(A)<N!", _state);
+    ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "HPDMatrixInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    if( hpdmatrixcholesky(a, n, isupper, _state) )
+    {
+        hpdmatrixcholeskyinverse(a, n, isupper, info, rep, _state);
+    }
+    else
+    {
+        *info = -3;
+    }
+}
+
+
+/*************************************************************************
+Triangular matrix inverse (real)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixtrinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector tmp;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(n>0, "RMatrixTRInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "RMatrixTRInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "RMatrixTRInverse: rows(A)<N!", _state);
+    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "RMatrixTRInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    
+    /*
+     * calculate condition numbers
+     */
+    rep->r1 = rmatrixtrrcond1(a, n, isupper, isunit, _state);
+    rep->rinf = rmatrixtrrcondinf(a, n, isupper, isunit, _state);
+    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                a->ptr.pp_double[i][j] = 0;
+            }
+        }
+        rep->r1 = 0;
+        rep->rinf = 0;
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Invert
+     */
+    ae_vector_set_length(&tmp, n, _state);
+    matinv_rmatrixtrinverserec(a, 0, n, isupper, isunit, &tmp, info, rep, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Triangular matrix inverse (complex)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixtrinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_vector tmp;
+
+    ae_frame_make(_state, &_frame_block);
+    *info = 0;
+    _matinvreport_clear(rep);
+    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
+
+    ae_assert(n>0, "CMatrixTRInverse: N<=0!", _state);
+    ae_assert(a->cols>=n, "CMatrixTRInverse: cols(A)<N!", _state);
+    ae_assert(a->rows>=n, "CMatrixTRInverse: rows(A)<N!", _state);
+    ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "CMatrixTRInverse: A contains infinite or NaN values!", _state);
+    *info = 1;
+    
+    /*
+     * calculate condition numbers
+     */
+    rep->r1 = cmatrixtrrcond1(a, n, isupper, isunit, _state);
+    rep->rinf = cmatrixtrrcondinf(a, n, isupper, isunit, _state);
+    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
+    {
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=n-1; j++)
+            {
+                a->ptr.pp_complex[i][j] = ae_complex_from_d(0);
+            }
+        }
+        rep->r1 = 0;
+        rep->rinf = 0;
+        *info = -3;
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Invert
+     */
+    ae_vector_set_length(&tmp, n, _state);
+    matinv_cmatrixtrinverserec(a, 0, n, isupper, isunit, &tmp, info, rep, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Triangular matrix inversion, recursive subroutine
+
+  -- ALGLIB --
+     05.02.2010, Bochkanov Sergey.
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992.
+*************************************************************************/
+static void matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     /* Real    */ ae_vector* tmp,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    double ajj;
+
+
+    if( n<1 )
+    {
+        *info = -1;
+        return;
+    }
+    
+    /*
+     * Base case
+     */
+    if( n<=ablasblocksize(a, _state) )
+    {
+        if( isupper )
+        {
+            
+            /*
+             * Compute inverse of upper triangular matrix.
+             */
+            for(j=0; j<=n-1; j++)
+            {
+                if( !isunit )
+                {
+                    if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],0) )
+                    {
+                        *info = -3;
+                        return;
+                    }
+                    a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j];
+                    ajj = -a->ptr.pp_double[offs+j][offs+j];
+                }
+                else
+                {
+                    ajj = -1;
+                }
+                
+                /*
+                 * Compute elements 1:j-1 of j-th column.
+                 */
+                if( j>0 )
+                {
+                    ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(0,j-1));
+                    for(i=0; i<=j-1; i++)
+                    {
+                        if( i<j-1 )
+                        {
+                            v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+i+1], 1, &tmp->ptr.p_double[i+1], 1, ae_v_len(offs+i+1,offs+j-1));
+                        }
+                        else
+                        {
+                            v = 0;
+                        }
+                        if( !isunit )
+                        {
+                            a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[i];
+                        }
+                        else
+                        {
+                            a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[i];
+                        }
+                    }
+                    ae_v_muld(&a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj);
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Compute inverse of lower triangular matrix.
+             */
+            for(j=n-1; j>=0; j--)
+            {
+                if( !isunit )
+                {
+                    if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],0) )
+                    {
+                        *info = -3;
+                        return;
+                    }
+                    a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j];
+                    ajj = -a->ptr.pp_double[offs+j][offs+j];
+                }
+                else
+                {
+                    ajj = -1;
+                }
+                if( j<n-1 )
+                {
+                    
+                    /*
+                     * Compute elements j+1:n of j-th column.
+                     */
+                    ae_v_move(&tmp->ptr.p_double[j+1], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(j+1,n-1));
+                    for(i=j+1; i<=n-1; i++)
+                    {
+                        if( i>j+1 )
+                        {
+                            v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &tmp->ptr.p_double[j+1], 1, ae_v_len(offs+j+1,offs+i-1));
+                        }
+                        else
+                        {
+                            v = 0;
+                        }
+                        if( !isunit )
+                        {
+                            a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[i];
+                        }
+                        else
+                        {
+                            a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[i];
+                        }
+                    }
+                    ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj);
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Recursive case
+     */
+    ablassplitlength(a, n, &n1, &n2, _state);
+    if( n2>0 )
+    {
+        if( isupper )
+        {
+            for(i=0; i<=n1-1; i++)
+            {
+                ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
+            }
+            rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state);
+            rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state);
+        }
+        else
+        {
+            for(i=0; i<=n2-1; i++)
+            {
+                ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
+            }
+            rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state);
+            rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state);
+        }
+        matinv_rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, rep, _state);
+    }
+    matinv_rmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, rep, _state);
+}
+
+
+/*************************************************************************
+Triangular matrix inversion, recursive subroutine
+
+  -- ALGLIB --
+     05.02.2010, Bochkanov Sergey.
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     February 29, 1992.
+*************************************************************************/
+static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     /* Complex */ ae_vector* tmp,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t i;
+    ae_int_t j;
+    ae_complex v;
+    ae_complex ajj;
+
+
+    if( n<1 )
+    {
+        *info = -1;
+        return;
+    }
+    
+    /*
+     * Base case
+     */
+    if( n<=ablascomplexblocksize(a, _state) )
+    {
+        if( isupper )
+        {
+            
+            /*
+             * Compute inverse of upper triangular matrix.
+             */
+            for(j=0; j<=n-1; j++)
+            {
+                if( !isunit )
+                {
+                    if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],0) )
+                    {
+                        *info = -3;
+                        return;
+                    }
+                    a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
+                    ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]);
+                }
+                else
+                {
+                    ajj = ae_complex_from_d(-1);
+                }
+                
+                /*
+                 * Compute elements 1:j-1 of j-th column.
+                 */
+                if( j>0 )
+                {
+                    ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+0][offs+j], a->stride, "N", ae_v_len(0,j-1));
+                    for(i=0; i<=j-1; i++)
+                    {
+                        if( i<j-1 )
+                        {
+                            v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+i+1], 1, "N", &tmp->ptr.p_complex[i+1], 1, "N", ae_v_len(offs+i+1,offs+j-1));
+                        }
+                        else
+                        {
+                            v = ae_complex_from_d(0);
+                        }
+                        if( !isunit )
+                        {
+                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[i]));
+                        }
+                        else
+                        {
+                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[i]);
+                        }
+                    }
+                    ae_v_cmulc(&a->ptr.pp_complex[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj);
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Compute inverse of lower triangular matrix.
+             */
+            for(j=n-1; j>=0; j--)
+            {
+                if( !isunit )
+                {
+                    if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],0) )
+                    {
+                        *info = -3;
+                        return;
+                    }
+                    a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
+                    ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]);
+                }
+                else
+                {
+                    ajj = ae_complex_from_d(-1);
+                }
+                if( j<n-1 )
+                {
+                    
+                    /*
+                     * Compute elements j+1:n of j-th column.
+                     */
+                    ae_v_cmove(&tmp->ptr.p_complex[j+1], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(j+1,n-1));
+                    for(i=j+1; i<=n-1; i++)
+                    {
+                        if( i>j+1 )
+                        {
+                            v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &tmp->ptr.p_complex[j+1], 1, "N", ae_v_len(offs+j+1,offs+i-1));
+                        }
+                        else
+                        {
+                            v = ae_complex_from_d(0);
+                        }
+                        if( !isunit )
+                        {
+                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[i]));
+                        }
+                        else
+                        {
+                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[i]);
+                        }
+                    }
+                    ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj);
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Recursive case
+     */
+    ablascomplexsplitlength(a, n, &n1, &n2, _state);
+    if( n2>0 )
+    {
+        if( isupper )
+        {
+            for(i=0; i<=n1-1; i++)
+            {
+                ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
+            }
+            cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state);
+            cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state);
+        }
+        else
+        {
+            for(i=0; i<=n2-1; i++)
+            {
+                ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
+            }
+            cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state);
+            cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state);
+        }
+        matinv_cmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, rep, _state);
+    }
+    matinv_cmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, rep, _state);
+}
+
+
+static void matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     /* Real    */ ae_vector* work,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    ae_int_t n1;
+    ae_int_t n2;
+
+
+    if( n<1 )
+    {
+        *info = -1;
+        return;
+    }
+    
+    /*
+     * Base case
+     */
+    if( n<=ablasblocksize(a, _state) )
+    {
+        
+        /*
+         * Form inv(U)
+         */
+        matinv_rmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, info, rep, _state);
+        if( *info<=0 )
+        {
+            return;
+        }
+        
+        /*
+         * Solve the equation inv(A)*L = inv(U) for inv(A).
+         */
+        for(j=n-1; j>=0; j--)
+        {
+            
+            /*
+             * Copy current column of L to WORK and replace with zeros.
+             */
+            for(i=j+1; i<=n-1; i++)
+            {
+                work->ptr.p_double[i] = a->ptr.pp_double[offs+i][offs+j];
+                a->ptr.pp_double[offs+i][offs+j] = 0;
+            }
+            
+            /*
+             * Compute current column of inv(A).
+             */
+            if( j<n-1 )
+            {
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &work->ptr.p_double[j+1], 1, ae_v_len(offs+j+1,offs+n-1));
+                    a->ptr.pp_double[offs+i][offs+j] = a->ptr.pp_double[offs+i][offs+j]-v;
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Recursive code:
+     *
+     *         ( L1      )   ( U1  U12 )
+     * A    =  (         ) * (         )
+     *         ( L12  L2 )   (     U2  )
+     *
+     *         ( W   X )
+     * A^-1 =  (       )
+     *         ( Y   Z )
+     */
+    ablassplitlength(a, n, &n1, &n2, _state);
+    ae_assert(n2>0, "LUInverseRec: internal error!", _state);
+    
+    /*
+     * X := inv(U1)*U12*inv(U2)
+     */
+    rmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state);
+    rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state);
+    
+    /*
+     * Y := inv(L2)*L12*inv(L1)
+     */
+    rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state);
+    rmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state);
+    
+    /*
+     * W := inv(L1*U1)+X*Y
+     */
+    matinv_rmatrixluinverserec(a, offs, n1, work, info, rep, _state);
+    if( *info<=0 )
+    {
+        return;
+    }
+    rmatrixgemm(n1, n1, n2, 1.0, a, offs, offs+n1, 0, a, offs+n1, offs, 0, 1.0, a, offs, offs, _state);
+    
+    /*
+     * X := -X*inv(L2)
+     * Y := -inv(U2)*Y
+     */
+    rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state);
+    for(i=0; i<=n1-1; i++)
+    {
+        ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
+    }
+    rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state);
+    for(i=0; i<=n2-1; i++)
+    {
+        ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
+    }
+    
+    /*
+     * Z := inv(L2*U2)
+     */
+    matinv_rmatrixluinverserec(a, offs+n1, n2, work, info, rep, _state);
+}
+
+
+static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     /* Complex */ ae_vector* work,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t j;
+    ae_complex v;
+    ae_int_t n1;
+    ae_int_t n2;
+
+
+    if( n<1 )
+    {
+        *info = -1;
+        return;
+    }
+    
+    /*
+     * Base case
+     */
+    if( n<=ablascomplexblocksize(a, _state) )
+    {
+        
+        /*
+         * Form inv(U)
+         */
+        matinv_cmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, info, rep, _state);
+        if( *info<=0 )
+        {
+            return;
+        }
+        
+        /*
+         * Solve the equation inv(A)*L = inv(U) for inv(A).
+         */
+        for(j=n-1; j>=0; j--)
+        {
+            
+            /*
+             * Copy current column of L to WORK and replace with zeros.
+             */
+            for(i=j+1; i<=n-1; i++)
+            {
+                work->ptr.p_complex[i] = a->ptr.pp_complex[offs+i][offs+j];
+                a->ptr.pp_complex[offs+i][offs+j] = ae_complex_from_d(0);
+            }
+            
+            /*
+             * Compute current column of inv(A).
+             */
+            if( j<n-1 )
+            {
+                for(i=0; i<=n-1; i++)
+                {
+                    v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &work->ptr.p_complex[j+1], 1, "N", ae_v_len(offs+j+1,offs+n-1));
+                    a->ptr.pp_complex[offs+i][offs+j] = ae_c_sub(a->ptr.pp_complex[offs+i][offs+j],v);
+                }
+            }
+        }
+        return;
+    }
+    
+    /*
+     * Recursive code:
+     *
+     *         ( L1      )   ( U1  U12 )
+     * A    =  (         ) * (         )
+     *         ( L12  L2 )   (     U2  )
+     *
+     *         ( W   X )
+     * A^-1 =  (       )
+     *         ( Y   Z )
+     */
+    ablascomplexsplitlength(a, n, &n1, &n2, _state);
+    ae_assert(n2>0, "LUInverseRec: internal error!", _state);
+    
+    /*
+     * X := inv(U1)*U12*inv(U2)
+     */
+    cmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state);
+    cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state);
+    
+    /*
+     * Y := inv(L2)*L12*inv(L1)
+     */
+    cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state);
+    cmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state);
+    
+    /*
+     * W := inv(L1*U1)+X*Y
+     */
+    matinv_cmatrixluinverserec(a, offs, n1, work, info, rep, _state);
+    if( *info<=0 )
+    {
+        return;
+    }
+    cmatrixgemm(n1, n1, n2, ae_complex_from_d(1.0), a, offs, offs+n1, 0, a, offs+n1, offs, 0, ae_complex_from_d(1.0), a, offs, offs, _state);
+    
+    /*
+     * X := -X*inv(L2)
+     * Y := -inv(U2)*Y
+     */
+    cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state);
+    for(i=0; i<=n1-1; i++)
+    {
+        ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
+    }
+    cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state);
+    for(i=0; i<=n2-1; i++)
+    {
+        ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
+    }
+    
+    /*
+     * Z := inv(L2*U2)
+     */
+    matinv_cmatrixluinverserec(a, offs+n1, n2, work, info, rep, _state);
+}
+
+
+/*************************************************************************
+Recursive subroutine for SPD inversion.
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void matinv_spdmatrixcholeskyinverserec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t info2;
+    matinvreport rep2;
+
+    ae_frame_make(_state, &_frame_block);
+    _matinvreport_init(&rep2, _state, ae_true);
+
+    if( n<1 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Base case
+     */
+    if( n<=ablasblocksize(a, _state) )
+    {
+        matinv_rmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &info2, &rep2, _state);
+        if( isupper )
+        {
+            
+            /*
+             * Compute the product U * U'.
+             * NOTE: we never assume that diagonal of U is real
+             */
+            for(i=0; i<=n-1; i++)
+            {
+                if( i==0 )
+                {
+                    
+                    /*
+                     * 1x1 matrix
+                     */
+                    a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
+                }
+                else
+                {
+                    
+                    /*
+                     * (I+1)x(I+1) matrix,
+                     *
+                     * ( A11  A12 )   ( A11^H        )   ( A11*A11^H+A12*A12^H  A12*A22^H )
+                     * (          ) * (              ) = (                                )
+                     * (      A22 )   ( A12^H  A22^H )   ( A22*A12^H            A22*A22^H )
+                     *
+                     * A11 is IxI, A22 is 1x1.
+                     */
+                    ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(0,i-1));
+                    for(j=0; j<=i-1; j++)
+                    {
+                        v = a->ptr.pp_double[offs+j][offs+i];
+                        ae_v_addd(&a->ptr.pp_double[offs+j][offs+j], 1, &tmp->ptr.p_double[j], 1, ae_v_len(offs+j,offs+i-1), v);
+                    }
+                    v = a->ptr.pp_double[offs+i][offs+i];
+                    ae_v_muld(&a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v);
+                    a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Compute the product L' * L
+             * NOTE: we never assume that diagonal of L is real
+             */
+            for(i=0; i<=n-1; i++)
+            {
+                if( i==0 )
+                {
+                    
+                    /*
+                     * 1x1 matrix
+                     */
+                    a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
+                }
+                else
+                {
+                    
+                    /*
+                     * (I+1)x(I+1) matrix,
+                     *
+                     * ( A11^H  A21^H )   ( A11      )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
+                     * (              ) * (          ) = (                                )
+                     * (        A22^H )   ( A21  A22 )   ( A22^H*A21            A22^H*A22 )
+                     *
+                     * A11 is IxI, A22 is 1x1.
+                     */
+                    ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs], 1, ae_v_len(0,i-1));
+                    for(j=0; j<=i-1; j++)
+                    {
+                        v = a->ptr.pp_double[offs+i][offs+j];
+                        ae_v_addd(&a->ptr.pp_double[offs+j][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+j), v);
+                    }
+                    v = a->ptr.pp_double[offs+i][offs+i];
+                    ae_v_muld(&a->ptr.pp_double[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v);
+                    a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
+                }
+            }
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Recursive code: triangular factor inversion merged with
+     * UU' or L'L multiplication
+     */
+    ablassplitlength(a, n, &n1, &n2, _state);
+    
+    /*
+     * form off-diagonal block of trangular inverse
+     */
+    if( isupper )
+    {
+        for(i=0; i<=n1-1; i++)
+        {
+            ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
+        }
+        rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state);
+        rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state);
+    }
+    else
+    {
+        for(i=0; i<=n2-1; i++)
+        {
+            ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
+        }
+        rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state);
+        rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state);
+    }
+    
+    /*
+     * invert first diagonal block
+     */
+    matinv_spdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state);
+    
+    /*
+     * update first diagonal block with off-diagonal block,
+     * update off-diagonal block
+     */
+    if( isupper )
+    {
+        rmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state);
+        rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs, offs+n1, _state);
+    }
+    else
+    {
+        rmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 1, 1.0, a, offs, offs, isupper, _state);
+        rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs+n1, offs, _state);
+    }
+    
+    /*
+     * invert second diagonal block
+     */
+    matinv_spdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Recursive subroutine for HPD inversion.
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_int_t j;
+    ae_complex v;
+    ae_int_t n1;
+    ae_int_t n2;
+    ae_int_t info2;
+    matinvreport rep2;
+
+    ae_frame_make(_state, &_frame_block);
+    _matinvreport_init(&rep2, _state, ae_true);
+
+    if( n<1 )
+    {
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Base case
+     */
+    if( n<=ablascomplexblocksize(a, _state) )
+    {
+        matinv_cmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &info2, &rep2, _state);
+        if( isupper )
+        {
+            
+            /*
+             * Compute the product U * U'.
+             * NOTE: we never assume that diagonal of U is real
+             */
+            for(i=0; i<=n-1; i++)
+            {
+                if( i==0 )
+                {
+                    
+                    /*
+                     * 1x1 matrix
+                     */
+                    a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
+                }
+                else
+                {
+                    
+                    /*
+                     * (I+1)x(I+1) matrix,
+                     *
+                     * ( A11  A12 )   ( A11^H        )   ( A11*A11^H+A12*A12^H  A12*A22^H )
+                     * (          ) * (              ) = (                                )
+                     * (      A22 )   ( A12^H  A22^H )   ( A22*A12^H            A22*A22^H )
+                     *
+                     * A11 is IxI, A22 is 1x1.
+                     */
+                    ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+i], a->stride, "Conj", ae_v_len(0,i-1));
+                    for(j=0; j<=i-1; j++)
+                    {
+                        v = a->ptr.pp_complex[offs+j][offs+i];
+                        ae_v_caddc(&a->ptr.pp_complex[offs+j][offs+j], 1, &tmp->ptr.p_complex[j], 1, "N", ae_v_len(offs+j,offs+i-1), v);
+                    }
+                    v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state);
+                    ae_v_cmulc(&a->ptr.pp_complex[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v);
+                    a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Compute the product L' * L
+             * NOTE: we never assume that diagonal of L is real
+             */
+            for(i=0; i<=n-1; i++)
+            {
+                if( i==0 )
+                {
+                    
+                    /*
+                     * 1x1 matrix
+                     */
+                    a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
+                }
+                else
+                {
+                    
+                    /*
+                     * (I+1)x(I+1) matrix,
+                     *
+                     * ( A11^H  A21^H )   ( A11      )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
+                     * (              ) * (          ) = (                                )
+                     * (        A22^H )   ( A21  A22 )   ( A22^H*A21            A22^H*A22 )
+                     *
+                     * A11 is IxI, A22 is 1x1.
+                     */
+                    ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs], 1, "N", ae_v_len(0,i-1));
+                    for(j=0; j<=i-1; j++)
+                    {
+                        v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+j], _state);
+                        ae_v_caddc(&a->ptr.pp_complex[offs+j][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+j), v);
+                    }
+                    v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state);
+                    ae_v_cmulc(&a->ptr.pp_complex[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v);
+                    a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
+                }
+            }
+        }
+        ae_frame_leave(_state);
+        return;
+    }
+    
+    /*
+     * Recursive code: triangular factor inversion merged with
+     * UU' or L'L multiplication
+     */
+    ablascomplexsplitlength(a, n, &n1, &n2, _state);
+    
+    /*
+     * form off-diagonal block of trangular inverse
+     */
+    if( isupper )
+    {
+        for(i=0; i<=n1-1; i++)
+        {
+            ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
+        }
+        cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state);
+        cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state);
+    }
+    else
+    {
+        for(i=0; i<=n2-1; i++)
+        {
+            ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
+        }
+        cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state);
+        cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state);
+    }
+    
+    /*
+     * invert first diagonal block
+     */
+    matinv_hpdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state);
+    
+    /*
+     * update first diagonal block with off-diagonal block,
+     * update off-diagonal block
+     */
+    if( isupper )
+    {
+        cmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state);
+        cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs, offs+n1, _state);
+    }
+    else
+    {
+        cmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 2, 1.0, a, offs, offs, isupper, _state);
+        cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs+n1, offs, _state);
+    }
+    
+    /*
+     * invert second diagonal block
+     */
+    matinv_hpdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state);
+    ae_frame_leave(_state);
+}
+
+
+ae_bool _matinvreport_init(matinvreport* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _matinvreport_init_copy(matinvreport* dst, matinvreport* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->r1 = src->r1;
+    dst->rinf = src->rinf;
+    return ae_true;
+}
+
+
+void _matinvreport_clear(matinvreport* p)
+{
+}
+
+
+
+
+/*************************************************************************
+Singular value decomposition of a bidiagonal matrix (extended algorithm)
+
+The algorithm performs the singular value decomposition  of  a  bidiagonal
+matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
+orthogonal matrices, S - diagonal matrix with non-negative elements on the
+main diagonal, in descending order.
+
+The  algorithm  finds  singular  values.  In  addition,  the algorithm can
+calculate  matrices  Q  and P (more precisely, not the matrices, but their
+product  with  given  matrices U and VT - U*Q and (P^T)*VT)).  Of  course,
+matrices U and VT can be of any type, including identity. Furthermore, the
+algorithm can calculate Q'*C (this product is calculated more  effectively
+than U*Q,  because  this calculation operates with rows instead  of matrix
+columns).
+
+The feature of the algorithm is its ability to find  all  singular  values
+including those which are arbitrarily close to 0  with  relative  accuracy
+close to  machine precision. If the parameter IsFractionalAccuracyRequired
+is set to True, all singular values will have high relative accuracy close
+to machine precision. If the parameter is set to False, only  the  biggest
+singular value will have relative accuracy  close  to  machine  precision.
+The absolute error of other singular values is equal to the absolute error
+of the biggest singular value.
+
+Input parameters:
+    D       -   main diagonal of matrix B.
+                Array whose index ranges within [0..N-1].
+    E       -   superdiagonal (or subdiagonal) of matrix B.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix B.
+    IsUpper -   True, if the matrix is upper bidiagonal.
+    IsFractionalAccuracyRequired -
+                accuracy to search singular values with.
+    U       -   matrix to be multiplied by Q.
+                Array whose indexes range within [0..NRU-1, 0..N-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..NRU-1, 0..N-1] will be multiplied by Q.
+    NRU     -   number of rows in matrix U.
+    C       -   matrix to be multiplied by Q'.
+                Array whose indexes range within [0..N-1, 0..NCC-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..N-1, 0..NCC-1] will be multiplied by Q'.
+    NCC     -   number of columns in matrix C.
+    VT      -   matrix to be multiplied by P^T.
+                Array whose indexes range within [0..N-1, 0..NCVT-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..N-1, 0..NCVT-1] will be multiplied by P^T.
+    NCVT    -   number of columns in matrix VT.
+
+Output parameters:
+    D       -   singular values of matrix B in descending order.
+    U       -   if NRU>0, contains matrix U*Q.
+    VT      -   if NCVT>0, contains matrix (P^T)*VT.
+    C       -   if NCC>0, contains matrix Q'*C.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+Additional information:
+    The type of convergence is controlled by the internal  parameter  TOL.
+    If the parameter is greater than 0, the singular values will have
+    relative accuracy TOL. If TOL<0, the singular values will have
+    absolute accuracy ABS(TOL)*norm(B).
+    By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
+    where Epsilon is the machine precision. It is not  recommended  to  use
+    TOL less than 10*Epsilon since this will  considerably  slow  down  the
+    algorithm and may not lead to error decreasing.
+History:
+    * 31 March, 2007.
+        changed MAXITR from 6 to 12.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1999.
+*************************************************************************/
+ae_bool rmatrixbdsvd(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isfractionalaccuracyrequired,
+     /* Real    */ ae_matrix* u,
+     ae_int_t nru,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ncc,
+     /* Real    */ ae_matrix* vt,
+     ae_int_t ncvt,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _e;
+    ae_vector d1;
+    ae_vector e1;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
+
+    ae_vector_set_length(&d1, n+1, _state);
+    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
+    if( n>1 )
+    {
+        ae_vector_set_length(&e1, n-1+1, _state);
+        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
+    }
+    result = bdsvd_bidiagonalsvddecompositioninternal(&d1, &e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt, _state);
+    ae_v_move(&d->ptr.p_double[0], 1, &d1.ptr.p_double[1], 1, ae_v_len(0,n-1));
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+ae_bool bidiagonalsvddecomposition(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isfractionalaccuracyrequired,
+     /* Real    */ ae_matrix* u,
+     ae_int_t nru,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ncc,
+     /* Real    */ ae_matrix* vt,
+     ae_int_t ncvt,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _e;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+
+    result = bdsvd_bidiagonalsvddecompositioninternal(d, e, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Internal working subroutine for bidiagonal decomposition
+*************************************************************************/
+static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isfractionalaccuracyrequired,
+     /* Real    */ ae_matrix* u,
+     ae_int_t ustart,
+     ae_int_t nru,
+     /* Real    */ ae_matrix* c,
+     ae_int_t cstart,
+     ae_int_t ncc,
+     /* Real    */ ae_matrix* vt,
+     ae_int_t vstart,
+     ae_int_t ncvt,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector _e;
+    ae_int_t i;
+    ae_int_t idir;
+    ae_int_t isub;
+    ae_int_t iter;
+    ae_int_t j;
+    ae_int_t ll;
+    ae_int_t lll;
+    ae_int_t m;
+    ae_int_t maxit;
+    ae_int_t oldll;
+    ae_int_t oldm;
+    double abse;
+    double abss;
+    double cosl;
+    double cosr;
+    double cs;
+    double eps;
+    double f;
+    double g;
+    double h;
+    double mu;
+    double oldcs;
+    double oldsn;
+    double r;
+    double shift;
+    double sigmn;
+    double sigmx;
+    double sinl;
+    double sinr;
+    double sll;
+    double smax;
+    double smin;
+    double sminl;
+    double sminlo;
+    double sminoa;
+    double sn;
+    double thresh;
+    double tol;
+    double tolmul;
+    double unfl;
+    ae_vector work0;
+    ae_vector work1;
+    ae_vector work2;
+    ae_vector work3;
+    ae_int_t maxitr;
+    ae_bool matrixsplitflag;
+    ae_bool iterflag;
+    ae_vector utemp;
+    ae_vector vttemp;
+    ae_vector ctemp;
+    ae_vector etemp;
+    ae_bool rightside;
+    ae_bool fwddir;
+    double tmp;
+    ae_int_t mm1;
+    ae_int_t mm0;
+    ae_bool bchangedir;
+    ae_int_t uend;
+    ae_int_t cend;
+    ae_int_t vend;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init_copy(&_e, e, _state, ae_true);
+    e = &_e;
+    ae_vector_init(&work0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work3, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&utemp, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&vttemp, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ctemp, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&etemp, 0, DT_REAL, _state, ae_true);
+
+    result = ae_true;
+    if( n==0 )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    if( n==1 )
+    {
+        if( ae_fp_less(d->ptr.p_double[1],0) )
+        {
+            d->ptr.p_double[1] = -d->ptr.p_double[1];
+            if( ncvt>0 )
+            {
+                ae_v_muld(&vt->ptr.pp_double[vstart][vstart], 1, ae_v_len(vstart,vstart+ncvt-1), -1);
+            }
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * these initializers are not really necessary,
+     * but without them compiler complains about uninitialized locals
+     */
+    ll = 0;
+    oldsn = 0;
+    
+    /*
+     * init
+     */
+    ae_vector_set_length(&work0, n-1+1, _state);
+    ae_vector_set_length(&work1, n-1+1, _state);
+    ae_vector_set_length(&work2, n-1+1, _state);
+    ae_vector_set_length(&work3, n-1+1, _state);
+    uend = ustart+ae_maxint(nru-1, 0, _state);
+    vend = vstart+ae_maxint(ncvt-1, 0, _state);
+    cend = cstart+ae_maxint(ncc-1, 0, _state);
+    ae_vector_set_length(&utemp, uend+1, _state);
+    ae_vector_set_length(&vttemp, vend+1, _state);
+    ae_vector_set_length(&ctemp, cend+1, _state);
+    maxitr = 12;
+    rightside = ae_true;
+    fwddir = ae_true;
+    
+    /*
+     * resize E from N-1 to N
+     */
+    ae_vector_set_length(&etemp, n+1, _state);
+    for(i=1; i<=n-1; i++)
+    {
+        etemp.ptr.p_double[i] = e->ptr.p_double[i];
+    }
+    ae_vector_set_length(e, n+1, _state);
+    for(i=1; i<=n-1; i++)
+    {
+        e->ptr.p_double[i] = etemp.ptr.p_double[i];
+    }
+    e->ptr.p_double[n] = 0;
+    idir = 0;
+    
+    /*
+     * Get machine constants
+     */
+    eps = ae_machineepsilon;
+    unfl = ae_minrealnumber;
+    
+    /*
+     * If matrix lower bidiagonal, rotate to be upper bidiagonal
+     * by applying Givens rotations on the left
+     */
+    if( !isupper )
+    {
+        for(i=1; i<=n-1; i++)
+        {
+            generaterotation(d->ptr.p_double[i], e->ptr.p_double[i], &cs, &sn, &r, _state);
+            d->ptr.p_double[i] = r;
+            e->ptr.p_double[i] = sn*d->ptr.p_double[i+1];
+            d->ptr.p_double[i+1] = cs*d->ptr.p_double[i+1];
+            work0.ptr.p_double[i] = cs;
+            work1.ptr.p_double[i] = sn;
+        }
+        
+        /*
+         * Update singular vectors if desired
+         */
+        if( nru>0 )
+        {
+            applyrotationsfromtheright(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, &work0, &work1, u, &utemp, _state);
+        }
+        if( ncc>0 )
+        {
+            applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
+        }
+    }
+    
+    /*
+     * Compute singular values to relative accuracy TOL
+     * (By setting TOL to be negative, algorithm will compute
+     * singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+     */
+    tolmul = ae_maxreal(10, ae_minreal(100, ae_pow(eps, -0.125, _state), _state), _state);
+    tol = tolmul*eps;
+    if( !isfractionalaccuracyrequired )
+    {
+        tol = -tol;
+    }
+    
+    /*
+     * Compute approximate maximum, minimum singular values
+     */
+    smax = 0;
+    for(i=1; i<=n; i++)
+    {
+        smax = ae_maxreal(smax, ae_fabs(d->ptr.p_double[i], _state), _state);
+    }
+    for(i=1; i<=n-1; i++)
+    {
+        smax = ae_maxreal(smax, ae_fabs(e->ptr.p_double[i], _state), _state);
+    }
+    sminl = 0;
+    if( ae_fp_greater_eq(tol,0) )
+    {
+        
+        /*
+         * Relative accuracy desired
+         */
+        sminoa = ae_fabs(d->ptr.p_double[1], _state);
+        if( ae_fp_neq(sminoa,0) )
+        {
+            mu = sminoa;
+            for(i=2; i<=n; i++)
+            {
+                mu = ae_fabs(d->ptr.p_double[i], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[i-1], _state)));
+                sminoa = ae_minreal(sminoa, mu, _state);
+                if( ae_fp_eq(sminoa,0) )
+                {
+                    break;
+                }
+            }
+        }
+        sminoa = sminoa/ae_sqrt(n, _state);
+        thresh = ae_maxreal(tol*sminoa, maxitr*n*n*unfl, _state);
+    }
+    else
+    {
+        
+        /*
+         * Absolute accuracy desired
+         */
+        thresh = ae_maxreal(ae_fabs(tol, _state)*smax, maxitr*n*n*unfl, _state);
+    }
+    
+    /*
+     * Prepare for main iteration loop for the singular values
+     * (MAXIT is the maximum number of passes through the inner
+     * loop permitted before nonconvergence signalled.)
+     */
+    maxit = maxitr*n*n;
+    iter = 0;
+    oldll = -1;
+    oldm = -1;
+    
+    /*
+     * M points to last element of unconverged part of matrix
+     */
+    m = n;
+    
+    /*
+     * Begin main iteration loop
+     */
+    for(;;)
+    {
+        
+        /*
+         * Check for convergence or exceeding iteration count
+         */
+        if( m<=1 )
+        {
+            break;
+        }
+        if( iter>maxit )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Find diagonal block of matrix to work on
+         */
+        if( ae_fp_less(tol,0)&&ae_fp_less_eq(ae_fabs(d->ptr.p_double[m], _state),thresh) )
+        {
+            d->ptr.p_double[m] = 0;
+        }
+        smax = ae_fabs(d->ptr.p_double[m], _state);
+        smin = smax;
+        matrixsplitflag = ae_false;
+        for(lll=1; lll<=m-1; lll++)
+        {
+            ll = m-lll;
+            abss = ae_fabs(d->ptr.p_double[ll], _state);
+            abse = ae_fabs(e->ptr.p_double[ll], _state);
+            if( ae_fp_less(tol,0)&&ae_fp_less_eq(abss,thresh) )
+            {
+                d->ptr.p_double[ll] = 0;
+            }
+            if( ae_fp_less_eq(abse,thresh) )
+            {
+                matrixsplitflag = ae_true;
+                break;
+            }
+            smin = ae_minreal(smin, abss, _state);
+            smax = ae_maxreal(smax, ae_maxreal(abss, abse, _state), _state);
+        }
+        if( !matrixsplitflag )
+        {
+            ll = 0;
+        }
+        else
+        {
+            
+            /*
+             * Matrix splits since E(LL) = 0
+             */
+            e->ptr.p_double[ll] = 0;
+            if( ll==m-1 )
+            {
+                
+                /*
+                 * Convergence of bottom singular value, return to top of loop
+                 */
+                m = m-1;
+                continue;
+            }
+        }
+        ll = ll+1;
+        
+        /*
+         * E(LL) through E(M-1) are nonzero, E(LL-1) is zero
+         */
+        if( ll==m-1 )
+        {
+            
+            /*
+             * 2 by 2 block, handle separately
+             */
+            bdsvd_svdv2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl, _state);
+            d->ptr.p_double[m-1] = sigmx;
+            e->ptr.p_double[m-1] = 0;
+            d->ptr.p_double[m] = sigmn;
+            
+            /*
+             * Compute singular vectors, if desired
+             */
+            if( ncvt>0 )
+            {
+                mm0 = m+(vstart-1);
+                mm1 = m-1+(vstart-1);
+                ae_v_moved(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), cosr);
+                ae_v_addd(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), sinr);
+                ae_v_muld(&vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), cosr);
+                ae_v_subd(&vt->ptr.pp_double[mm0][vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), sinr);
+                ae_v_move(&vt->ptr.pp_double[mm1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend));
+            }
+            if( nru>0 )
+            {
+                mm0 = m+ustart-1;
+                mm1 = m-1+ustart-1;
+                ae_v_moved(&utemp.ptr.p_double[ustart], 1, &u->ptr.pp_double[ustart][mm1], u->stride, ae_v_len(ustart,uend), cosl);
+                ae_v_addd(&utemp.ptr.p_double[ustart], 1, &u->ptr.pp_double[ustart][mm0], u->stride, ae_v_len(ustart,uend), sinl);
+                ae_v_muld(&u->ptr.pp_double[ustart][mm0], u->stride, ae_v_len(ustart,uend), cosl);
+                ae_v_subd(&u->ptr.pp_double[ustart][mm0], u->stride, &u->ptr.pp_double[ustart][mm1], u->stride, ae_v_len(ustart,uend), sinl);
+                ae_v_move(&u->ptr.pp_double[ustart][mm1], u->stride, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend));
+            }
+            if( ncc>0 )
+            {
+                mm0 = m+cstart-1;
+                mm1 = m-1+cstart-1;
+                ae_v_moved(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), cosl);
+                ae_v_addd(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), sinl);
+                ae_v_muld(&c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), cosl);
+                ae_v_subd(&c->ptr.pp_double[mm0][cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), sinl);
+                ae_v_move(&c->ptr.pp_double[mm1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend));
+            }
+            m = m-2;
+            continue;
+        }
+        
+        /*
+         * If working on new submatrix, choose shift direction
+         * (from larger end diagonal element towards smaller)
+         *
+         * Previously was
+         *     "if (LL>OLDM) or (M<OLDLL) then"
+         * fixed thanks to Michael Rolle < m@rolle.name >
+         * Very strange that LAPACK still contains it.
+         */
+        bchangedir = ae_false;
+        if( idir==1&&ae_fp_less(ae_fabs(d->ptr.p_double[ll], _state),1.0E-3*ae_fabs(d->ptr.p_double[m], _state)) )
+        {
+            bchangedir = ae_true;
+        }
+        if( idir==2&&ae_fp_less(ae_fabs(d->ptr.p_double[m], _state),1.0E-3*ae_fabs(d->ptr.p_double[ll], _state)) )
+        {
+            bchangedir = ae_true;
+        }
+        if( (ll!=oldll||m!=oldm)||bchangedir )
+        {
+            if( ae_fp_greater_eq(ae_fabs(d->ptr.p_double[ll], _state),ae_fabs(d->ptr.p_double[m], _state)) )
+            {
+                
+                /*
+                 * Chase bulge from top (big end) to bottom (small end)
+                 */
+                idir = 1;
+            }
+            else
+            {
+                
+                /*
+                 * Chase bulge from bottom (big end) to top (small end)
+                 */
+                idir = 2;
+            }
+        }
+        
+        /*
+         * Apply convergence tests
+         */
+        if( idir==1 )
+        {
+            
+            /*
+             * Run convergence test in forward direction
+             * First apply standard test to bottom of matrix
+             */
+            if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[m], _state))||(ae_fp_less(tol,0)&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh)) )
+            {
+                e->ptr.p_double[m-1] = 0;
+                continue;
+            }
+            if( ae_fp_greater_eq(tol,0) )
+            {
+                
+                /*
+                 * If relative accuracy desired,
+                 * apply convergence criterion forward
+                 */
+                mu = ae_fabs(d->ptr.p_double[ll], _state);
+                sminl = mu;
+                iterflag = ae_false;
+                for(lll=ll; lll<=m-1; lll++)
+                {
+                    if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) )
+                    {
+                        e->ptr.p_double[lll] = 0;
+                        iterflag = ae_true;
+                        break;
+                    }
+                    sminlo = sminl;
+                    mu = ae_fabs(d->ptr.p_double[lll+1], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state)));
+                    sminl = ae_minreal(sminl, mu, _state);
+                }
+                if( iterflag )
+                {
+                    continue;
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Run convergence test in backward direction
+             * First apply standard test to top of matrix
+             */
+            if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[ll], _state))||(ae_fp_less(tol,0)&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh)) )
+            {
+                e->ptr.p_double[ll] = 0;
+                continue;
+            }
+            if( ae_fp_greater_eq(tol,0) )
+            {
+                
+                /*
+                 * If relative accuracy desired,
+                 * apply convergence criterion backward
+                 */
+                mu = ae_fabs(d->ptr.p_double[m], _state);
+                sminl = mu;
+                iterflag = ae_false;
+                for(lll=m-1; lll>=ll; lll--)
+                {
+                    if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) )
+                    {
+                        e->ptr.p_double[lll] = 0;
+                        iterflag = ae_true;
+                        break;
+                    }
+                    sminlo = sminl;
+                    mu = ae_fabs(d->ptr.p_double[lll], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state)));
+                    sminl = ae_minreal(sminl, mu, _state);
+                }
+                if( iterflag )
+                {
+                    continue;
+                }
+            }
+        }
+        oldll = ll;
+        oldm = m;
+        
+        /*
+         * Compute shift.  First, test if shifting would ruin relative
+         * accuracy, and if so set the shift to zero.
+         */
+        if( ae_fp_greater_eq(tol,0)&&ae_fp_less_eq(n*tol*(sminl/smax),ae_maxreal(eps, 0.01*tol, _state)) )
+        {
+            
+            /*
+             * Use a zero shift to avoid loss of relative accuracy
+             */
+            shift = 0;
+        }
+        else
+        {
+            
+            /*
+             * Compute the shift from 2-by-2 block at end of matrix
+             */
+            if( idir==1 )
+            {
+                sll = ae_fabs(d->ptr.p_double[ll], _state);
+                bdsvd_svd2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &shift, &r, _state);
+            }
+            else
+            {
+                sll = ae_fabs(d->ptr.p_double[m], _state);
+                bdsvd_svd2x2(d->ptr.p_double[ll], e->ptr.p_double[ll], d->ptr.p_double[ll+1], &shift, &r, _state);
+            }
+            
+            /*
+             * Test if shift negligible, and if so set to zero
+             */
+            if( ae_fp_greater(sll,0) )
+            {
+                if( ae_fp_less(ae_sqr(shift/sll, _state),eps) )
+                {
+                    shift = 0;
+                }
+            }
+        }
+        
+        /*
+         * Increment iteration count
+         */
+        iter = iter+m-ll;
+        
+        /*
+         * If SHIFT = 0, do simplified QR iteration
+         */
+        if( ae_fp_eq(shift,0) )
+        {
+            if( idir==1 )
+            {
+                
+                /*
+                 * Chase bulge from top to bottom
+                 * Save cosines and sines for later singular vector updates
+                 */
+                cs = 1;
+                oldcs = 1;
+                for(i=ll; i<=m-1; i++)
+                {
+                    generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i], &cs, &sn, &r, _state);
+                    if( i>ll )
+                    {
+                        e->ptr.p_double[i-1] = oldsn*r;
+                    }
+                    generaterotation(oldcs*r, d->ptr.p_double[i+1]*sn, &oldcs, &oldsn, &tmp, _state);
+                    d->ptr.p_double[i] = tmp;
+                    work0.ptr.p_double[i-ll+1] = cs;
+                    work1.ptr.p_double[i-ll+1] = sn;
+                    work2.ptr.p_double[i-ll+1] = oldcs;
+                    work3.ptr.p_double[i-ll+1] = oldsn;
+                }
+                h = d->ptr.p_double[m]*cs;
+                d->ptr.p_double[m] = h*oldcs;
+                e->ptr.p_double[m-1] = h*oldsn;
+                
+                /*
+                 * Update singular vectors
+                 */
+                if( ncvt>0 )
+                {
+                    applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state);
+                }
+                if( nru>0 )
+                {
+                    applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work2, &work3, u, &utemp, _state);
+                }
+                if( ncc>0 )
+                {
+                    applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state);
+                }
+                
+                /*
+                 * Test convergence
+                 */
+                if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) )
+                {
+                    e->ptr.p_double[m-1] = 0;
+                }
+            }
+            else
+            {
+                
+                /*
+                 * Chase bulge from bottom to top
+                 * Save cosines and sines for later singular vector updates
+                 */
+                cs = 1;
+                oldcs = 1;
+                for(i=m; i>=ll+1; i--)
+                {
+                    generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i-1], &cs, &sn, &r, _state);
+                    if( i<m )
+                    {
+                        e->ptr.p_double[i] = oldsn*r;
+                    }
+                    generaterotation(oldcs*r, d->ptr.p_double[i-1]*sn, &oldcs, &oldsn, &tmp, _state);
+                    d->ptr.p_double[i] = tmp;
+                    work0.ptr.p_double[i-ll] = cs;
+                    work1.ptr.p_double[i-ll] = -sn;
+                    work2.ptr.p_double[i-ll] = oldcs;
+                    work3.ptr.p_double[i-ll] = -oldsn;
+                }
+                h = d->ptr.p_double[ll]*cs;
+                d->ptr.p_double[ll] = h*oldcs;
+                e->ptr.p_double[ll] = h*oldsn;
+                
+                /*
+                 * Update singular vectors
+                 */
+                if( ncvt>0 )
+                {
+                    applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state);
+                }
+                if( nru>0 )
+                {
+                    applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work0, &work1, u, &utemp, _state);
+                }
+                if( ncc>0 )
+                {
+                    applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
+                }
+                
+                /*
+                 * Test convergence
+                 */
+                if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) )
+                {
+                    e->ptr.p_double[ll] = 0;
+                }
+            }
+        }
+        else
+        {
+            
+            /*
+             * Use nonzero shift
+             */
+            if( idir==1 )
+            {
+                
+                /*
+                 * Chase bulge from top to bottom
+                 * Save cosines and sines for later singular vector updates
+                 */
+                f = (ae_fabs(d->ptr.p_double[ll], _state)-shift)*(bdsvd_extsignbdsqr(1, d->ptr.p_double[ll], _state)+shift/d->ptr.p_double[ll]);
+                g = e->ptr.p_double[ll];
+                for(i=ll; i<=m-1; i++)
+                {
+                    generaterotation(f, g, &cosr, &sinr, &r, _state);
+                    if( i>ll )
+                    {
+                        e->ptr.p_double[i-1] = r;
+                    }
+                    f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i];
+                    e->ptr.p_double[i] = cosr*e->ptr.p_double[i]-sinr*d->ptr.p_double[i];
+                    g = sinr*d->ptr.p_double[i+1];
+                    d->ptr.p_double[i+1] = cosr*d->ptr.p_double[i+1];
+                    generaterotation(f, g, &cosl, &sinl, &r, _state);
+                    d->ptr.p_double[i] = r;
+                    f = cosl*e->ptr.p_double[i]+sinl*d->ptr.p_double[i+1];
+                    d->ptr.p_double[i+1] = cosl*d->ptr.p_double[i+1]-sinl*e->ptr.p_double[i];
+                    if( i<m-1 )
+                    {
+                        g = sinl*e->ptr.p_double[i+1];
+                        e->ptr.p_double[i+1] = cosl*e->ptr.p_double[i+1];
+                    }
+                    work0.ptr.p_double[i-ll+1] = cosr;
+                    work1.ptr.p_double[i-ll+1] = sinr;
+                    work2.ptr.p_double[i-ll+1] = cosl;
+                    work3.ptr.p_double[i-ll+1] = sinl;
+                }
+                e->ptr.p_double[m-1] = f;
+                
+                /*
+                 * Update singular vectors
+                 */
+                if( ncvt>0 )
+                {
+                    applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state);
+                }
+                if( nru>0 )
+                {
+                    applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work2, &work3, u, &utemp, _state);
+                }
+                if( ncc>0 )
+                {
+                    applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state);
+                }
+                
+                /*
+                 * Test convergence
+                 */
+                if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) )
+                {
+                    e->ptr.p_double[m-1] = 0;
+                }
+            }
+            else
+            {
+                
+                /*
+                 * Chase bulge from bottom to top
+                 * Save cosines and sines for later singular vector updates
+                 */
+                f = (ae_fabs(d->ptr.p_double[m], _state)-shift)*(bdsvd_extsignbdsqr(1, d->ptr.p_double[m], _state)+shift/d->ptr.p_double[m]);
+                g = e->ptr.p_double[m-1];
+                for(i=m; i>=ll+1; i--)
+                {
+                    generaterotation(f, g, &cosr, &sinr, &r, _state);
+                    if( i<m )
+                    {
+                        e->ptr.p_double[i] = r;
+                    }
+                    f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i-1];
+                    e->ptr.p_double[i-1] = cosr*e->ptr.p_double[i-1]-sinr*d->ptr.p_double[i];
+                    g = sinr*d->ptr.p_double[i-1];
+                    d->ptr.p_double[i-1] = cosr*d->ptr.p_double[i-1];
+                    generaterotation(f, g, &cosl, &sinl, &r, _state);
+                    d->ptr.p_double[i] = r;
+                    f = cosl*e->ptr.p_double[i-1]+sinl*d->ptr.p_double[i-1];
+                    d->ptr.p_double[i-1] = cosl*d->ptr.p_double[i-1]-sinl*e->ptr.p_double[i-1];
+                    if( i>ll+1 )
+                    {
+                        g = sinl*e->ptr.p_double[i-2];
+                        e->ptr.p_double[i-2] = cosl*e->ptr.p_double[i-2];
+                    }
+                    work0.ptr.p_double[i-ll] = cosr;
+                    work1.ptr.p_double[i-ll] = -sinr;
+                    work2.ptr.p_double[i-ll] = cosl;
+                    work3.ptr.p_double[i-ll] = -sinl;
+                }
+                e->ptr.p_double[ll] = f;
+                
+                /*
+                 * Test convergence
+                 */
+                if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) )
+                {
+                    e->ptr.p_double[ll] = 0;
+                }
+                
+                /*
+                 * Update singular vectors if desired
+                 */
+                if( ncvt>0 )
+                {
+                    applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state);
+                }
+                if( nru>0 )
+                {
+                    applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, &work0, &work1, u, &utemp, _state);
+                }
+                if( ncc>0 )
+                {
+                    applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
+                }
+            }
+        }
+        
+        /*
+         * QR iteration finished, go back and check convergence
+         */
+        continue;
+    }
+    
+    /*
+     * All singular values converged, so make them positive
+     */
+    for(i=1; i<=n; i++)
+    {
+        if( ae_fp_less(d->ptr.p_double[i],0) )
+        {
+            d->ptr.p_double[i] = -d->ptr.p_double[i];
+            
+            /*
+             * Change sign of singular vectors, if desired
+             */
+            if( ncvt>0 )
+            {
+                ae_v_muld(&vt->ptr.pp_double[i+vstart-1][vstart], 1, ae_v_len(vstart,vend), -1);
+            }
+        }
+    }
+    
+    /*
+     * Sort the singular values into decreasing order (insertion sort on
+     * singular values, but only one transposition per singular vector)
+     */
+    for(i=1; i<=n-1; i++)
+    {
+        
+        /*
+         * Scan for smallest D(I)
+         */
+        isub = 1;
+        smin = d->ptr.p_double[1];
+        for(j=2; j<=n+1-i; j++)
+        {
+            if( ae_fp_less_eq(d->ptr.p_double[j],smin) )
+            {
+                isub = j;
+                smin = d->ptr.p_double[j];
+            }
+        }
+        if( isub!=n+1-i )
+        {
+            
+            /*
+             * Swap singular values and vectors
+             */
+            d->ptr.p_double[isub] = d->ptr.p_double[n+1-i];
+            d->ptr.p_double[n+1-i] = smin;
+            if( ncvt>0 )
+            {
+                j = n+1-i;
+                ae_v_move(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[isub+vstart-1][vstart], 1, ae_v_len(vstart,vend));
+                ae_v_move(&vt->ptr.pp_double[isub+vstart-1][vstart], 1, &vt->ptr.pp_double[j+vstart-1][vstart], 1, ae_v_len(vstart,vend));
+                ae_v_move(&vt->ptr.pp_double[j+vstart-1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend));
+            }
+            if( nru>0 )
+            {
+                j = n+1-i;
+                ae_v_move(&utemp.ptr.p_double[ustart], 1, &u->ptr.pp_double[ustart][isub+ustart-1], u->stride, ae_v_len(ustart,uend));
+                ae_v_move(&u->ptr.pp_double[ustart][isub+ustart-1], u->stride, &u->ptr.pp_double[ustart][j+ustart-1], u->stride, ae_v_len(ustart,uend));
+                ae_v_move(&u->ptr.pp_double[ustart][j+ustart-1], u->stride, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend));
+            }
+            if( ncc>0 )
+            {
+                j = n+1-i;
+                ae_v_move(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[isub+cstart-1][cstart], 1, ae_v_len(cstart,cend));
+                ae_v_move(&c->ptr.pp_double[isub+cstart-1][cstart], 1, &c->ptr.pp_double[j+cstart-1][cstart], 1, ae_v_len(cstart,cend));
+                ae_v_move(&c->ptr.pp_double[j+cstart-1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend));
+            }
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state)
+{
+    double result;
+
+
+    if( ae_fp_greater_eq(b,0) )
+    {
+        result = ae_fabs(a, _state);
+    }
+    else
+    {
+        result = -ae_fabs(a, _state);
+    }
+    return result;
+}
+
+
+static void bdsvd_svd2x2(double f,
+     double g,
+     double h,
+     double* ssmin,
+     double* ssmax,
+     ae_state *_state)
+{
+    double aas;
+    double at;
+    double au;
+    double c;
+    double fa;
+    double fhmn;
+    double fhmx;
+    double ga;
+    double ha;
+
+    *ssmin = 0;
+    *ssmax = 0;
+
+    fa = ae_fabs(f, _state);
+    ga = ae_fabs(g, _state);
+    ha = ae_fabs(h, _state);
+    fhmn = ae_minreal(fa, ha, _state);
+    fhmx = ae_maxreal(fa, ha, _state);
+    if( ae_fp_eq(fhmn,0) )
+    {
+        *ssmin = 0;
+        if( ae_fp_eq(fhmx,0) )
+        {
+            *ssmax = ga;
+        }
+        else
+        {
+            *ssmax = ae_maxreal(fhmx, ga, _state)*ae_sqrt(1+ae_sqr(ae_minreal(fhmx, ga, _state)/ae_maxreal(fhmx, ga, _state), _state), _state);
+        }
+    }
+    else
+    {
+        if( ae_fp_less(ga,fhmx) )
+        {
+            aas = 1+fhmn/fhmx;
+            at = (fhmx-fhmn)/fhmx;
+            au = ae_sqr(ga/fhmx, _state);
+            c = 2/(ae_sqrt(aas*aas+au, _state)+ae_sqrt(at*at+au, _state));
+            *ssmin = fhmn*c;
+            *ssmax = fhmx/c;
+        }
+        else
+        {
+            au = fhmx/ga;
+            if( ae_fp_eq(au,0) )
+            {
+                
+                /*
+                 * Avoid possible harmful underflow if exponent range
+                 * asymmetric (true SSMIN may not underflow even if
+                 * AU underflows)
+                 */
+                *ssmin = fhmn*fhmx/ga;
+                *ssmax = ga;
+            }
+            else
+            {
+                aas = 1+fhmn/fhmx;
+                at = (fhmx-fhmn)/fhmx;
+                c = 1/(ae_sqrt(1+ae_sqr(aas*au, _state), _state)+ae_sqrt(1+ae_sqr(at*au, _state), _state));
+                *ssmin = fhmn*c*au;
+                *ssmin = *ssmin+(*ssmin);
+                *ssmax = ga/(c+c);
+            }
+        }
+    }
+}
+
+
+static void bdsvd_svdv2x2(double f,
+     double g,
+     double h,
+     double* ssmin,
+     double* ssmax,
+     double* snr,
+     double* csr,
+     double* snl,
+     double* csl,
+     ae_state *_state)
+{
+    ae_bool gasmal;
+    ae_bool swp;
+    ae_int_t pmax;
+    double a;
+    double clt;
+    double crt;
+    double d;
+    double fa;
+    double ft;
+    double ga;
+    double gt;
+    double ha;
+    double ht;
+    double l;
+    double m;
+    double mm;
+    double r;
+    double s;
+    double slt;
+    double srt;
+    double t;
+    double temp;
+    double tsign;
+    double tt;
+    double v;
+
+    *ssmin = 0;
+    *ssmax = 0;
+    *snr = 0;
+    *csr = 0;
+    *snl = 0;
+    *csl = 0;
+
+    ft = f;
+    fa = ae_fabs(ft, _state);
+    ht = h;
+    ha = ae_fabs(h, _state);
+    
+    /*
+     * these initializers are not really necessary,
+     * but without them compiler complains about uninitialized locals
+     */
+    clt = 0;
+    crt = 0;
+    slt = 0;
+    srt = 0;
+    tsign = 0;
+    
+    /*
+     * PMAX points to the maximum absolute element of matrix
+     *  PMAX = 1 if F largest in absolute values
+     *  PMAX = 2 if G largest in absolute values
+     *  PMAX = 3 if H largest in absolute values
+     */
+    pmax = 1;
+    swp = ae_fp_greater(ha,fa);
+    if( swp )
+    {
+        
+        /*
+         * Now FA .ge. HA
+         */
+        pmax = 3;
+        temp = ft;
+        ft = ht;
+        ht = temp;
+        temp = fa;
+        fa = ha;
+        ha = temp;
+    }
+    gt = g;
+    ga = ae_fabs(gt, _state);
+    if( ae_fp_eq(ga,0) )
+    {
+        
+        /*
+         * Diagonal matrix
+         */
+        *ssmin = ha;
+        *ssmax = fa;
+        clt = 1;
+        crt = 1;
+        slt = 0;
+        srt = 0;
+    }
+    else
+    {
+        gasmal = ae_true;
+        if( ae_fp_greater(ga,fa) )
+        {
+            pmax = 2;
+            if( ae_fp_less(fa/ga,ae_machineepsilon) )
+            {
+                
+                /*
+                 * Case of very large GA
+                 */
+                gasmal = ae_false;
+                *ssmax = ga;
+                if( ae_fp_greater(ha,1) )
+                {
+                    v = ga/ha;
+                    *ssmin = fa/v;
+                }
+                else
+                {
+                    v = fa/ga;
+                    *ssmin = v*ha;
+                }
+                clt = 1;
+                slt = ht/gt;
+                srt = 1;
+                crt = ft/gt;
+            }
+        }
+        if( gasmal )
+        {
+            
+            /*
+             * Normal case
+             */
+            d = fa-ha;
+            if( ae_fp_eq(d,fa) )
+            {
+                l = 1;
+            }
+            else
+            {
+                l = d/fa;
+            }
+            m = gt/ft;
+            t = 2-l;
+            mm = m*m;
+            tt = t*t;
+            s = ae_sqrt(tt+mm, _state);
+            if( ae_fp_eq(l,0) )
+            {
+                r = ae_fabs(m, _state);
+            }
+            else
+            {
+                r = ae_sqrt(l*l+mm, _state);
+            }
+            a = 0.5*(s+r);
+            *ssmin = ha/a;
+            *ssmax = fa*a;
+            if( ae_fp_eq(mm,0) )
+            {
+                
+                /*
+                 * Note that M is very tiny
+                 */
+                if( ae_fp_eq(l,0) )
+                {
+                    t = bdsvd_extsignbdsqr(2, ft, _state)*bdsvd_extsignbdsqr(1, gt, _state);
+                }
+                else
+                {
+                    t = gt/bdsvd_extsignbdsqr(d, ft, _state)+m/t;
+                }
+            }
+            else
+            {
+                t = (m/(s+t)+m/(r+l))*(1+a);
+            }
+            l = ae_sqrt(t*t+4, _state);
+            crt = 2/l;
+            srt = t/l;
+            clt = (crt+srt*m)/a;
+            v = ht/ft;
+            slt = v*srt/a;
+        }
+    }
+    if( swp )
+    {
+        *csl = srt;
+        *snl = crt;
+        *csr = slt;
+        *snr = clt;
+    }
+    else
+    {
+        *csl = clt;
+        *snl = slt;
+        *csr = crt;
+        *snr = srt;
+    }
+    
+    /*
+     * Correct signs of SSMAX and SSMIN
+     */
+    if( pmax==1 )
+    {
+        tsign = bdsvd_extsignbdsqr(1, *csr, _state)*bdsvd_extsignbdsqr(1, *csl, _state)*bdsvd_extsignbdsqr(1, f, _state);
+    }
+    if( pmax==2 )
+    {
+        tsign = bdsvd_extsignbdsqr(1, *snr, _state)*bdsvd_extsignbdsqr(1, *csl, _state)*bdsvd_extsignbdsqr(1, g, _state);
+    }
+    if( pmax==3 )
+    {
+        tsign = bdsvd_extsignbdsqr(1, *snr, _state)*bdsvd_extsignbdsqr(1, *snl, _state)*bdsvd_extsignbdsqr(1, h, _state);
+    }
+    *ssmax = bdsvd_extsignbdsqr(*ssmax, tsign, _state);
+    *ssmin = bdsvd_extsignbdsqr(*ssmin, tsign*bdsvd_extsignbdsqr(1, f, _state)*bdsvd_extsignbdsqr(1, h, _state), _state);
+}
+
+
+
+
+/*************************************************************************
+Singular value decomposition of a rectangular matrix.
+
+The algorithm calculates the singular value decomposition of a matrix of
+size MxN: A = U * S * V^T
+
+The algorithm finds the singular values and, optionally, matrices U and V^T.
+The algorithm can find both first min(M,N) columns of matrix U and rows of
+matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
+and NxN respectively).
+
+Take into account that the subroutine does not return matrix V but V^T.
+
+Input parameters:
+    A           -   matrix to be decomposed.
+                    Array whose indexes range within [0..M-1, 0..N-1].
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    UNeeded     -   0, 1 or 2. See the description of the parameter U.
+    VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
+    AdditionalMemory -
+                    If the parameter:
+                     * equals 0, the algorithm doesn’t use additional
+                       memory (lower requirements, lower performance).
+                     * equals 1, the algorithm uses additional
+                       memory of size min(M,N)*min(M,N) of real numbers.
+                       It often speeds up the algorithm.
+                     * equals 2, the algorithm uses additional
+                       memory of size M*min(M,N) of real numbers.
+                       It allows to get a maximum performance.
+                    The recommended value of the parameter is 2.
+
+Output parameters:
+    W           -   contains singular values in descending order.
+    U           -   if UNeeded=0, U isn't changed, the left singular vectors
+                    are not calculated.
+                    if Uneeded=1, U contains left singular vectors (first
+                    min(M,N) columns of matrix U). Array whose indexes range
+                    within [0..M-1, 0..Min(M,N)-1].
+                    if UNeeded=2, U contains matrix U wholly. Array whose
+                    indexes range within [0..M-1, 0..M-1].
+    VT          -   if VTNeeded=0, VT isn’t changed, the right singular vectors
+                    are not calculated.
+                    if VTNeeded=1, VT contains right singular vectors (first
+                    min(M,N) rows of matrix V^T). Array whose indexes range
+                    within [0..min(M,N)-1, 0..N-1].
+                    if VTNeeded=2, VT contains matrix V^T wholly. Array whose
+                    indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+ae_bool rmatrixsvd(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_int_t uneeded,
+     ae_int_t vtneeded,
+     ae_int_t additionalmemory,
+     /* Real    */ ae_vector* w,
+     /* Real    */ ae_matrix* u,
+     /* Real    */ ae_matrix* vt,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector tauq;
+    ae_vector taup;
+    ae_vector tau;
+    ae_vector e;
+    ae_vector work;
+    ae_matrix t2;
+    ae_bool isupper;
+    ae_int_t minmn;
+    ae_int_t ncu;
+    ae_int_t nrvt;
+    ae_int_t nru;
+    ae_int_t ncvt;
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(w);
+    ae_matrix_clear(u);
+    ae_matrix_clear(vt);
+    ae_vector_init(&tauq, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&taup, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&t2, 0, 0, DT_REAL, _state, ae_true);
+
+    result = ae_true;
+    if( m==0||n==0 )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    ae_assert(uneeded>=0&&uneeded<=2, "SVDDecomposition: wrong parameters!", _state);
+    ae_assert(vtneeded>=0&&vtneeded<=2, "SVDDecomposition: wrong parameters!", _state);
+    ae_assert(additionalmemory>=0&&additionalmemory<=2, "SVDDecomposition: wrong parameters!", _state);
+    
+    /*
+     * initialize
+     */
+    minmn = ae_minint(m, n, _state);
+    ae_vector_set_length(w, minmn+1, _state);
+    ncu = 0;
+    nru = 0;
+    if( uneeded==1 )
+    {
+        nru = m;
+        ncu = minmn;
+        ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state);
+    }
+    if( uneeded==2 )
+    {
+        nru = m;
+        ncu = m;
+        ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state);
+    }
+    nrvt = 0;
+    ncvt = 0;
+    if( vtneeded==1 )
+    {
+        nrvt = minmn;
+        ncvt = n;
+        ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state);
+    }
+    if( vtneeded==2 )
+    {
+        nrvt = n;
+        ncvt = n;
+        ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state);
+    }
+    
+    /*
+     * M much larger than N
+     * Use bidiagonal reduction with QR-decomposition
+     */
+    if( ae_fp_greater(m,1.6*n) )
+    {
+        if( uneeded==0 )
+        {
+            
+            /*
+             * No left singular vectors to be computed
+             */
+            rmatrixqr(a, m, n, &tau, _state);
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=0; j<=i-1; j++)
+                {
+                    a->ptr.pp_double[i][j] = 0;
+                }
+            }
+            rmatrixbd(a, n, n, &tauq, &taup, _state);
+            rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state);
+            rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state);
+            result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, a, 0, vt, ncvt, _state);
+            ae_frame_leave(_state);
+            return result;
+        }
+        else
+        {
+            
+            /*
+             * Left singular vectors (may be full matrix U) to be computed
+             */
+            rmatrixqr(a, m, n, &tau, _state);
+            rmatrixqrunpackq(a, m, n, &tau, ncu, u, _state);
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=0; j<=i-1; j++)
+                {
+                    a->ptr.pp_double[i][j] = 0;
+                }
+            }
+            rmatrixbd(a, n, n, &tauq, &taup, _state);
+            rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state);
+            rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state);
+            if( additionalmemory<1 )
+            {
+                
+                /*
+                 * No additional memory can be used
+                 */
+                rmatrixbdmultiplybyq(a, n, n, &tauq, u, m, n, ae_true, ae_false, _state);
+                result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, m, a, 0, vt, ncvt, _state);
+            }
+            else
+            {
+                
+                /*
+                 * Large U. Transforming intermediate matrix T2
+                 */
+                ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+                rmatrixbdunpackq(a, n, n, &tauq, n, &t2, _state);
+                copymatrix(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state);
+                inplacetranspose(&t2, 0, n-1, 0, n-1, &work, _state);
+                result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, &t2, n, vt, ncvt, _state);
+                matrixmatrixmultiply(a, 0, m-1, 0, n-1, ae_false, &t2, 0, n-1, 0, n-1, ae_true, 1.0, u, 0, m-1, 0, n-1, 0.0, &work, _state);
+            }
+            ae_frame_leave(_state);
+            return result;
+        }
+    }
+    
+    /*
+     * N much larger than M
+     * Use bidiagonal reduction with LQ-decomposition
+     */
+    if( ae_fp_greater(n,1.6*m) )
+    {
+        if( vtneeded==0 )
+        {
+            
+            /*
+             * No right singular vectors to be computed
+             */
+            rmatrixlq(a, m, n, &tau, _state);
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=i+1; j<=m-1; j++)
+                {
+                    a->ptr.pp_double[i][j] = 0;
+                }
+            }
+            rmatrixbd(a, m, m, &tauq, &taup, _state);
+            rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state);
+            rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state);
+            ae_vector_set_length(&work, m+1, _state);
+            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
+            result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, 0, _state);
+            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
+            ae_frame_leave(_state);
+            return result;
+        }
+        else
+        {
+            
+            /*
+             * Right singular vectors (may be full matrix VT) to be computed
+             */
+            rmatrixlq(a, m, n, &tau, _state);
+            rmatrixlqunpackq(a, m, n, &tau, nrvt, vt, _state);
+            for(i=0; i<=m-1; i++)
+            {
+                for(j=i+1; j<=m-1; j++)
+                {
+                    a->ptr.pp_double[i][j] = 0;
+                }
+            }
+            rmatrixbd(a, m, m, &tauq, &taup, _state);
+            rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state);
+            rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state);
+            ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
+            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
+            if( additionalmemory<1 )
+            {
+                
+                /*
+                 * No additional memory available
+                 */
+                rmatrixbdmultiplybyp(a, m, m, &taup, vt, m, n, ae_false, ae_true, _state);
+                result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, n, _state);
+            }
+            else
+            {
+                
+                /*
+                 * Large VT. Transforming intermediate matrix T2
+                 */
+                rmatrixbdunpackpt(a, m, m, &taup, m, &t2, _state);
+                result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, &t2, m, _state);
+                copymatrix(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state);
+                matrixmatrixmultiply(&t2, 0, m-1, 0, m-1, ae_false, a, 0, m-1, 0, n-1, ae_false, 1.0, vt, 0, m-1, 0, n-1, 0.0, &work, _state);
+            }
+            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
+            ae_frame_leave(_state);
+            return result;
+        }
+    }
+    
+    /*
+     * M<=N
+     * We can use inplace transposition of U to get rid of columnwise operations
+     */
+    if( m<=n )
+    {
+        rmatrixbd(a, m, n, &tauq, &taup, _state);
+        rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state);
+        rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state);
+        rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state);
+        ae_vector_set_length(&work, m+1, _state);
+        inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
+        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, a, 0, u, nru, vt, ncvt, _state);
+        inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Simple bidiagonal reduction
+     */
+    rmatrixbd(a, m, n, &tauq, &taup, _state);
+    rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state);
+    rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state);
+    rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state);
+    if( additionalmemory<2||uneeded==0 )
+    {
+        
+        /*
+         * We cant use additional memory or there is no need in such operations
+         */
+        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, nru, a, 0, vt, ncvt, _state);
+    }
+    else
+    {
+        
+        /*
+         * We can use additional memory
+         */
+        ae_matrix_set_length(&t2, minmn-1+1, m-1+1, _state);
+        copyandtranspose(u, 0, m-1, 0, minmn-1, &t2, 0, minmn-1, 0, m-1, _state);
+        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, 0, &t2, m, vt, ncvt, _state);
+        copyandtranspose(&t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1, _state);
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+
+
+/*************************************************************************
+Basic Cholesky solver for ScaleA*Cholesky(A)'*x = y.
+
+This subroutine assumes that:
+* A*ScaleA is well scaled
+* A is well-conditioned, so no zero divisions or overflow may occur
+
+INPUT PARAMETERS:
+    CHA     -   Cholesky decomposition of A
+    SqrtScaleA- square root of scale factor ScaleA
+    N       -   matrix size
+    IsUpper -   storage type
+    XB      -   right part
+    Tmp     -   buffer; function automatically allocates it, if it is  too
+                small.  It  can  be  reused  if function is called several
+                times.
+                
+OUTPUT PARAMETERS:
+    XB      -   solution
+
+NOTES: no assertion or tests are done during algorithm operation
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void fblscholeskysolve(/* Real    */ ae_matrix* cha,
+     double sqrtscalea,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* xb,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double v;
+
+
+    if( tmp->cnt<n )
+    {
+        ae_vector_set_length(tmp, n, _state);
+    }
+    
+    /*
+     * A = L*L' or A=U'*U
+     */
+    if( isupper )
+    {
+        
+        /*
+         * Solve U'*y=b first.
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]);
+            if( i<n-1 )
+            {
+                v = xb->ptr.p_double[i];
+                ae_v_moved(&tmp->ptr.p_double[i+1], 1, &cha->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sqrtscalea);
+                ae_v_subd(&xb->ptr.p_double[i+1], 1, &tmp->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), v);
+            }
+        }
+        
+        /*
+         * Solve U*x=y then.
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            if( i<n-1 )
+            {
+                ae_v_moved(&tmp->ptr.p_double[i+1], 1, &cha->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sqrtscalea);
+                v = ae_v_dotproduct(&tmp->ptr.p_double[i+1], 1, &xb->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
+                xb->ptr.p_double[i] = xb->ptr.p_double[i]-v;
+            }
+            xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]);
+        }
+    }
+    else
+    {
+        
+        /*
+         * Solve L*y=b first
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            if( i>0 )
+            {
+                ae_v_moved(&tmp->ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sqrtscalea);
+                v = ae_v_dotproduct(&tmp->ptr.p_double[0], 1, &xb->ptr.p_double[0], 1, ae_v_len(0,i-1));
+                xb->ptr.p_double[i] = xb->ptr.p_double[i]-v;
+            }
+            xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]);
+        }
+        
+        /*
+         * Solve L'*x=y then.
+         */
+        for(i=n-1; i>=0; i--)
+        {
+            xb->ptr.p_double[i] = xb->ptr.p_double[i]/(sqrtscalea*cha->ptr.pp_double[i][i]);
+            if( i>0 )
+            {
+                v = xb->ptr.p_double[i];
+                ae_v_moved(&tmp->ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sqrtscalea);
+                ae_v_subd(&xb->ptr.p_double[0], 1, &tmp->ptr.p_double[0], 1, ae_v_len(0,i-1), v);
+            }
+        }
+    }
+}
+
+
+/*************************************************************************
+Fast basic linear solver: linear SPD CG
+
+Solves (A^T*A + alpha*I)*x = b where:
+* A is MxN matrix
+* alpha>0 is a scalar
+* I is NxN identity matrix
+* b is Nx1 vector
+* X is Nx1 unknown vector.
+
+N iterations of linear conjugate gradient are used to solve problem.
+
+INPUT PARAMETERS:
+    A   -   array[M,N], matrix
+    M   -   number of rows
+    N   -   number of unknowns
+    B   -   array[N], right part
+    X   -   initial approxumation, array[N]
+    Buf -   buffer; function automatically allocates it, if it is too
+            small. It can be reused if function is called several times
+            with same M and N.
+            
+OUTPUT PARAMETERS:
+    X   -   improved solution
+    
+NOTES:
+*   solver checks quality of improved solution. If (because of problem
+    condition number, numerical noise, etc.) new solution is WORSE than
+    original approximation, then original approximation is returned.
+*   solver assumes that both A, B, Alpha are well scaled (i.e. they are
+    less than sqrt(overflow) and greater than sqrt(underflow)).
+    
+  -- ALGLIB --
+     Copyright 20.08.2009 by Bochkanov Sergey
+*************************************************************************/
+void fblssolvecgx(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     double alpha,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* buf,
+     ae_state *_state)
+{
+    ae_int_t k;
+    ae_int_t offsrk;
+    ae_int_t offsrk1;
+    ae_int_t offsxk;
+    ae_int_t offsxk1;
+    ae_int_t offspk;
+    ae_int_t offspk1;
+    ae_int_t offstmp1;
+    ae_int_t offstmp2;
+    ae_int_t bs;
+    double e1;
+    double e2;
+    double rk2;
+    double rk12;
+    double pap;
+    double s;
+    double betak;
+    double v1;
+    double v2;
+
+
+    
+    /*
+     * Test for special case: B=0
+     */
+    v1 = ae_v_dotproduct(&b->ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( ae_fp_eq(v1,0) )
+    {
+        for(k=0; k<=n-1; k++)
+        {
+            x->ptr.p_double[k] = 0;
+        }
+        return;
+    }
+    
+    /*
+     * Offsets inside Buf for:
+     * * R[K], R[K+1]
+     * * X[K], X[K+1]
+     * * P[K], P[K+1]
+     * * Tmp1 - array[M], Tmp2 - array[N]
+     */
+    offsrk = 0;
+    offsrk1 = offsrk+n;
+    offsxk = offsrk1+n;
+    offsxk1 = offsxk+n;
+    offspk = offsxk1+n;
+    offspk1 = offspk+n;
+    offstmp1 = offspk1+n;
+    offstmp2 = offstmp1+m;
+    bs = offstmp2+n;
+    if( buf->cnt<bs )
+    {
+        ae_vector_set_length(buf, bs, _state);
+    }
+    
+    /*
+     * x(0) = x
+     */
+    ae_v_move(&buf->ptr.p_double[offsxk], 1, &x->ptr.p_double[0], 1, ae_v_len(offsxk,offsxk+n-1));
+    
+    /*
+     * r(0) = b-A*x(0)
+     * RK2 = r(0)'*r(0)
+     */
+    rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state);
+    rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
+    ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
+    ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1));
+    ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1));
+    rk2 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1));
+    ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offspk,offspk+n-1));
+    e1 = ae_sqrt(rk2, _state);
+    
+    /*
+     * Cycle
+     */
+    for(k=0; k<=n-1; k++)
+    {
+        
+        /*
+         * Calculate A*p(k) - store in Buf[OffsTmp2:OffsTmp2+N-1]
+         * and p(k)'*A*p(k)  - store in PAP
+         *
+         * If PAP=0, break (iteration is over)
+         */
+        rmatrixmv(m, n, a, 0, 0, 0, buf, offspk, buf, offstmp1, _state);
+        v1 = ae_v_dotproduct(&buf->ptr.p_double[offstmp1], 1, &buf->ptr.p_double[offstmp1], 1, ae_v_len(offstmp1,offstmp1+m-1));
+        v2 = ae_v_dotproduct(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk,offspk+n-1));
+        pap = v1+alpha*v2;
+        rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
+        ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
+        if( ae_fp_eq(pap,0) )
+        {
+            break;
+        }
+        
+        /*
+         * S = (r(k)'*r(k))/(p(k)'*A*p(k))
+         */
+        s = rk2/pap;
+        
+        /*
+         * x(k+1) = x(k) + S*p(k)
+         */
+        ae_v_move(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offsxk1,offsxk1+n-1));
+        ae_v_addd(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offsxk1,offsxk1+n-1), s);
+        
+        /*
+         * r(k+1) = r(k) - S*A*p(k)
+         * RK12 = r(k+1)'*r(k+1)
+         *
+         * Break if r(k+1) small enough (when compared to r(k))
+         */
+        ae_v_move(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk1,offsrk1+n-1));
+        ae_v_subd(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk1,offsrk1+n-1), s);
+        rk12 = ae_v_dotproduct(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk1,offsrk1+n-1));
+        if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*ae_sqrt(rk2, _state)) )
+        {
+            
+            /*
+             * X(k) = x(k+1) before exit -
+             * - because we expect to find solution at x(k)
+             */
+            ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1));
+            break;
+        }
+        
+        /*
+         * BetaK = RK12/RK2
+         * p(k+1) = r(k+1)+betak*p(k)
+         */
+        betak = rk12/rk2;
+        ae_v_move(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offspk1,offspk1+n-1));
+        ae_v_addd(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk1,offspk1+n-1), betak);
+        
+        /*
+         * r(k) := r(k+1)
+         * x(k) := x(k+1)
+         * p(k) := p(k+1)
+         */
+        ae_v_move(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk,offsrk+n-1));
+        ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1));
+        ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk1], 1, ae_v_len(offspk,offspk+n-1));
+        rk2 = rk12;
+    }
+    
+    /*
+     * Calculate E2
+     */
+    rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state);
+    rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
+    ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
+    ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1));
+    ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1));
+    v1 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1));
+    e2 = ae_sqrt(v1, _state);
+    
+    /*
+     * Output result (if it was improved)
+     */
+    if( ae_fp_less(e2,e1) )
+    {
+        ae_v_move(&x->ptr.p_double[0], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(0,n-1));
+    }
+}
+
+
+/*************************************************************************
+Construction of linear conjugate gradient solver.
+
+State parameter passed using "var" semantics (i.e. previous state  is  NOT
+erased). When it is already initialized, we can reause prevously allocated
+memory.
+
+INPUT PARAMETERS:
+    X       -   initial solution
+    B       -   right part
+    N       -   system size
+    State   -   structure; may be preallocated, if we want to reuse memory
+
+OUTPUT PARAMETERS:
+    State   -   structure which is used by FBLSCGIteration() to store
+                algorithm state between subsequent calls.
+
+NOTE: no error checking is done; caller must check all parameters, prevent
+      overflows, and so on.
+
+  -- ALGLIB --
+     Copyright 22.10.2009 by Bochkanov Sergey
+*************************************************************************/
+void fblscgcreate(/* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* b,
+     ae_int_t n,
+     fblslincgstate* state,
+     ae_state *_state)
+{
+
+
+    if( state->b.cnt<n )
+    {
+        ae_vector_set_length(&state->b, n, _state);
+    }
+    if( state->rk.cnt<n )
+    {
+        ae_vector_set_length(&state->rk, n, _state);
+    }
+    if( state->rk1.cnt<n )
+    {
+        ae_vector_set_length(&state->rk1, n, _state);
+    }
+    if( state->xk.cnt<n )
+    {
+        ae_vector_set_length(&state->xk, n, _state);
+    }
+    if( state->xk1.cnt<n )
+    {
+        ae_vector_set_length(&state->xk1, n, _state);
+    }
+    if( state->pk.cnt<n )
+    {
+        ae_vector_set_length(&state->pk, n, _state);
+    }
+    if( state->pk1.cnt<n )
+    {
+        ae_vector_set_length(&state->pk1, n, _state);
+    }
+    if( state->tmp2.cnt<n )
+    {
+        ae_vector_set_length(&state->tmp2, n, _state);
+    }
+    if( state->x.cnt<n )
+    {
+        ae_vector_set_length(&state->x, n, _state);
+    }
+    if( state->ax.cnt<n )
+    {
+        ae_vector_set_length(&state->ax, n, _state);
+    }
+    state->n = n;
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->b.ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_vector_set_length(&state->rstate.ia, 1+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 6+1, _state);
+    state->rstate.stage = -1;
+}
+
+
+/*************************************************************************
+Linear CG solver, function relying on reverse communication to calculate
+matrix-vector products.
+
+See comments for FBLSLinCGState structure for more info.
+
+  -- ALGLIB --
+     Copyright 22.10.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool fblscgiteration(fblslincgstate* state, ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t k;
+    double rk2;
+    double rk12;
+    double pap;
+    double s;
+    double betak;
+    double v1;
+    double v2;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        k = state->rstate.ia.ptr.p_int[1];
+        rk2 = state->rstate.ra.ptr.p_double[0];
+        rk12 = state->rstate.ra.ptr.p_double[1];
+        pap = state->rstate.ra.ptr.p_double[2];
+        s = state->rstate.ra.ptr.p_double[3];
+        betak = state->rstate.ra.ptr.p_double[4];
+        v1 = state->rstate.ra.ptr.p_double[5];
+        v2 = state->rstate.ra.ptr.p_double[6];
+    }
+    else
+    {
+        n = -983;
+        k = -989;
+        rk2 = -834;
+        rk12 = 900;
+        pap = -287;
+        s = 364;
+        betak = 214;
+        v1 = -338;
+        v2 = -686;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    
+    /*
+     * Routine body
+     */
+    
+    /*
+     * prepare locals
+     */
+    n = state->n;
+    
+    /*
+     * Test for special case: B=0
+     */
+    v1 = ae_v_dotproduct(&state->b.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( ae_fp_eq(v1,0) )
+    {
+        for(k=0; k<=n-1; k++)
+        {
+            state->xk.ptr.p_double[k] = 0;
+        }
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * r(0) = b-A*x(0)
+     * RK2 = r(0)'*r(0)
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    rk2 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->pk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->e1 = ae_sqrt(rk2, _state);
+    
+    /*
+     * Cycle
+     */
+    k = 0;
+lbl_3:
+    if( k>n-1 )
+    {
+        goto lbl_5;
+    }
+    
+    /*
+     * Calculate A*p(k) - store in State.Tmp2
+     * and p(k)'*A*p(k)  - store in PAP
+     *
+     * If PAP=0, break (iteration is over)
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    ae_v_move(&state->tmp2.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    pap = state->xax;
+    if( !ae_isfinite(pap, _state) )
+    {
+        goto lbl_5;
+    }
+    if( ae_fp_less_eq(pap,0) )
+    {
+        goto lbl_5;
+    }
+    
+    /*
+     * S = (r(k)'*r(k))/(p(k)'*A*p(k))
+     */
+    s = rk2/pap;
+    
+    /*
+     * x(k+1) = x(k) + S*p(k)
+     */
+    ae_v_move(&state->xk1.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->xk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), s);
+    
+    /*
+     * r(k+1) = r(k) - S*A*p(k)
+     * RK12 = r(k+1)'*r(k+1)
+     *
+     * Break if r(k+1) small enough (when compared to r(k))
+     */
+    ae_v_move(&state->rk1.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_subd(&state->rk1.ptr.p_double[0], 1, &state->tmp2.ptr.p_double[0], 1, ae_v_len(0,n-1), s);
+    rk12 = ae_v_dotproduct(&state->rk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*state->e1) )
+    {
+        
+        /*
+         * X(k) = x(k+1) before exit -
+         * - because we expect to find solution at x(k)
+         */
+        ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        goto lbl_5;
+    }
+    
+    /*
+     * BetaK = RK12/RK2
+     * p(k+1) = r(k+1)+betak*p(k)
+     *
+     * NOTE: we expect that BetaK won't overflow because of
+     * "Sqrt(RK12)<=100*MachineEpsilon*E1" test above.
+     */
+    betak = rk12/rk2;
+    ae_v_move(&state->pk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->pk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak);
+    
+    /*
+     * r(k) := r(k+1)
+     * x(k) := x(k+1)
+     * p(k) := p(k+1)
+     */
+    ae_v_move(&state->rk.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->pk.ptr.p_double[0], 1, &state->pk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    rk2 = rk12;
+    k = k+1;
+    goto lbl_3;
+lbl_5:
+    
+    /*
+     * Calculate E2
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    v1 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->e2 = ae_sqrt(v1, _state);
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ia.ptr.p_int[1] = k;
+    state->rstate.ra.ptr.p_double[0] = rk2;
+    state->rstate.ra.ptr.p_double[1] = rk12;
+    state->rstate.ra.ptr.p_double[2] = pap;
+    state->rstate.ra.ptr.p_double[3] = s;
+    state->rstate.ra.ptr.p_double[4] = betak;
+    state->rstate.ra.ptr.p_double[5] = v1;
+    state->rstate.ra.ptr.p_double[6] = v2;
+    return result;
+}
+
+
+ae_bool _fblslincgstate_init(fblslincgstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ax, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->rk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->rk1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xk1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->pk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->pk1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->b, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->tmp2, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _fblslincgstate_init_copy(fblslincgstate* dst, fblslincgstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->e1 = src->e1;
+    dst->e2 = src->e2;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->ax, &src->ax, _state, make_automatic) )
+        return ae_false;
+    dst->xax = src->xax;
+    dst->n = src->n;
+    if( !ae_vector_init_copy(&dst->rk, &src->rk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->rk1, &src->rk1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xk1, &src->xk1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->pk, &src->pk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->pk1, &src->pk1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->b, &src->b, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->tmp2, &src->tmp2, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _fblslincgstate_clear(fblslincgstate* p)
+{
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->ax);
+    ae_vector_clear(&p->rk);
+    ae_vector_clear(&p->rk1);
+    ae_vector_clear(&p->xk);
+    ae_vector_clear(&p->xk1);
+    ae_vector_clear(&p->pk);
+    ae_vector_clear(&p->pk1);
+    ae_vector_clear(&p->b);
+    _rcommstate_clear(&p->rstate);
+    ae_vector_clear(&p->tmp2);
+}
+
+
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixludet(/* Real    */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t s;
+    double result;
+
+
+    ae_assert(n>=1, "RMatrixLUDet: N<1!", _state);
+    ae_assert(pivots->cnt>=n, "RMatrixLUDet: Pivots array is too short!", _state);
+    ae_assert(a->rows>=n, "RMatrixLUDet: rows(A)<N!", _state);
+    ae_assert(a->cols>=n, "RMatrixLUDet: cols(A)<N!", _state);
+    ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUDet: A contains infinite or NaN values!", _state);
+    result = 1;
+    s = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        result = result*a->ptr.pp_double[i][i];
+        if( pivots->ptr.p_int[i]!=i )
+        {
+            s = -s;
+        }
+    }
+    result = result*s;
+    return result;
+}
+
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixdet(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector pivots;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "RMatrixDet: N<1!", _state);
+    ae_assert(a->rows>=n, "RMatrixDet: rows(A)<N!", _state);
+    ae_assert(a->cols>=n, "RMatrixDet: cols(A)<N!", _state);
+    ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixDet: A contains infinite or NaN values!", _state);
+    rmatrixlu(a, n, n, &pivots, _state);
+    result = rmatrixludet(a, &pivots, n, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+ae_complex cmatrixludet(/* Complex */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t s;
+    ae_complex result;
+
+
+    ae_assert(n>=1, "CMatrixLUDet: N<1!", _state);
+    ae_assert(pivots->cnt>=n, "CMatrixLUDet: Pivots array is too short!", _state);
+    ae_assert(a->rows>=n, "CMatrixLUDet: rows(A)<N!", _state);
+    ae_assert(a->cols>=n, "CMatrixLUDet: cols(A)<N!", _state);
+    ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUDet: A contains infinite or NaN values!", _state);
+    result = ae_complex_from_d(1);
+    s = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        result = ae_c_mul(result,a->ptr.pp_complex[i][i]);
+        if( pivots->ptr.p_int[i]!=i )
+        {
+            s = -s;
+        }
+    }
+    result = ae_c_mul_d(result,s);
+    return result;
+}
+
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+ae_complex cmatrixdet(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_vector pivots;
+    ae_complex result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "CMatrixDet: N<1!", _state);
+    ae_assert(a->rows>=n, "CMatrixDet: rows(A)<N!", _state);
+    ae_assert(a->cols>=n, "CMatrixDet: cols(A)<N!", _state);
+    ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixDet: A contains infinite or NaN values!", _state);
+    cmatrixlu(a, n, n, &pivots, _state);
+    result = cmatrixludet(a, &pivots, n, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by the Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition,
+                output of SMatrixCholesky subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+As the determinant is equal to the product of squares of diagonal elements,
+it’s not necessary to specify which triangle - lower or upper - the matrix
+is stored in.
+
+Result:
+    matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixcholeskydet(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_bool f;
+    double result;
+
+
+    ae_assert(n>=1, "SPDMatrixCholeskyDet: N<1!", _state);
+    ae_assert(a->rows>=n, "SPDMatrixCholeskyDet: rows(A)<N!", _state);
+    ae_assert(a->cols>=n, "SPDMatrixCholeskyDet: cols(A)<N!", _state);
+    f = ae_true;
+    for(i=0; i<=n-1; i++)
+    {
+        f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state);
+    }
+    ae_assert(f, "SPDMatrixCholeskyDet: A contains infinite or NaN values!", _state);
+    result = 1;
+    for(i=0; i<=n-1; i++)
+    {
+        result = result*ae_sqr(a->ptr.pp_double[i][i], _state);
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Determinant calculation of the symmetric positive definite matrix.
+
+Input parameters:
+    A       -   matrix. Array with elements [0..N-1, 0..N-1].
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+    IsUpper -   (optional) storage type:
+                * if True, symmetric matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t used/changed  by
+                  function
+                * if False, symmetric matrix  A  is  given  by  its lower
+                  triangle, and the upper triangle isn’t used/changed  by
+                  function
+                * if not given, both lower and upper  triangles  must  be
+                  filled.
+
+Result:
+    determinant of matrix A.
+    If matrix A is not positive definite, exception is thrown.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixdet(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_bool b;
+    double result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+
+    ae_assert(n>=1, "SPDMatrixDet: N<1!", _state);
+    ae_assert(a->rows>=n, "SPDMatrixDet: rows(A)<N!", _state);
+    ae_assert(a->cols>=n, "SPDMatrixDet: cols(A)<N!", _state);
+    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixDet: A contains infinite or NaN values!", _state);
+    b = spdmatrixcholesky(a, n, isupper, _state);
+    ae_assert(b, "SPDMatrixDet: A is not SPD!", _state);
+    result = spdmatrixcholeskydet(a, n, _state);
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+
+
+/*************************************************************************
+Algorithm for solving the following generalized symmetric positive-definite
+eigenproblem:
+    A*x = lambda*B*x (1) or
+    A*B*x = lambda*x (2) or
+    B*A*x = lambda*x (3).
+where A is a symmetric matrix, B - symmetric positive-definite matrix.
+The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
+problem.
+
+Input parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrices A and B.
+    IsUpperA    -   storage format of matrix A.
+    B           -   symmetric positive-definite matrix which is given by
+                    its upper or lower triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperB    -   storage format of matrix B.
+    ZNeeded     -   if ZNeeded is equal to:
+                     * 0, the eigenvectors are not returned;
+                     * 1, the eigenvectors are returned.
+    ProblemType -   if ProblemType is equal to:
+                     * 1, the following problem is solved: A*x = lambda*B*x;
+                     * 2, the following problem is solved: A*B*x = lambda*x;
+                     * 3, the following problem is solved: B*A*x = lambda*x.
+
+Output parameters:
+    D           -   eigenvalues in ascending order.
+                    Array whose index ranges within [0..N-1].
+    Z           -   if ZNeeded is equal to:
+                     * 0, Z hasn’t changed;
+                     * 1, Z contains eigenvectors.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+                    The eigenvectors are stored in matrix columns. It should
+                    be noted that the eigenvectors in such problems do not
+                    form an orthogonal system.
+
+Result:
+    True, if the problem was solved successfully.
+    False, if the error occurred during the Cholesky decomposition of matrix
+    B (the matrix isn’t positive-definite) or during the work of the iterative
+    algorithm for solving the symmetric eigenproblem.
+
+See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
+
+  -- ALGLIB --
+     Copyright 1.28.2006 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixgevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isuppera,
+     /* Real    */ ae_matrix* b,
+     ae_bool isupperb,
+     ae_int_t zneeded,
+     ae_int_t problemtype,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix _a;
+    ae_matrix r;
+    ae_matrix t;
+    ae_bool isupperr;
+    ae_int_t j1;
+    ae_int_t j2;
+    ae_int_t j1inc;
+    ae_int_t j2inc;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_init_copy(&_a, a, _state, ae_true);
+    a = &_a;
+    ae_vector_clear(d);
+    ae_matrix_clear(z);
+    ae_matrix_init(&r, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Reduce and solve
+     */
+    result = smatrixgevdreduce(a, n, isuppera, b, isupperb, problemtype, &r, &isupperr, _state);
+    if( !result )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    result = smatrixevd(a, n, zneeded, isuppera, d, &t, _state);
+    if( !result )
+    {
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Transform eigenvectors if needed
+     */
+    if( zneeded!=0 )
+    {
+        
+        /*
+         * fill Z with zeros
+         */
+        ae_matrix_set_length(z, n-1+1, n-1+1, _state);
+        for(j=0; j<=n-1; j++)
+        {
+            z->ptr.pp_double[0][j] = 0.0;
+        }
+        for(i=1; i<=n-1; i++)
+        {
+            ae_v_move(&z->ptr.pp_double[i][0], 1, &z->ptr.pp_double[0][0], 1, ae_v_len(0,n-1));
+        }
+        
+        /*
+         * Setup R properties
+         */
+        if( isupperr )
+        {
+            j1 = 0;
+            j2 = n-1;
+            j1inc = 1;
+            j2inc = 0;
+        }
+        else
+        {
+            j1 = 0;
+            j2 = 0;
+            j1inc = 0;
+            j2inc = 1;
+        }
+        
+        /*
+         * Calculate R*Z
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=j1; j<=j2; j++)
+            {
+                v = r.ptr.pp_double[i][j];
+                ae_v_addd(&z->ptr.pp_double[i][0], 1, &t.ptr.pp_double[j][0], 1, ae_v_len(0,n-1), v);
+            }
+            j1 = j1+j1inc;
+            j2 = j2+j2inc;
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+/*************************************************************************
+Algorithm for reduction of the following generalized symmetric positive-
+definite eigenvalue problem:
+    A*x = lambda*B*x (1) or
+    A*B*x = lambda*x (2) or
+    B*A*x = lambda*x (3)
+to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
+the given problems are the same, and the eigenvectors of the given problem
+could be obtained by multiplying the obtained eigenvectors by the
+transformation matrix x = R*y).
+
+Here A is a symmetric matrix, B - symmetric positive-definite matrix.
+
+Input parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrices A and B.
+    IsUpperA    -   storage format of matrix A.
+    B           -   symmetric positive-definite matrix which is given by
+                    its upper or lower triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperB    -   storage format of matrix B.
+    ProblemType -   if ProblemType is equal to:
+                     * 1, the following problem is solved: A*x = lambda*B*x;
+                     * 2, the following problem is solved: A*B*x = lambda*x;
+                     * 3, the following problem is solved: B*A*x = lambda*x.
+
+Output parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangle depending on IsUpperA. Contains matrix C.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    R           -   upper triangular or low triangular transformation matrix
+                    which is used to obtain the eigenvectors of a given problem
+                    as the product of eigenvectors of C (from the right) and
+                    matrix R (from the left). If the matrix is upper
+                    triangular, the elements below the main diagonal
+                    are equal to 0 (and vice versa). Thus, we can perform
+                    the multiplication without taking into account the
+                    internal structure (which is an easier though less
+                    effective way).
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperR    -   type of matrix R (upper or lower triangular).
+
+Result:
+    True, if the problem was reduced successfully.
+    False, if the error occurred during the Cholesky decomposition of
+        matrix B (the matrix is not positive-definite).
+
+  -- ALGLIB --
+     Copyright 1.28.2006 by Bochkanov Sergey
+*************************************************************************/
+ae_bool smatrixgevdreduce(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isuppera,
+     /* Real    */ ae_matrix* b,
+     ae_bool isupperb,
+     ae_int_t problemtype,
+     /* Real    */ ae_matrix* r,
+     ae_bool* isupperr,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_matrix t;
+    ae_vector w1;
+    ae_vector w2;
+    ae_vector w3;
+    ae_int_t i;
+    ae_int_t j;
+    double v;
+    matinvreport rep;
+    ae_int_t info;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(r);
+    *isupperr = ae_false;
+    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&w1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&w2, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&w3, 0, DT_REAL, _state, ae_true);
+    _matinvreport_init(&rep, _state, ae_true);
+
+    ae_assert(n>0, "SMatrixGEVDReduce: N<=0!", _state);
+    ae_assert((problemtype==1||problemtype==2)||problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!", _state);
+    result = ae_true;
+    
+    /*
+     * Problem 1:  A*x = lambda*B*x
+     *
+     * Reducing to:
+     *     C*y = lambda*y
+     *     C = L^(-1) * A * L^(-T)
+     *     x = L^(-T) * y
+     */
+    if( problemtype==1 )
+    {
+        
+        /*
+         * Factorize B in T: B = LL'
+         */
+        ae_matrix_set_length(&t, n-1+1, n-1+1, _state);
+        if( isupperb )
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&t.ptr.pp_double[i][i], t.stride, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
+            }
+        }
+        else
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&t.ptr.pp_double[i][0], 1, &b->ptr.pp_double[i][0], 1, ae_v_len(0,i));
+            }
+        }
+        if( !spdmatrixcholesky(&t, n, ae_false, _state) )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Invert L in T
+         */
+        rmatrixtrinverse(&t, n, ae_false, ae_false, &info, &rep, _state);
+        if( info<=0 )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Build L^(-1) * A * L^(-T) in R
+         */
+        ae_vector_set_length(&w1, n+1, _state);
+        ae_vector_set_length(&w2, n+1, _state);
+        ae_matrix_set_length(r, n-1+1, n-1+1, _state);
+        for(j=1; j<=n; j++)
+        {
+            
+            /*
+             * Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T))
+             */
+            ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][0], 1, ae_v_len(1,j));
+            symmetricmatrixvectormultiply(a, isuppera, 0, j-1, &w1, 1.0, &w2, _state);
+            if( isuppera )
+            {
+                matrixvectormultiply(a, 0, j-1, j, n-1, ae_true, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state);
+            }
+            else
+            {
+                matrixvectormultiply(a, j, n-1, 0, j-1, ae_false, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state);
+            }
+            
+            /*
+             * Form l(i)*w2 (here l(i) is i-th row of L^(-1))
+             */
+            for(i=1; i<=n; i++)
+            {
+                v = ae_v_dotproduct(&t.ptr.pp_double[i-1][0], 1, &w2.ptr.p_double[1], 1, ae_v_len(0,i-1));
+                r->ptr.pp_double[i-1][j-1] = v;
+            }
+        }
+        
+        /*
+         * Copy R to A
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
+        }
+        
+        /*
+         * Copy L^(-1) from T to R and transpose
+         */
+        *isupperr = ae_true;
+        for(i=0; i<=n-1; i++)
+        {
+            for(j=0; j<=i-1; j++)
+            {
+                r->ptr.pp_double[i][j] = 0;
+            }
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], t.stride, ae_v_len(i,n-1));
+        }
+        ae_frame_leave(_state);
+        return result;
+    }
+    
+    /*
+     * Problem 2:  A*B*x = lambda*x
+     * or
+     * problem 3:  B*A*x = lambda*x
+     *
+     * Reducing to:
+     *     C*y = lambda*y
+     *     C = U * A * U'
+     *     B = U'* U
+     */
+    if( problemtype==2||problemtype==3 )
+    {
+        
+        /*
+         * Factorize B in T: B = U'*U
+         */
+        ae_matrix_set_length(&t, n-1+1, n-1+1, _state);
+        if( isupperb )
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
+            }
+        }
+        else
+        {
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], b->stride, ae_v_len(i,n-1));
+            }
+        }
+        if( !spdmatrixcholesky(&t, n, ae_true, _state) )
+        {
+            result = ae_false;
+            ae_frame_leave(_state);
+            return result;
+        }
+        
+        /*
+         * Build U * A * U' in R
+         */
+        ae_vector_set_length(&w1, n+1, _state);
+        ae_vector_set_length(&w2, n+1, _state);
+        ae_vector_set_length(&w3, n+1, _state);
+        ae_matrix_set_length(r, n-1+1, n-1+1, _state);
+        for(j=1; j<=n; j++)
+        {
+            
+            /*
+             * Form w2 = A * u'(j) (here u'(j) is j-th column of U')
+             */
+            ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(1,n-j+1));
+            symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, &w1, 1.0, &w3, _state);
+            ae_v_move(&w2.ptr.p_double[j], 1, &w3.ptr.p_double[1], 1, ae_v_len(j,n));
+            ae_v_move(&w1.ptr.p_double[j], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(j,n));
+            if( isuppera )
+            {
+                matrixvectormultiply(a, 0, j-2, j-1, n-1, ae_false, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state);
+            }
+            else
+            {
+                matrixvectormultiply(a, j-1, n-1, 0, j-2, ae_true, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state);
+            }
+            
+            /*
+             * Form u(i)*w2 (here u(i) is i-th row of U)
+             */
+            for(i=1; i<=n; i++)
+            {
+                v = ae_v_dotproduct(&t.ptr.pp_double[i-1][i-1], 1, &w2.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
+                r->ptr.pp_double[i-1][j-1] = v;
+            }
+        }
+        
+        /*
+         * Copy R to A
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
+        }
+        if( problemtype==2 )
+        {
+            
+            /*
+             * Invert U in T
+             */
+            rmatrixtrinverse(&t, n, ae_true, ae_false, &info, &rep, _state);
+            if( info<=0 )
+            {
+                result = ae_false;
+                ae_frame_leave(_state);
+                return result;
+            }
+            
+            /*
+             * Copy U^-1 from T to R
+             */
+            *isupperr = ae_true;
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=0; j<=i-1; j++)
+                {
+                    r->ptr.pp_double[i][j] = 0;
+                }
+            }
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
+            }
+        }
+        else
+        {
+            
+            /*
+             * Copy U from T to R and transpose
+             */
+            *isupperr = ae_false;
+            for(i=0; i<=n-1; i++)
+            {
+                for(j=i+1; j<=n-1; j++)
+                {
+                    r->ptr.pp_double[i][j] = 0;
+                }
+            }
+            for(i=0; i<=n-1; i++)
+            {
+                ae_v_move(&r->ptr.pp_double[i][i], r->stride, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
+            }
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a number to an element
+of matrix A.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    UpdRow  -   row where the element to be updated is stored.
+    UpdColumn - column where the element to be updated is stored.
+    UpdVal  -   a number to be added to the element.
+
+
+Output parameters:
+    InvA    -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdatesimple(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     ae_int_t updrow,
+     ae_int_t updcolumn,
+     double updval,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector t1;
+    ae_vector t2;
+    ae_int_t i;
+    double lambdav;
+    double vt;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
+
+    ae_assert(updrow>=0&&updrow<n, "RMatrixInvUpdateSimple: incorrect UpdRow!", _state);
+    ae_assert(updcolumn>=0&&updcolumn<n, "RMatrixInvUpdateSimple: incorrect UpdColumn!", _state);
+    ae_vector_set_length(&t1, n-1+1, _state);
+    ae_vector_set_length(&t2, n-1+1, _state);
+    
+    /*
+     * T1 = InvA * U
+     */
+    ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1));
+    
+    /*
+     * T2 = v*InvA
+     */
+    ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * Lambda = v * InvA * U
+     */
+    lambdav = updval*inva->ptr.pp_double[updcolumn][updrow];
+    
+    /*
+     * InvA = InvA - correction
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        vt = updval*t1.ptr.p_double[i];
+        vt = vt/(1+lambdav);
+        ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a vector to a row
+of matrix A.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    UpdRow  -   the row of A whose vector V was added.
+                0 <= Row <= N-1
+    V       -   the vector to be added to a row.
+                Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA    -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdaterow(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     ae_int_t updrow,
+     /* Real    */ ae_vector* v,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector t1;
+    ae_vector t2;
+    ae_int_t i;
+    ae_int_t j;
+    double lambdav;
+    double vt;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
+
+    ae_vector_set_length(&t1, n-1+1, _state);
+    ae_vector_set_length(&t2, n-1+1, _state);
+    
+    /*
+     * T1 = InvA * U
+     */
+    ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1));
+    
+    /*
+     * T2 = v*InvA
+     * Lambda = v * InvA * U
+     */
+    for(j=0; j<=n-1; j++)
+    {
+        vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1));
+        t2.ptr.p_double[j] = vt;
+    }
+    lambdav = t2.ptr.p_double[updrow];
+    
+    /*
+     * InvA = InvA - correction
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        vt = t1.ptr.p_double[i]/(1+lambdav);
+        ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a vector to a column
+of matrix A.
+
+Input parameters:
+    InvA        -   inverse of matrix A.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrix A.
+    UpdColumn   -   the column of A whose vector U was added.
+                    0 <= UpdColumn <= N-1
+    U           -   the vector to be added to a column.
+                    Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA        -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdatecolumn(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     ae_int_t updcolumn,
+     /* Real    */ ae_vector* u,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector t1;
+    ae_vector t2;
+    ae_int_t i;
+    double lambdav;
+    double vt;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
+
+    ae_vector_set_length(&t1, n-1+1, _state);
+    ae_vector_set_length(&t2, n-1+1, _state);
+    
+    /*
+     * T1 = InvA * U
+     * Lambda = v * InvA * U
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1));
+        t1.ptr.p_double[i] = vt;
+    }
+    lambdav = t1.ptr.p_double[updcolumn];
+    
+    /*
+     * T2 = v*InvA
+     */
+    ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * InvA = InvA - correction
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        vt = t1.ptr.p_double[i]/(1+lambdav);
+        ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
+    }
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm computes the inverse of matrix A+u*v’ by using the given matrix
+A^-1 and the vectors u and v.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    U       -   the vector modifying the matrix.
+                Array whose index ranges within [0..N-1].
+    V       -   the vector modifying the matrix.
+                Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA - inverse of matrix A + u*v'.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdateuv(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     /* Real    */ ae_vector* u,
+     /* Real    */ ae_vector* v,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector t1;
+    ae_vector t2;
+    ae_int_t i;
+    ae_int_t j;
+    double lambdav;
+    double vt;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
+
+    ae_vector_set_length(&t1, n-1+1, _state);
+    ae_vector_set_length(&t2, n-1+1, _state);
+    
+    /*
+     * T1 = InvA * U
+     * Lambda = v * T1
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1));
+        t1.ptr.p_double[i] = vt;
+    }
+    lambdav = ae_v_dotproduct(&v->ptr.p_double[0], 1, &t1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * T2 = v*InvA
+     */
+    for(j=0; j<=n-1; j++)
+    {
+        vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1));
+        t2.ptr.p_double[j] = vt;
+    }
+    
+    /*
+     * InvA = InvA - correction
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        vt = t1.ptr.p_double[i]/(1+lambdav);
+        ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
+    }
+    ae_frame_leave(_state);
+}
+
+
+
+
+/*************************************************************************
+Subroutine performing the Schur decomposition of a general matrix by using
+the QR algorithm with multiple shifts.
+
+The source matrix A is represented as S'*A*S = T, where S is an orthogonal
+matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
+sizes 1x1 and 2x2 on the main diagonal).
+
+Input parameters:
+    A   -   matrix to be decomposed.
+            Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of A, N>=0.
+
+
+Output parameters:
+    A   -   contains matrix T.
+            Array whose indexes range within [0..N-1, 0..N-1].
+    S   -   contains Schur vectors.
+            Array whose indexes range within [0..N-1, 0..N-1].
+
+Note 1:
+    The block structure of matrix T can be easily recognized: since all
+    the elements below the blocks are zeros, the elements a[i+1,i] which
+    are equal to 0 show the block border.
+
+Note 2:
+    The algorithm performance depends on the value of the internal parameter
+    NS of the InternalSchurDecomposition subroutine which defines the number
+    of shifts in the QR algorithm (similarly to the block width in block-matrix
+    algorithms in linear algebra). If you require maximum performance on
+    your machine, it is recommended to adjust this parameter manually.
+
+Result:
+    True,
+        if the algorithm has converged and parameters A and S contain the result.
+    False,
+        if the algorithm has not converged.
+
+Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
+*************************************************************************/
+ae_bool rmatrixschur(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_matrix* s,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_vector tau;
+    ae_vector wi;
+    ae_vector wr;
+    ae_matrix a1;
+    ae_matrix s1;
+    ae_int_t info;
+    ae_int_t i;
+    ae_int_t j;
+    ae_bool result;
+
+    ae_frame_make(_state, &_frame_block);
+    ae_matrix_clear(s);
+    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
+    ae_matrix_init(&s1, 0, 0, DT_REAL, _state, ae_true);
+
+    
+    /*
+     * Upper Hessenberg form of the 0-based matrix
+     */
+    rmatrixhessenberg(a, n, &tau, _state);
+    rmatrixhessenbergunpackq(a, n, &tau, s, _state);
+    
+    /*
+     * Convert from 0-based arrays to 1-based,
+     * then call InternalSchurDecomposition
+     * Awkward, of course, but Schur decompisiton subroutine
+     * is too complex to fix it.
+     *
+     */
+    ae_matrix_set_length(&a1, n+1, n+1, _state);
+    ae_matrix_set_length(&s1, n+1, n+1, _state);
+    for(i=1; i<=n; i++)
+    {
+        for(j=1; j<=n; j++)
+        {
+            a1.ptr.pp_double[i][j] = a->ptr.pp_double[i-1][j-1];
+            s1.ptr.pp_double[i][j] = s->ptr.pp_double[i-1][j-1];
+        }
+    }
+    internalschurdecomposition(&a1, n, 1, 1, &wr, &wi, &s1, &info, _state);
+    result = info==0;
+    
+    /*
+     * convert from 1-based arrays to -based
+     */
+    for(i=1; i<=n; i++)
+    {
+        for(j=1; j<=n; j++)
+        {
+            a->ptr.pp_double[i-1][j-1] = a1.ptr.pp_double[i][j];
+            s->ptr.pp_double[i-1][j-1] = s1.ptr.pp_double[i][j];
+        }
+    }
+    ae_frame_leave(_state);
+    return result;
+}
+
+
+
+}
+
diff --git a/contrib/lbfgs/linalg.h b/contrib/lbfgs/linalg.h
new file mode 100755
index 0000000000..38f2def55d
--- /dev/null
+++ b/contrib/lbfgs/linalg.h
@@ -0,0 +1,4101 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#ifndef _linalg_pkg_h
+#define _linalg_pkg_h
+#include "ap.h"
+#include "alglibinternal.h"
+#include "alglibmisc.h"
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+typedef struct
+{
+    double r1;
+    double rinf;
+} matinvreport;
+typedef struct
+{
+    double e1;
+    double e2;
+    ae_vector x;
+    ae_vector ax;
+    double xax;
+    ae_int_t n;
+    ae_vector rk;
+    ae_vector rk1;
+    ae_vector xk;
+    ae_vector xk1;
+    ae_vector pk;
+    ae_vector pk1;
+    ae_vector b;
+    rcommstate rstate;
+    ae_vector tmp2;
+} fblslincgstate;
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*************************************************************************
+Matrix inverse report:
+* R1    reciprocal of condition number in 1-norm
+* RInf  reciprocal of condition number in inf-norm
+*************************************************************************/
+class _matinvreport_owner
+{
+public:
+    _matinvreport_owner();
+    _matinvreport_owner(const _matinvreport_owner &rhs);
+    _matinvreport_owner& operator=(const _matinvreport_owner &rhs);
+    virtual ~_matinvreport_owner();
+    alglib_impl::matinvreport* c_ptr();
+    alglib_impl::matinvreport* c_ptr() const;
+protected:
+    alglib_impl::matinvreport *p_struct;
+};
+class matinvreport : public _matinvreport_owner
+{
+public:
+    matinvreport();
+    matinvreport(const matinvreport &rhs);
+    matinvreport& operator=(const matinvreport &rhs);
+    virtual ~matinvreport();
+    double &r1;
+    double &rinf;
+
+};
+
+/*************************************************************************
+Cache-oblivous complex "copy-and-transpose"
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    A   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void cmatrixtranspose(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb);
+
+
+/*************************************************************************
+Cache-oblivous real "copy-and-transpose"
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    A   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void rmatrixtranspose(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb);
+
+
+/*************************************************************************
+Copy
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    B   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void cmatrixcopy(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb);
+
+
+/*************************************************************************
+Copy
+
+Input parameters:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   source matrix, MxN submatrix is copied and transposed
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    B   -   destination matrix
+    IB  -   submatrix offset (row index)
+    JB  -   submatrix offset (column index)
+*************************************************************************/
+void rmatrixcopy(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_2d_array &b, const ae_int_t ib, const ae_int_t jb);
+
+
+/*************************************************************************
+Rank-1 correction: A := A + u*v'
+
+INPUT PARAMETERS:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   target matrix, MxN submatrix is updated
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    U   -   vector #1
+    IU  -   subvector offset
+    V   -   vector #2
+    IV  -   subvector offset
+*************************************************************************/
+void cmatrixrank1(const ae_int_t m, const ae_int_t n, complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_1d_array &u, const ae_int_t iu, complex_1d_array &v, const ae_int_t iv);
+
+
+/*************************************************************************
+Rank-1 correction: A := A + u*v'
+
+INPUT PARAMETERS:
+    M   -   number of rows
+    N   -   number of columns
+    A   -   target matrix, MxN submatrix is updated
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    U   -   vector #1
+    IU  -   subvector offset
+    V   -   vector #2
+    IV  -   subvector offset
+*************************************************************************/
+void rmatrixrank1(const ae_int_t m, const ae_int_t n, real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_1d_array &u, const ae_int_t iu, real_1d_array &v, const ae_int_t iv);
+
+
+/*************************************************************************
+Matrix-vector product: y := op(A)*x
+
+INPUT PARAMETERS:
+    M   -   number of rows of op(A)
+            M>=0
+    N   -   number of columns of op(A)
+            N>=0
+    A   -   target matrix
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    OpA -   operation type:
+            * OpA=0     =>  op(A) = A
+            * OpA=1     =>  op(A) = A^T
+            * OpA=2     =>  op(A) = A^H
+    X   -   input vector
+    IX  -   subvector offset
+    IY  -   subvector offset
+
+OUTPUT PARAMETERS:
+    Y   -   vector which stores result
+
+if M=0, then subroutine does nothing.
+if N=0, Y is filled by zeros.
+
+
+  -- ALGLIB routine --
+
+     28.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixmv(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const complex_1d_array &x, const ae_int_t ix, complex_1d_array &y, const ae_int_t iy);
+
+
+/*************************************************************************
+Matrix-vector product: y := op(A)*x
+
+INPUT PARAMETERS:
+    M   -   number of rows of op(A)
+    N   -   number of columns of op(A)
+    A   -   target matrix
+    IA  -   submatrix offset (row index)
+    JA  -   submatrix offset (column index)
+    OpA -   operation type:
+            * OpA=0     =>  op(A) = A
+            * OpA=1     =>  op(A) = A^T
+    X   -   input vector
+    IX  -   subvector offset
+    IY  -   subvector offset
+
+OUTPUT PARAMETERS:
+    Y   -   vector which stores result
+
+if M=0, then subroutine does nothing.
+if N=0, Y is filled by zeros.
+
+
+  -- ALGLIB routine --
+
+     28.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixmv(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, real_1d_array &y, const ae_int_t iy);
+
+
+/*************************************************************************
+This subroutine calculates X*op(A^-1) where:
+* X is MxN general matrix
+* A is NxN upper/lower triangular/unitriangular matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Multiplication result replaces X.
+Cache-oblivious algorithm is used.
+
+INPUT PARAMETERS
+    N   -   matrix size, N>=0
+    M   -   matrix size, N>=0
+    A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
+    I1      -   submatrix offset
+    J1      -   submatrix offset
+    IsUpper -   whether matrix is upper triangular
+    IsUnit  -   whether matrix is unitriangular
+    OpType  -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    C   -   matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1]
+    I2  -   submatrix offset
+    J2  -   submatrix offset
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2);
+
+
+/*************************************************************************
+This subroutine calculates op(A^-1)*X where:
+* X is MxN general matrix
+* A is MxM upper/lower triangular/unitriangular matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Multiplication result replaces X.
+Cache-oblivious algorithm is used.
+
+INPUT PARAMETERS
+    N   -   matrix size, N>=0
+    M   -   matrix size, N>=0
+    A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
+    I1      -   submatrix offset
+    J1      -   submatrix offset
+    IsUpper -   whether matrix is upper triangular
+    IsUnit  -   whether matrix is unitriangular
+    OpType  -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    C   -   matrix, actial matrix is stored in C[I2:I2+M-1,J2:J2+N-1]
+    I2  -   submatrix offset
+    J2  -   submatrix offset
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, complex_2d_array &x, const ae_int_t i2, const ae_int_t j2);
+
+
+/*************************************************************************
+Same as CMatrixRightTRSM, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2);
+
+
+/*************************************************************************
+Same as CMatrixLeftTRSM, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, real_2d_array &x, const ae_int_t i2, const ae_int_t j2);
+
+
+/*************************************************************************
+This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
+where:
+* C is NxN Hermitian matrix given by its upper/lower triangle
+* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
+
+Additional info:
+* cache-oblivious algorithm is used.
+* multiplication result replaces C. If Beta=0, C elements are not used in
+  calculations (not multiplied by zero - just not referenced)
+* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
+* if both Beta and Alpha are zero, C is filled by zeros.
+
+INPUT PARAMETERS
+    N       -   matrix size, N>=0
+    K       -   matrix size, K>=0
+    Alpha   -   coefficient
+    A       -   matrix
+    IA      -   submatrix offset
+    JA      -   submatrix offset
+    OpTypeA -   multiplication type:
+                * 0 - A*A^H is calculated
+                * 2 - A^H*A is calculated
+    Beta    -   coefficient
+    C       -   matrix
+    IC      -   submatrix offset
+    JC      -   submatrix offset
+    IsUpper -   whether C is upper triangular or lower triangular
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper);
+
+
+/*************************************************************************
+Same as CMatrixSYRK, but for real matrices
+
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper);
+
+
+/*************************************************************************
+This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
+* C is MxN general matrix
+* op1(A) is MxK matrix
+* op2(B) is KxN matrix
+* "op" may be identity transformation, transposition, conjugate transposition
+
+Additional info:
+* cache-oblivious algorithm is used.
+* multiplication result replaces C. If Beta=0, C elements are not used in
+  calculations (not multiplied by zero - just not referenced)
+* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
+* if both Beta and Alpha are zero, C is filled by zeros.
+
+INPUT PARAMETERS
+    N       -   matrix size, N>0
+    M       -   matrix size, N>0
+    K       -   matrix size, K>0
+    Alpha   -   coefficient
+    A       -   matrix
+    IA      -   submatrix offset
+    JA      -   submatrix offset
+    OpTypeA -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    B       -   matrix
+    IB      -   submatrix offset
+    JB      -   submatrix offset
+    OpTypeB -   transformation type:
+                * 0 - no transformation
+                * 1 - transposition
+                * 2 - conjugate transposition
+    Beta    -   coefficient
+    C       -   matrix
+    IC      -   submatrix offset
+    JC      -   submatrix offset
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const alglib::complex alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const alglib::complex beta, complex_2d_array &c, const ae_int_t ic, const ae_int_t jc);
+
+
+/*************************************************************************
+Same as CMatrixGEMM, but for real numbers.
+OpType may be only 0 or 1.
+
+  -- ALGLIB routine --
+     16.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const double beta, real_2d_array &c, const ae_int_t ic, const ae_int_t jc);
+
+/*************************************************************************
+QR decomposition of a rectangular matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and R in compact form (see below).
+    Tau -   array of scalar factors which are used to form
+            matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
+
+Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
+MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
+
+The elements of matrix R are located on and above the main diagonal of
+matrix A. The elements which are located in Tau array and below the main
+diagonal of matrix A are used to form matrix Q as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(0)*H(2)*...*H(k-1),
+
+where k = min(m,n), and each H(i) is in the form
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - real vector,
+so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqr(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau);
+
+
+/*************************************************************************
+LQ decomposition of a rectangular matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices L and Q in compact form (see below)
+    Tau -   array of scalar factors which are used to form
+            matrix Q. Array whose index ranges within [0..Min(M,N)-1].
+
+Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
+MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.
+
+The elements of matrix L are located on and below  the  main  diagonal  of
+matrix A. The elements which are located in Tau array and above  the  main
+diagonal of matrix A are used to form matrix Q as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(k-1)*H(k-2)*...*H(1)*H(0),
+
+where k = min(m,n), and each H(i) is of the form
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
+v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlq(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau);
+
+
+/*************************************************************************
+QR decomposition of a rectangular complex matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and R in compact form
+    Tau -   array of scalar factors which are used to form matrix Q. Array
+            whose indexes range within [0.. Min(M,N)-1]
+
+Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
+MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void cmatrixqr(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau);
+
+
+/*************************************************************************
+LQ decomposition of a rectangular complex matrix of size MxN
+
+Input parameters:
+    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
+    M   -   number of rows in matrix A.
+    N   -   number of columns in matrix A.
+
+Output parameters:
+    A   -   matrices Q and L in compact form
+    Tau -   array of scalar factors which are used to form matrix Q. Array
+            whose indexes range within [0.. Min(M,N)-1]
+
+Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
+MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+void cmatrixlq(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau);
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of RMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+    Tau     -   scalar factors which are used to form Q.
+                Output of the RMatrixQR subroutine.
+    QColumns -  required number of columns of matrix Q. M>=QColumns>=0.
+
+Output parameters:
+    Q       -   first QColumns columns of matrix Q.
+                Array whose indexes range within [0..M-1, 0..QColumns-1].
+                If QColumns=0, the array remains unchanged.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqrunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qcolumns, real_2d_array &q);
+
+
+/*************************************************************************
+Unpacking of matrix R from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of RMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    R       -   matrix R, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixqrunpackr(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &r);
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices L and Q in compact form.
+                Output of RMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+    Tau     -   scalar factors which are used to form Q.
+                Output of the RMatrixLQ subroutine.
+    QRows   -   required number of rows in matrix Q. N>=QRows>=0.
+
+Output parameters:
+    Q       -   first QRows rows of matrix Q. Array whose indexes range
+                within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
+                unchanged.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlqunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qrows, real_2d_array &q);
+
+
+/*************************************************************************
+Unpacking of matrix L from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and L in compact form.
+                Output of RMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    L       -   matrix L, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlqunpackl(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &l);
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
+
+Input parameters:
+    A           -   matrices Q and R in compact form.
+                    Output of CMatrixQR subroutine .
+    M           -   number of rows in matrix A. M>=0.
+    N           -   number of columns in matrix A. N>=0.
+    Tau         -   scalar factors which are used to form Q.
+                    Output of CMatrixQR subroutine .
+    QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QColumns columns of matrix Q.
+                    Array whose index ranges within [0..M-1, 0..QColumns-1].
+                    If QColumns=0, array isn't changed.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixqrunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qcolumns, complex_2d_array &q);
+
+
+/*************************************************************************
+Unpacking of matrix R from the QR decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and R in compact form.
+                Output of CMatrixQR subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    R       -   matrix R, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixqrunpackr(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &r);
+
+
+/*************************************************************************
+Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
+
+Input parameters:
+    A           -   matrices Q and R in compact form.
+                    Output of CMatrixLQ subroutine .
+    M           -   number of rows in matrix A. M>=0.
+    N           -   number of columns in matrix A. N>=0.
+    Tau         -   scalar factors which are used to form Q.
+                    Output of CMatrixLQ subroutine .
+    QRows       -   required number of rows in matrix Q. N>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QRows rows of matrix Q.
+                    Array whose index ranges within [0..QRows-1, 0..N-1].
+                    If QRows=0, array isn't changed.
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlqunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qrows, complex_2d_array &q);
+
+
+/*************************************************************************
+Unpacking of matrix L from the LQ decomposition of a matrix A
+
+Input parameters:
+    A       -   matrices Q and L in compact form.
+                Output of CMatrixLQ subroutine.
+    M       -   number of rows in given matrix A. M>=0.
+    N       -   number of columns in given matrix A. N>=0.
+
+Output parameters:
+    L       -   matrix L, array[0..M-1, 0..N-1].
+
+  -- ALGLIB routine --
+     17.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlqunpackl(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &l);
+
+
+/*************************************************************************
+Reduction of a rectangular matrix to  bidiagonal form
+
+The algorithm reduces the rectangular matrix A to  bidiagonal form by
+orthogonal transformations P and Q: A = Q*B*P.
+
+Input parameters:
+    A       -   source matrix. array[0..M-1, 0..N-1]
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+Output parameters:
+    A       -   matrices Q, B, P in compact form (see below).
+    TauQ    -   scalar factors which are used to form matrix Q.
+    TauP    -   scalar factors which are used to form matrix P.
+
+The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
+replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
+reflections which form MxM matrix Q and NxN matrix P, respectively.
+
+If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
+corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
+product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
+H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
+vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
+stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
+G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
+u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
+
+If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
+corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
+H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
+is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
+G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
+is stored in A(i,i+1:n-1).
+
+EXAMPLE:
+
+m=6, n=5 (m > n):               m=5, n=6 (m < n):
+
+(  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
+(  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
+(  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
+(  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
+(  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
+(  v1  v2  v3  v4  v5 )
+
+Here vi and ui are vectors which form H(i) and G(i), and d and e -
+are the diagonal and off-diagonal elements of matrix B.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994.
+     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
+     pseudocode, 2007-2010.
+*************************************************************************/
+void rmatrixbd(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tauq, real_1d_array &taup);
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces a matrix to bidiagonal form.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of ToBidiagonal subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUQ        -   scalar factors which are used to form Q.
+                    Output of ToBidiagonal subroutine.
+    QColumns    -   required number of columns in matrix Q.
+                    M>=QColumns>=0.
+
+Output parameters:
+    Q           -   first QColumns columns of matrix Q.
+                    Array[0..M-1, 0..QColumns-1]
+                    If QColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, const ae_int_t qcolumns, real_2d_array &q);
+
+
+/*************************************************************************
+Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
+
+The algorithm allows pre- or post-multiply by Q or Q'.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of ToBidiagonal subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUQ        -   scalar factors which are used to form Q.
+                    Output of ToBidiagonal subroutine.
+    Z           -   multiplied matrix.
+                    array[0..ZRows-1,0..ZColumns-1]
+    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
+                    ZRows=M, otherwise ZRows can be arbitrary.
+    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
+                    ZColumns=M, otherwise ZColumns can be arbitrary.
+    FromTheRight -  pre- or post-multiply.
+    DoTranspose -   multiply by Q or Q'.
+
+Output parameters:
+    Z           -   product of Z and Q.
+                    Array[0..ZRows-1,0..ZColumns-1]
+                    If ZRows=0 or ZColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdmultiplybyq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose);
+
+
+/*************************************************************************
+Unpacking matrix P which reduces matrix A to bidiagonal form.
+The subroutine returns transposed matrix P.
+
+Input parameters:
+    QP      -   matrices Q and P in compact form.
+                Output of ToBidiagonal subroutine.
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+    TAUP    -   scalar factors which are used to form P.
+                Output of ToBidiagonal subroutine.
+    PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
+
+Output parameters:
+    PT      -   first PTRows columns of matrix P^T
+                Array[0..PTRows-1, 0..N-1]
+                If PTRows=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackpt(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, const ae_int_t ptrows, real_2d_array &pt);
+
+
+/*************************************************************************
+Multiplication by matrix P which reduces matrix A to  bidiagonal form.
+
+The algorithm allows pre- or post-multiply by P or P'.
+
+Input parameters:
+    QP          -   matrices Q and P in compact form.
+                    Output of RMatrixBD subroutine.
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    TAUP        -   scalar factors which are used to form P.
+                    Output of RMatrixBD subroutine.
+    Z           -   multiplied matrix.
+                    Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
+    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
+                    ZRows=N, otherwise ZRows can be arbitrary.
+    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
+                    ZColumns=N, otherwise ZColumns can be arbitrary.
+    FromTheRight -  pre- or post-multiply.
+    DoTranspose -   multiply by P or P'.
+
+Output parameters:
+    Z - product of Z and P.
+                Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
+                If ZRows=0 or ZColumns=0, the array is not modified.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdmultiplybyp(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose);
+
+
+/*************************************************************************
+Unpacking of the main and secondary diagonals of bidiagonal decomposition
+of matrix A.
+
+Input parameters:
+    B   -   output of RMatrixBD subroutine.
+    M   -   number of rows in matrix B.
+    N   -   number of columns in matrix B.
+
+Output parameters:
+    IsUpper -   True, if the matrix is upper bidiagonal.
+                otherwise IsUpper is False.
+    D       -   the main diagonal.
+                Array whose index ranges within [0..Min(M,N)-1].
+    E       -   the secondary diagonal (upper or lower, depending on
+                the value of IsUpper).
+                Array index ranges within [0..Min(M,N)-1], the last
+                element is not used.
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixbdunpackdiagonals(const real_2d_array &b, const ae_int_t m, const ae_int_t n, bool &isupper, real_1d_array &d, real_1d_array &e);
+
+
+/*************************************************************************
+Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
+where Q is an orthogonal matrix, H - Hessenberg matrix.
+
+Input parameters:
+    A       -   matrix A with elements [0..N-1, 0..N-1]
+    N       -   size of matrix A.
+
+Output parameters:
+    A       -   matrices Q and P in  compact form (see below).
+    Tau     -   array of scalar factors which are used to form matrix Q.
+                Array whose index ranges within [0..N-2]
+
+Matrix H is located on the main diagonal, on the lower secondary  diagonal
+and above the main diagonal of matrix A. The elements which are used to
+form matrix Q are situated in array Tau and below the lower secondary
+diagonal of matrix A as follows:
+
+Matrix Q is represented as a product of elementary reflections
+
+Q = H(0)*H(2)*...*H(n-2),
+
+where each H(i) is given by
+
+H(i) = 1 - tau * v * (v^T)
+
+where tau is a scalar stored in Tau[I]; v - is a real vector,
+so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void rmatrixhessenberg(real_2d_array &a, const ae_int_t n, real_1d_array &tau);
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces matrix A to upper Hessenberg form
+
+Input parameters:
+    A   -   output of RMatrixHessenberg subroutine.
+    N   -   size of matrix A.
+    Tau -   scalar factors which are used to form Q.
+            Output of RMatrixHessenberg subroutine.
+
+Output parameters:
+    Q   -   matrix Q.
+            Array whose indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixhessenbergunpackq(const real_2d_array &a, const ae_int_t n, const real_1d_array &tau, real_2d_array &q);
+
+
+/*************************************************************************
+Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
+
+Input parameters:
+    A   -   output of RMatrixHessenberg subroutine.
+    N   -   size of matrix A.
+
+Output parameters:
+    H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     2005-2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixhessenbergunpackh(const real_2d_array &a, const ae_int_t n, real_2d_array &h);
+
+
+/*************************************************************************
+Reduction of a symmetric matrix which is given by its higher or lower
+triangular part to a tridiagonal matrix using orthogonal similarity
+transformation: Q'*A*Q=T.
+
+Input parameters:
+    A       -   matrix to be transformed
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format. If IsUpper = True, then matrix A is given
+                by its upper triangle, and the lower triangle is not used
+                and not modified by the algorithm, and vice versa
+                if IsUpper = False.
+
+Output parameters:
+    A       -   matrices T and Q in  compact form (see lower)
+    Tau     -   array of factors which are forming matrices H(i)
+                array with elements [0..N-2].
+    D       -   main diagonal of symmetric matrix T.
+                array with elements [0..N-1].
+    E       -   secondary diagonal of symmetric matrix T.
+                array with elements [0..N-2].
+
+
+  If IsUpper=True, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(n-2) . . . H(2) H(0).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a real scalar, and v is a real vector with
+  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
+  A(0:i-1,i+1), and tau in TAU(i).
+
+  If IsUpper=False, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(0) H(2) . . . H(n-2).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a real scalar, and v is a real vector with
+  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
+  and tau in TAU(i).
+
+  The contents of A on exit are illustrated by the following examples
+  with n = 5:
+
+  if UPLO = 'U':                       if UPLO = 'L':
+
+    (  d   e   v1  v2  v3 )              (  d                  )
+    (      d   e   v2  v3 )              (  e   d              )
+    (          d   e   v3 )              (  v0  e   d          )
+    (              d   e  )              (  v0  v1  e   d      )
+    (                  d  )              (  v0  v1  v2  e   d  )
+
+  where d and e denote diagonal and off-diagonal elements of T, and vi
+  denotes an element of the vector defining H(i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void smatrixtd(real_2d_array &a, const ae_int_t n, const bool isupper, real_1d_array &tau, real_1d_array &d, real_1d_array &e);
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
+form.
+
+Input parameters:
+    A       -   the result of a SMatrixTD subroutine
+    N       -   size of matrix A.
+    IsUpper -   storage format (a parameter of SMatrixTD subroutine)
+    Tau     -   the result of a SMatrixTD subroutine
+
+Output parameters:
+    Q       -   transformation matrix.
+                array with elements [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void smatrixtdunpackq(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &tau, real_2d_array &q);
+
+
+/*************************************************************************
+Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
+triangular part to a real  tridiagonal  matrix  using  unitary  similarity
+transformation: Q'*A*Q = T.
+
+Input parameters:
+    A       -   matrix to be transformed
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format. If IsUpper = True, then matrix A is  given
+                by its upper triangle, and the lower triangle is not  used
+                and not modified by the algorithm, and vice versa
+                if IsUpper = False.
+
+Output parameters:
+    A       -   matrices T and Q in  compact form (see lower)
+    Tau     -   array of factors which are forming matrices H(i)
+                array with elements [0..N-2].
+    D       -   main diagonal of real symmetric matrix T.
+                array with elements [0..N-1].
+    E       -   secondary diagonal of real symmetric matrix T.
+                array with elements [0..N-2].
+
+
+  If IsUpper=True, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(n-2) . . . H(2) H(0).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a complex scalar, and v is a complex vector with
+  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
+  A(0:i-1,i+1), and tau in TAU(i).
+
+  If IsUpper=False, the matrix Q is represented as a product of elementary
+  reflectors
+
+     Q = H(0) H(2) . . . H(n-2).
+
+  Each H(i) has the form
+
+     H(i) = I - tau * v * v'
+
+  where tau is a complex scalar, and v is a complex vector with
+  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
+  and tau in TAU(i).
+
+  The contents of A on exit are illustrated by the following examples
+  with n = 5:
+
+  if UPLO = 'U':                       if UPLO = 'L':
+
+    (  d   e   v1  v2  v3 )              (  d                  )
+    (      d   e   v2  v3 )              (  e   d              )
+    (          d   e   v3 )              (  v0  e   d          )
+    (              d   e  )              (  v0  v1  e   d      )
+    (                  d  )              (  v0  v1  v2  e   d  )
+
+where d and e denote diagonal and off-diagonal elements of T, and vi
+denotes an element of the vector defining H(i).
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1992
+*************************************************************************/
+void hmatrixtd(complex_2d_array &a, const ae_int_t n, const bool isupper, complex_1d_array &tau, real_1d_array &d, real_1d_array &e);
+
+
+/*************************************************************************
+Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
+form.
+
+Input parameters:
+    A       -   the result of a HMatrixTD subroutine
+    N       -   size of matrix A.
+    IsUpper -   storage format (a parameter of HMatrixTD subroutine)
+    Tau     -   the result of a HMatrixTD subroutine
+
+Output parameters:
+    Q       -   transformation matrix.
+                array with elements [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void hmatrixtdunpackq(const complex_2d_array &a, const ae_int_t n, const bool isupper, const complex_1d_array &tau, complex_2d_array &q);
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a symmetric matrix
+
+The algorithm finds eigen pairs of a symmetric matrix by reducing it to
+tridiagonal form and using the QL/QR algorithm.
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpper -   storage format.
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the eigenvectors.
+                Array whose indexes range within [0..N-1, 0..N-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, real_2d_array &z);
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
+matrix  in  a  given half open interval (A, B] by using  a  bisection  and
+inverse iteration
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part. Array [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
+
+Output parameters:
+    M       -   number of eigenvalues found in a given half-interval (M>=0).
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..M-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. M contains the number of eigenvalues in the given
+    half-interval (could be equal to 0), W contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned,
+    M is equal to 0.
+
+  -- ALGLIB --
+     Copyright 07.01.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixevdr(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, real_2d_array &z);
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
+matrix with given indexes by using bisection and inverse iteration methods.
+
+Input parameters:
+    A       -   symmetric matrix which is given by its upper or lower
+                triangular part. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    I1, I2 -    index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+
+Output parameters:
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..I2-I1].
+                In that case, the eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. W contains the eigenvalues, Z contains the
+    eigenvectors (if needed).
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned.
+
+  -- ALGLIB --
+     Copyright 07.01.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixevdi(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, real_2d_array &z);
+
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a Hermitian matrix
+
+The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
+real tridiagonal form and using the QL/QR algorithm.
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the eigenvectors.
+                Array whose indexes range within [0..N-1, 0..N-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+Note:
+    eigenvectors of Hermitian matrix are defined up to  multiplication  by
+    a complex number L, such that |L|=1.
+
+  -- ALGLIB --
+     Copyright 2005, 23 March 2007 by Bochkanov Sergey
+*************************************************************************/
+bool hmatrixevd(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, complex_2d_array &z);
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
+matrix  in  a  given half-interval (A, B] by using a bisection and inverse
+iteration
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular  part.  Array  whose   indexes   range   within
+                [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
+
+Output parameters:
+    M       -   number of eigenvalues found in a given half-interval, M>=0
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..M-1].
+                The eigenvectors are stored in the matrix columns.
+
+Result:
+    True, if successful. M contains the number of eigenvalues in the given
+    half-interval (could be equal to 0), W contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+
+    False, if the bisection method subroutine  wasn't  able  to  find  the
+    eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
+    subroutine  wasn't  able  to  find all the corresponding eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned, M  is
+    equal to 0.
+
+Note:
+    eigen vectors of Hermitian matrix are defined up to multiplication  by
+    a complex number L, such as |L|=1.
+
+  -- ALGLIB --
+     Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
+*************************************************************************/
+bool hmatrixevdr(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, complex_2d_array &z);
+
+
+/*************************************************************************
+Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
+matrix with given indexes by using bisection and inverse iteration methods
+
+Input parameters:
+    A       -   Hermitian matrix which is given  by  its  upper  or  lower
+                triangular part.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
+                not. If ZNeeded is equal to:
+                 * 0, the eigenvectors are not returned;
+                 * 1, the eigenvectors are returned.
+    IsUpperA -  storage format of matrix A.
+    I1, I2 -    index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+
+Output parameters:
+    W       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains eigenvectors.
+                Array whose indexes range within [0..N-1, 0..I2-I1].
+                In  that  case,  the eigenvectors are stored in the matrix
+                columns.
+
+Result:
+    True, if successful. W contains the eigenvalues, Z contains the
+    eigenvectors (if needed).
+
+    False, if the bisection method subroutine  wasn't  able  to  find  the
+    eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
+    subroutine wasn't able to find  all  the  corresponding  eigenvectors.
+    In that case, the eigenvalues and eigenvectors are not returned.
+
+Note:
+    eigen vectors of Hermitian matrix are defined up to multiplication  by
+    a complex number L, such as |L|=1.
+
+  -- ALGLIB --
+     Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
+*************************************************************************/
+bool hmatrixevdi(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, complex_2d_array &z);
+
+
+/*************************************************************************
+Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
+
+The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
+using an QL/QR algorithm with implicit shifts.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix A.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix
+                   are multiplied by the square matrix Z. It is used if the
+                   tridiagonal matrix is obtained by the similarity
+                   transformation of a symmetric matrix;
+                 * 2, the eigenvectors of a tridiagonal matrix replace the
+                   square matrix Z;
+                 * 3, matrix Z contains the first row of the eigenvectors
+                   matrix.
+    Z       -   if ZNeeded=1, Z contains the square matrix by which the
+                eigenvectors are multiplied.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+Output parameters:
+    D       -   eigenvalues in ascending order.
+                Array whose index ranges within [0..N-1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z hasn’t changed;
+                 * 1, Z contains the product of a given matrix (from the left)
+                   and the eigenvectors matrix (from the right);
+                 * 2, Z contains the eigenvectors.
+                 * 3, Z contains the first row of the eigenvectors matrix.
+                If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
+                In that case, the eigenvectors are stored in the matrix columns.
+                If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     September 30, 1994
+*************************************************************************/
+bool smatrixtdevd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, real_2d_array &z);
+
+
+/*************************************************************************
+Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
+given half-interval (A, B] by using bisection and inverse iteration.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix, N>=0.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix are multiplied
+                   by the square matrix Z. It is used if the tridiagonal
+                   matrix is obtained by the similarity transformation
+                   of a symmetric matrix.
+                 * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
+    A, B    -   half-interval (A, B] to search eigenvalues in.
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z isn't used and remains unchanged;
+                 * 1, Z contains the square matrix (array whose indexes range
+                   within [0..N-1, 0..N-1]) which reduces the given symmetric
+                   matrix to tridiagonal form;
+                 * 2, Z isn't used (but changed on the exit).
+
+Output parameters:
+    D       -   array of the eigenvalues found.
+                Array whose index ranges within [0..M-1].
+    M       -   number of eigenvalues found in the given half-interval (M>=0).
+    Z       -   if ZNeeded is equal to:
+                 * 0, doesn't contain any information;
+                 * 1, contains the product of a given NxN matrix Z (from the
+                   left) and NxM matrix of the eigenvectors found (from the
+                   right). Array whose indexes range within [0..N-1, 0..M-1].
+                 * 2, contains the matrix of the eigenvectors found.
+                   Array whose indexes range within [0..N-1, 0..M-1].
+
+Result:
+
+    True, if successful. In that case, M contains the number of eigenvalues
+    in the given half-interval (could be equal to 0), D contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+    It should be noted that the subroutine changes the size of arrays D and Z.
+
+    False, if the bisection method subroutine wasn't able to find the
+    eigenvalues in the given interval or if the inverse iteration subroutine
+    wasn't able to find all the corresponding eigenvectors. In that case,
+    the eigenvalues and eigenvectors are not returned, M is equal to 0.
+
+  -- ALGLIB --
+     Copyright 31.03.2008 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixtdevdr(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const double a, const double b, ae_int_t &m, real_2d_array &z);
+
+
+/*************************************************************************
+Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
+indexes (in ascending order) by using the bisection and inverse iteraion.
+
+Input parameters:
+    D       -   the main diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-1].
+    E       -   the secondary diagonal of a tridiagonal matrix.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix. N>=0.
+    ZNeeded -   flag controlling whether the eigenvectors are needed or not.
+                If ZNeeded is equal to:
+                 * 0, the eigenvectors are not needed;
+                 * 1, the eigenvectors of a tridiagonal matrix are multiplied
+                   by the square matrix Z. It is used if the
+                   tridiagonal matrix is obtained by the similarity transformation
+                   of a symmetric matrix.
+                 * 2, the eigenvectors of a tridiagonal matrix replace
+                   matrix Z.
+    I1, I2  -   index interval for searching (from I1 to I2).
+                0 <= I1 <= I2 <= N-1.
+    Z       -   if ZNeeded is equal to:
+                 * 0, Z isn't used and remains unchanged;
+                 * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
+                   which reduces the given symmetric matrix to  tridiagonal form;
+                 * 2, Z isn't used (but changed on the exit).
+
+Output parameters:
+    D       -   array of the eigenvalues found.
+                Array whose index ranges within [0..I2-I1].
+    Z       -   if ZNeeded is equal to:
+                 * 0, doesn't contain any information;
+                 * 1, contains the product of a given NxN matrix Z (from the left) and
+                   Nx(I2-I1) matrix of the eigenvectors found (from the right).
+                   Array whose indexes range within [0..N-1, 0..I2-I1].
+                 * 2, contains the matrix of the eigenvalues found.
+                   Array whose indexes range within [0..N-1, 0..I2-I1].
+
+
+Result:
+
+    True, if successful. In that case, D contains the eigenvalues,
+    Z contains the eigenvectors (if needed).
+    It should be noted that the subroutine changes the size of arrays D and Z.
+
+    False, if the bisection method subroutine wasn't able to find the eigenvalues
+    in the given interval or if the inverse iteration subroutine wasn't able
+    to find all the corresponding eigenvectors. In that case, the eigenvalues
+    and eigenvectors are not returned.
+
+  -- ALGLIB --
+     Copyright 25.12.2005 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixtdevdi(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const ae_int_t i1, const ae_int_t i2, real_2d_array &z);
+
+
+/*************************************************************************
+Finding eigenvalues and eigenvectors of a general matrix
+
+The algorithm finds eigenvalues and eigenvectors of a general matrix by
+using the QR algorithm with multiple shifts. The algorithm can find
+eigenvalues and both left and right eigenvectors.
+
+The right eigenvector is a vector x such that A*x = w*x, and the left
+eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
+conjugate transposition of vector y).
+
+Input parameters:
+    A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    VNeeded -   flag controlling whether eigenvectors are needed or not.
+                If VNeeded is equal to:
+                 * 0, eigenvectors are not returned;
+                 * 1, right eigenvectors are returned;
+                 * 2, left eigenvectors are returned;
+                 * 3, both left and right eigenvectors are returned.
+
+Output parameters:
+    WR      -   real parts of eigenvalues.
+                Array whose index ranges within [0..N-1].
+    WR      -   imaginary parts of eigenvalues.
+                Array whose index ranges within [0..N-1].
+    VL, VR  -   arrays of left and right eigenvectors (if they are needed).
+                If WI[i]=0, the respective eigenvalue is a real number,
+                and it corresponds to the column number I of matrices VL/VR.
+                If WI[i]>0, we have a pair of complex conjugate numbers with
+                positive and negative imaginary parts:
+                    the first eigenvalue WR[i] + sqrt(-1)*WI[i];
+                    the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
+                    WI[i]>0
+                    WI[i+1] = -WI[i] < 0
+                In that case, the eigenvector  corresponding to the first
+                eigenvalue is located in i and i+1 columns of matrices
+                VL/VR (the column number i contains the real part, and the
+                column number i+1 contains the imaginary part), and the vector
+                corresponding to the second eigenvalue is a complex conjugate to
+                the first vector.
+                Arrays whose indexes range within [0..N-1, 0..N-1].
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm has not converged.
+
+Note 1:
+    Some users may ask the following question: what if WI[N-1]>0?
+    WI[N] must contain an eigenvalue which is complex conjugate to the
+    N-th eigenvalue, but the array has only size N?
+    The answer is as follows: such a situation cannot occur because the
+    algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is
+    strictly less than N-1.
+
+Note 2:
+    The algorithm performance depends on the value of the internal parameter
+    NS of the InternalSchurDecomposition subroutine which defines the number
+    of shifts in the QR algorithm (similarly to the block width in block-matrix
+    algorithms of linear algebra). If you require maximum performance
+    on your machine, it is recommended to adjust this parameter manually.
+
+
+See also the InternalTREVC subroutine.
+
+The algorithm is based on the LAPACK 3.0 library.
+*************************************************************************/
+bool rmatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t vneeded, real_1d_array &wr, real_1d_array &wi, real_2d_array &vl, real_2d_array &vr);
+
+/*************************************************************************
+Generation of a random uniformly distributed (Haar) orthogonal matrix
+
+INPUT PARAMETERS:
+    N   -   matrix size, N>=1
+
+OUTPUT PARAMETERS:
+    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonal(const ae_int_t n, real_2d_array &a);
+
+
+/*************************************************************************
+Generation of random NxN matrix with given condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a);
+
+
+/*************************************************************************
+Generation of a random Haar distributed orthogonal complex matrix
+
+INPUT PARAMETERS:
+    N   -   matrix size, N>=1
+
+OUTPUT PARAMETERS:
+    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonal(const ae_int_t n, complex_2d_array &a);
+
+
+/*************************************************************************
+Generation of random NxN complex matrix with given condition number C and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a);
+
+
+/*************************************************************************
+Generation of random NxN symmetric matrix with given condition number  and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void smatrixrndcond(const ae_int_t n, const double c, real_2d_array &a);
+
+
+/*************************************************************************
+Generation of random NxN symmetric positive definite matrix with given
+condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a);
+
+
+/*************************************************************************
+Generation of random NxN Hermitian matrix with given condition number  and
+norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a);
+
+
+/*************************************************************************
+Generation of random NxN Hermitian positive definite matrix with given
+condition number and norm2(A)=1
+
+INPUT PARAMETERS:
+    N   -   matrix size
+    C   -   condition number (in 2-norm)
+
+OUTPUT PARAMETERS:
+    A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a);
+
+
+/*************************************************************************
+Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonalfromtheright(real_2d_array &a, const ae_int_t m, const ae_int_t n);
+
+
+/*************************************************************************
+Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q*A, where Q is random MxM orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixrndorthogonalfromtheleft(real_2d_array &a, const ae_int_t m, const ae_int_t n);
+
+
+/*************************************************************************
+Multiplication of MxN complex matrix by NxN random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonalfromtheright(complex_2d_array &a, const ae_int_t m, const ae_int_t n);
+
+
+/*************************************************************************
+Multiplication of MxN complex matrix by MxM random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..M-1, 0..N-1]
+    M, N-   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q*A, where Q is random MxM orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixrndorthogonalfromtheleft(complex_2d_array &a, const ae_int_t m, const ae_int_t n);
+
+
+/*************************************************************************
+Symmetric multiplication of NxN matrix by random Haar distributed
+orthogonal  matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..N-1, 0..N-1]
+    N   -   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void smatrixrndmultiply(real_2d_array &a, const ae_int_t n);
+
+
+/*************************************************************************
+Hermitian multiplication of NxN matrix by random Haar distributed
+complex orthogonal matrix
+
+INPUT PARAMETERS:
+    A   -   matrix, array[0..N-1, 0..N-1]
+    N   -   matrix size
+
+OUTPUT PARAMETERS:
+    A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
+
+  -- ALGLIB routine --
+     04.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+void hmatrixrndmultiply(complex_2d_array &a, const ae_int_t n);
+
+/*************************************************************************
+LU decomposition of a general real matrix with row pivoting
+
+A is represented as A = P*L*U, where:
+* L is lower unitriangular matrix
+* U is upper triangular matrix
+* P = P0*P1*...*PK, K=min(M,N)-1,
+  Pi - permutation matrix for I and Pivots[I]
+
+This is cache-oblivous implementation of LU decomposition.
+It is optimized for square matrices. As for rectangular matrices:
+* best case - M>>N
+* worst case - N>>M, small M, large N, matrix does not fit in CPU cache
+
+INPUT PARAMETERS:
+    A       -   array[0..M-1, 0..N-1].
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+
+OUTPUT PARAMETERS:
+    A       -   matrices L and U in compact form:
+                * L is stored under main diagonal
+                * U is stored on and above main diagonal
+    Pivots  -   permutation matrix in compact form.
+                array[0..Min(M-1,N-1)].
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixlu(real_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots);
+
+
+/*************************************************************************
+LU decomposition of a general complex matrix with row pivoting
+
+A is represented as A = P*L*U, where:
+* L is lower unitriangular matrix
+* U is upper triangular matrix
+* P = P0*P1*...*PK, K=min(M,N)-1,
+  Pi - permutation matrix for I and Pivots[I]
+
+This is cache-oblivous implementation of LU decomposition. It is optimized
+for square matrices. As for rectangular matrices:
+* best case - M>>N
+* worst case - N>>M, small M, large N, matrix does not fit in CPU cache
+
+INPUT PARAMETERS:
+    A       -   array[0..M-1, 0..N-1].
+    M       -   number of rows in matrix A.
+    N       -   number of columns in matrix A.
+
+
+OUTPUT PARAMETERS:
+    A       -   matrices L and U in compact form:
+                * L is stored under main diagonal
+                * U is stored on and above main diagonal
+    Pivots  -   permutation matrix in compact form.
+                array[0..Min(M-1,N-1)].
+
+  -- ALGLIB routine --
+     10.01.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixlu(complex_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots);
+
+
+/*************************************************************************
+Cache-oblivious Cholesky decomposition
+
+The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
+definite matrix. The result of an algorithm is a representation  of  A  as
+A=U'*U  or A=L*L' (here X' detones conj(X^T)).
+
+INPUT PARAMETERS:
+    A       -   upper or lower triangle of a factorized matrix.
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   if IsUpper=True, then A contains an upper triangle of
+                a symmetric matrix, otherwise A contains a lower one.
+
+OUTPUT PARAMETERS:
+    A       -   the result of factorization. If IsUpper=True, then
+                the upper triangle contains matrix U, so that A = U'*U,
+                and the elements below the main diagonal are not modified.
+                Similarly, if IsUpper = False.
+
+RESULT:
+    If  the  matrix  is  positive-definite,  the  function  returns  True.
+    Otherwise, the function returns False. Contents of A is not determined
+    in such case.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+bool hpdmatrixcholesky(complex_2d_array &a, const ae_int_t n, const bool isupper);
+
+
+/*************************************************************************
+Cache-oblivious Cholesky decomposition
+
+The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
+definite matrix. The result of an algorithm is a representation  of  A  as
+A=U^T*U  or A=L*L^T
+
+INPUT PARAMETERS:
+    A       -   upper or lower triangle of a factorized matrix.
+                array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   if IsUpper=True, then A contains an upper triangle of
+                a symmetric matrix, otherwise A contains a lower one.
+
+OUTPUT PARAMETERS:
+    A       -   the result of factorization. If IsUpper=True, then
+                the upper triangle contains matrix U, so that A = U^T*U,
+                and the elements below the main diagonal are not modified.
+                Similarly, if IsUpper = False.
+
+RESULT:
+    If  the  matrix  is  positive-definite,  the  function  returns  True.
+    Otherwise, the function returns False. Contents of A is not determined
+    in such case.
+
+  -- ALGLIB routine --
+     15.12.2009
+     Bochkanov Sergey
+*************************************************************************/
+bool spdmatrixcholesky(real_2d_array &a, const ae_int_t n, const bool isupper);
+
+/*************************************************************************
+Estimate of a matrix condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixrcond1(const real_2d_array &a, const ae_int_t n);
+
+
+/*************************************************************************
+Estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixrcondinf(const real_2d_array &a, const ae_int_t n);
+
+
+/*************************************************************************
+Condition number estimate of a symmetric positive definite matrix.
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm of condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    A       -   symmetric positive definite matrix which is given by its
+                upper or lower triangle depending on the value of
+                IsUpper. Array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+
+Result:
+    1/LowerBound(cond(A)), if matrix A is positive definite,
+   -1, if matrix A is not positive definite, and its condition number
+    could not be found by this algorithm.
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double spdmatrixrcond(const real_2d_array &a, const ae_int_t n, const bool isupper);
+
+
+/*************************************************************************
+Triangular matrix: estimate of a condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A       -   matrix. Array[0..N-1, 0..N-1].
+    N       -   size of A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixtrrcond1(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit);
+
+
+/*************************************************************************
+Triangular matrix: estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixtrrcondinf(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit);
+
+
+/*************************************************************************
+Condition number estimate of a Hermitian positive definite matrix.
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm of condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    A       -   Hermitian positive definite matrix which is given by its
+                upper or lower triangle depending on the value of
+                IsUpper. Array with elements [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    IsUpper -   storage format.
+
+Result:
+    1/LowerBound(cond(A)), if matrix A is positive definite,
+   -1, if matrix A is not positive definite, and its condition number
+    could not be found by this algorithm.
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double hpdmatrixrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper);
+
+
+/*************************************************************************
+Estimate of a matrix condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixrcond1(const complex_2d_array &a, const ae_int_t n);
+
+
+/*************************************************************************
+Estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixrcondinf(const complex_2d_array &a, const ae_int_t n);
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA         -   LU decomposition of a matrix in compact form. Output of
+                    the RMatrixLU subroutine.
+    N           -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixlurcond1(const real_2d_array &lua, const ae_int_t n);
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition
+(infinity norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA     -   LU decomposition of a matrix in compact form. Output of
+                the RMatrixLU subroutine.
+    N       -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double rmatrixlurcondinf(const real_2d_array &lua, const ae_int_t n);
+
+
+/*************************************************************************
+Condition number estimate of a symmetric positive definite matrix given by
+Cholesky decomposition.
+
+The algorithm calculates a lower bound of the condition number. In this
+case, the algorithm does not return a lower bound of the condition number,
+but an inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    CD  - Cholesky decomposition of matrix A,
+          output of SMatrixCholesky subroutine.
+    N   - size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double spdmatrixcholeskyrcond(const real_2d_array &a, const ae_int_t n, const bool isupper);
+
+
+/*************************************************************************
+Condition number estimate of a Hermitian positive definite matrix given by
+Cholesky decomposition.
+
+The algorithm calculates a lower bound of the condition number. In this
+case, the algorithm does not return a lower bound of the condition number,
+but an inverse number (to avoid an overflow in case of a singular matrix).
+
+It should be noted that 1-norm and inf-norm condition numbers of symmetric
+matrices are equal, so the algorithm doesn't take into account the
+differences between these types of norms.
+
+Input parameters:
+    CD  - Cholesky decomposition of matrix A,
+          output of SMatrixCholesky subroutine.
+    N   - size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double hpdmatrixcholeskyrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper);
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA         -   LU decomposition of a matrix in compact form. Output of
+                    the CMatrixLU subroutine.
+    N           -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixlurcond1(const complex_2d_array &lua, const ae_int_t n);
+
+
+/*************************************************************************
+Estimate of the condition number of a matrix given by its LU decomposition
+(infinity norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    LUA     -   LU decomposition of a matrix in compact form. Output of
+                the CMatrixLU subroutine.
+    N       -   size of matrix A.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixlurcondinf(const complex_2d_array &lua, const ae_int_t n);
+
+
+/*************************************************************************
+Triangular matrix: estimate of a condition number (1-norm)
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A       -   matrix. Array[0..N-1, 0..N-1].
+    N       -   size of A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixtrrcond1(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit);
+
+
+/*************************************************************************
+Triangular matrix: estimate of a matrix condition number (infinity-norm).
+
+The algorithm calculates a lower bound of the condition number. In this case,
+the algorithm does not return a lower bound of the condition number, but an
+inverse number (to avoid an overflow in case of a singular matrix).
+
+Input parameters:
+    A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of matrix A.
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   True, if the matrix has a unit diagonal.
+
+Result: 1/LowerBound(cond(A))
+
+NOTE:
+    if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
+    0.0 is returned in such cases.
+*************************************************************************/
+double cmatrixtrrcondinf(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit);
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of RMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of RMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code:
+                * -3    A is singular, or VERY close to singular.
+                        it is filled by zeros in such cases.
+                *  1    task is solved (but matrix A may be ill-conditioned,
+                        check R1/RInf parameters for condition numbers).
+    Rep     -   solver report, see below for more info
+    A       -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+
+SOLVER REPORT
+
+Subroutine sets following fields of the Rep structure:
+* R1        reciprocal of condition number: 1/cond(A), 1-norm.
+* RInf      reciprocal of condition number: 1/cond(A), inf-norm.
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep);
+void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+Result:
+    True, if the matrix is not singular.
+    False, if the matrix is singular.
+
+  -- ALGLIB --
+     Copyright 2005-2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinverse(real_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep);
+void rmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a matrix given by its LU decomposition.
+
+INPUT PARAMETERS:
+    A       -   LU decomposition of the matrix
+                (output of CMatrixLU subroutine).
+    Pivots  -   table of permutations
+                (the output of CMatrixLU subroutine).
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+OUTPUT PARAMETERS:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     05.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep);
+void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a general matrix.
+
+Input parameters:
+    A       -   matrix
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixinverse(complex_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep);
+void cmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  SPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixcholeskyinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep);
+void spdmatrixcholeskyinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a symmetric positive definite matrix.
+
+Given an upper or lower triangle of a symmetric positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void spdmatrixinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep);
+void spdmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix which is given
+by Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition of the matrix to be inverted:
+                A=U’*U or A = L*L'.
+                Output of  HPDMatrixCholesky subroutine.
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given, lower half is used.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixcholeskyinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep);
+void hpdmatrixcholeskyinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Inversion of a Hermitian positive definite matrix.
+
+Given an upper or lower triangle of a Hermitian positive definite matrix,
+the algorithm generates matrix A^-1 and saves the upper or lower triangle
+depending on the input.
+
+Input parameters:
+    A       -   matrix to be inverted (upper or lower triangle).
+                Array with elements [0..N-1,0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   storage type (optional):
+                * if True, symmetric  matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t  used/changed  by
+                  function
+                * if False,  symmetric matrix  A  is  given  by  its lower
+                  triangle, and the  upper triangle isn’t used/changed  by
+                  function
+                * if not given,  both lower and upper  triangles  must  be
+                  filled.
+
+Output parameters:
+    Info    -   return code, same as in RMatrixLUInverse
+    Rep     -   solver report, same as in RMatrixLUInverse
+    A       -   inverse of matrix A, same as in RMatrixLUInverse
+
+  -- ALGLIB routine --
+     10.02.2010
+     Bochkanov Sergey
+*************************************************************************/
+void hpdmatrixinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep);
+void hpdmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Triangular matrix inverse (real)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixtrinverse(real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep);
+void rmatrixtrinverse(real_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep);
+
+
+/*************************************************************************
+Triangular matrix inverse (complex)
+
+The subroutine inverts the following types of matrices:
+    * upper triangular
+    * upper triangular with unit diagonal
+    * lower triangular
+    * lower triangular with unit diagonal
+
+In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
+also be upper (lower) triangular, and after the end of the algorithm,  the
+inverse matrix replaces the source matrix. The elements  below (above) the
+main diagonal are not changed by the algorithm.
+
+If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
+diagonal, and the diagonal elements are not passed to the algorithm.
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1].
+    N       -   size of matrix A (optional) :
+                * if given, only principal NxN submatrix is processed  and
+                  overwritten. other elements are unchanged.
+                * if not given,  size  is  automatically  determined  from
+                  matrix size (A must be square matrix)
+    IsUpper -   True, if the matrix is upper triangular.
+    IsUnit  -   diagonal type (optional):
+                * if True, matrix has unit diagonal (a[i,i] are NOT used)
+                * if False, matrix diagonal is arbitrary
+                * if not given, False is assumed
+
+Output parameters:
+    Info    -   same as for RMatrixLUInverse
+    Rep     -   same as for RMatrixLUInverse
+    A       -   same as for RMatrixLUInverse.
+
+  -- ALGLIB --
+     Copyright 05.02.2010 by Bochkanov Sergey
+*************************************************************************/
+void cmatrixtrinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep);
+void cmatrixtrinverse(complex_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep);
+
+/*************************************************************************
+Singular value decomposition of a bidiagonal matrix (extended algorithm)
+
+The algorithm performs the singular value decomposition  of  a  bidiagonal
+matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
+orthogonal matrices, S - diagonal matrix with non-negative elements on the
+main diagonal, in descending order.
+
+The  algorithm  finds  singular  values.  In  addition,  the algorithm can
+calculate  matrices  Q  and P (more precisely, not the matrices, but their
+product  with  given  matrices U and VT - U*Q and (P^T)*VT)).  Of  course,
+matrices U and VT can be of any type, including identity. Furthermore, the
+algorithm can calculate Q'*C (this product is calculated more  effectively
+than U*Q,  because  this calculation operates with rows instead  of matrix
+columns).
+
+The feature of the algorithm is its ability to find  all  singular  values
+including those which are arbitrarily close to 0  with  relative  accuracy
+close to  machine precision. If the parameter IsFractionalAccuracyRequired
+is set to True, all singular values will have high relative accuracy close
+to machine precision. If the parameter is set to False, only  the  biggest
+singular value will have relative accuracy  close  to  machine  precision.
+The absolute error of other singular values is equal to the absolute error
+of the biggest singular value.
+
+Input parameters:
+    D       -   main diagonal of matrix B.
+                Array whose index ranges within [0..N-1].
+    E       -   superdiagonal (or subdiagonal) of matrix B.
+                Array whose index ranges within [0..N-2].
+    N       -   size of matrix B.
+    IsUpper -   True, if the matrix is upper bidiagonal.
+    IsFractionalAccuracyRequired -
+                accuracy to search singular values with.
+    U       -   matrix to be multiplied by Q.
+                Array whose indexes range within [0..NRU-1, 0..N-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..NRU-1, 0..N-1] will be multiplied by Q.
+    NRU     -   number of rows in matrix U.
+    C       -   matrix to be multiplied by Q'.
+                Array whose indexes range within [0..N-1, 0..NCC-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..N-1, 0..NCC-1] will be multiplied by Q'.
+    NCC     -   number of columns in matrix C.
+    VT      -   matrix to be multiplied by P^T.
+                Array whose indexes range within [0..N-1, 0..NCVT-1].
+                The matrix can be bigger, in that case only the  submatrix
+                [0..N-1, 0..NCVT-1] will be multiplied by P^T.
+    NCVT    -   number of columns in matrix VT.
+
+Output parameters:
+    D       -   singular values of matrix B in descending order.
+    U       -   if NRU>0, contains matrix U*Q.
+    VT      -   if NCVT>0, contains matrix (P^T)*VT.
+    C       -   if NCC>0, contains matrix Q'*C.
+
+Result:
+    True, if the algorithm has converged.
+    False, if the algorithm hasn't converged (rare case).
+
+Additional information:
+    The type of convergence is controlled by the internal  parameter  TOL.
+    If the parameter is greater than 0, the singular values will have
+    relative accuracy TOL. If TOL<0, the singular values will have
+    absolute accuracy ABS(TOL)*norm(B).
+    By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
+    where Epsilon is the machine precision. It is not  recommended  to  use
+    TOL less than 10*Epsilon since this will  considerably  slow  down  the
+    algorithm and may not lead to error decreasing.
+History:
+    * 31 March, 2007.
+        changed MAXITR from 6 to 12.
+
+  -- LAPACK routine (version 3.0) --
+     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+     Courant Institute, Argonne National Lab, and Rice University
+     October 31, 1999.
+*************************************************************************/
+bool rmatrixbdsvd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const bool isupper, const bool isfractionalaccuracyrequired, real_2d_array &u, const ae_int_t nru, real_2d_array &c, const ae_int_t ncc, real_2d_array &vt, const ae_int_t ncvt);
+
+/*************************************************************************
+Singular value decomposition of a rectangular matrix.
+
+The algorithm calculates the singular value decomposition of a matrix of
+size MxN: A = U * S * V^T
+
+The algorithm finds the singular values and, optionally, matrices U and V^T.
+The algorithm can find both first min(M,N) columns of matrix U and rows of
+matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
+and NxN respectively).
+
+Take into account that the subroutine does not return matrix V but V^T.
+
+Input parameters:
+    A           -   matrix to be decomposed.
+                    Array whose indexes range within [0..M-1, 0..N-1].
+    M           -   number of rows in matrix A.
+    N           -   number of columns in matrix A.
+    UNeeded     -   0, 1 or 2. See the description of the parameter U.
+    VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
+    AdditionalMemory -
+                    If the parameter:
+                     * equals 0, the algorithm doesn’t use additional
+                       memory (lower requirements, lower performance).
+                     * equals 1, the algorithm uses additional
+                       memory of size min(M,N)*min(M,N) of real numbers.
+                       It often speeds up the algorithm.
+                     * equals 2, the algorithm uses additional
+                       memory of size M*min(M,N) of real numbers.
+                       It allows to get a maximum performance.
+                    The recommended value of the parameter is 2.
+
+Output parameters:
+    W           -   contains singular values in descending order.
+    U           -   if UNeeded=0, U isn't changed, the left singular vectors
+                    are not calculated.
+                    if Uneeded=1, U contains left singular vectors (first
+                    min(M,N) columns of matrix U). Array whose indexes range
+                    within [0..M-1, 0..Min(M,N)-1].
+                    if UNeeded=2, U contains matrix U wholly. Array whose
+                    indexes range within [0..M-1, 0..M-1].
+    VT          -   if VTNeeded=0, VT isn’t changed, the right singular vectors
+                    are not calculated.
+                    if VTNeeded=1, VT contains right singular vectors (first
+                    min(M,N) rows of matrix V^T). Array whose indexes range
+                    within [0..min(M,N)-1, 0..N-1].
+                    if VTNeeded=2, VT contains matrix V^T wholly. Array whose
+                    indexes range within [0..N-1, 0..N-1].
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+bool rmatrixsvd(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const ae_int_t uneeded, const ae_int_t vtneeded, const ae_int_t additionalmemory, real_1d_array &w, real_2d_array &u, real_2d_array &vt);
+
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n);
+double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots);
+
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+double rmatrixdet(const real_2d_array &a, const ae_int_t n);
+double rmatrixdet(const real_2d_array &a);
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by its LU decomposition.
+
+Input parameters:
+    A       -   LU decomposition of the matrix (output of
+                RMatrixLU subroutine).
+    Pivots  -   table of permutations which were made during
+                the LU decomposition.
+                Output of RMatrixLU subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n);
+alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots);
+
+
+/*************************************************************************
+Calculation of the determinant of a general matrix
+
+Input parameters:
+    A       -   matrix, array[0..N-1, 0..N-1]
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+Result: determinant of matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+alglib::complex cmatrixdet(const complex_2d_array &a, const ae_int_t n);
+alglib::complex cmatrixdet(const complex_2d_array &a);
+
+
+/*************************************************************************
+Determinant calculation of the matrix given by the Cholesky decomposition.
+
+Input parameters:
+    A       -   Cholesky decomposition,
+                output of SMatrixCholesky subroutine.
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+
+As the determinant is equal to the product of squares of diagonal elements,
+it’s not necessary to specify which triangle - lower or upper - the matrix
+is stored in.
+
+Result:
+    matrix determinant.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixcholeskydet(const real_2d_array &a, const ae_int_t n);
+double spdmatrixcholeskydet(const real_2d_array &a);
+
+
+/*************************************************************************
+Determinant calculation of the symmetric positive definite matrix.
+
+Input parameters:
+    A       -   matrix. Array with elements [0..N-1, 0..N-1].
+    N       -   (optional) size of matrix A:
+                * if given, only principal NxN submatrix is processed and
+                  overwritten. other elements are unchanged.
+                * if not given, automatically determined from matrix size
+                  (A must be square matrix)
+    IsUpper -   (optional) storage type:
+                * if True, symmetric matrix  A  is  given  by  its  upper
+                  triangle, and the lower triangle isn’t used/changed  by
+                  function
+                * if False, symmetric matrix  A  is  given  by  its lower
+                  triangle, and the upper triangle isn’t used/changed  by
+                  function
+                * if not given, both lower and upper  triangles  must  be
+                  filled.
+
+Result:
+    determinant of matrix A.
+    If matrix A is not positive definite, exception is thrown.
+
+  -- ALGLIB --
+     Copyright 2005-2008 by Bochkanov Sergey
+*************************************************************************/
+double spdmatrixdet(const real_2d_array &a, const ae_int_t n, const bool isupper);
+double spdmatrixdet(const real_2d_array &a);
+
+/*************************************************************************
+Algorithm for solving the following generalized symmetric positive-definite
+eigenproblem:
+    A*x = lambda*B*x (1) or
+    A*B*x = lambda*x (2) or
+    B*A*x = lambda*x (3).
+where A is a symmetric matrix, B - symmetric positive-definite matrix.
+The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
+problem.
+
+Input parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrices A and B.
+    IsUpperA    -   storage format of matrix A.
+    B           -   symmetric positive-definite matrix which is given by
+                    its upper or lower triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperB    -   storage format of matrix B.
+    ZNeeded     -   if ZNeeded is equal to:
+                     * 0, the eigenvectors are not returned;
+                     * 1, the eigenvectors are returned.
+    ProblemType -   if ProblemType is equal to:
+                     * 1, the following problem is solved: A*x = lambda*B*x;
+                     * 2, the following problem is solved: A*B*x = lambda*x;
+                     * 3, the following problem is solved: B*A*x = lambda*x.
+
+Output parameters:
+    D           -   eigenvalues in ascending order.
+                    Array whose index ranges within [0..N-1].
+    Z           -   if ZNeeded is equal to:
+                     * 0, Z hasn’t changed;
+                     * 1, Z contains eigenvectors.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+                    The eigenvectors are stored in matrix columns. It should
+                    be noted that the eigenvectors in such problems do not
+                    form an orthogonal system.
+
+Result:
+    True, if the problem was solved successfully.
+    False, if the error occurred during the Cholesky decomposition of matrix
+    B (the matrix isn’t positive-definite) or during the work of the iterative
+    algorithm for solving the symmetric eigenproblem.
+
+See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
+
+  -- ALGLIB --
+     Copyright 1.28.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixgevd(const real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t zneeded, const ae_int_t problemtype, real_1d_array &d, real_2d_array &z);
+
+
+/*************************************************************************
+Algorithm for reduction of the following generalized symmetric positive-
+definite eigenvalue problem:
+    A*x = lambda*B*x (1) or
+    A*B*x = lambda*x (2) or
+    B*A*x = lambda*x (3)
+to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
+the given problems are the same, and the eigenvectors of the given problem
+could be obtained by multiplying the obtained eigenvectors by the
+transformation matrix x = R*y).
+
+Here A is a symmetric matrix, B - symmetric positive-definite matrix.
+
+Input parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrices A and B.
+    IsUpperA    -   storage format of matrix A.
+    B           -   symmetric positive-definite matrix which is given by
+                    its upper or lower triangular part.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperB    -   storage format of matrix B.
+    ProblemType -   if ProblemType is equal to:
+                     * 1, the following problem is solved: A*x = lambda*B*x;
+                     * 2, the following problem is solved: A*B*x = lambda*x;
+                     * 3, the following problem is solved: B*A*x = lambda*x.
+
+Output parameters:
+    A           -   symmetric matrix which is given by its upper or lower
+                    triangle depending on IsUpperA. Contains matrix C.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    R           -   upper triangular or low triangular transformation matrix
+                    which is used to obtain the eigenvectors of a given problem
+                    as the product of eigenvectors of C (from the right) and
+                    matrix R (from the left). If the matrix is upper
+                    triangular, the elements below the main diagonal
+                    are equal to 0 (and vice versa). Thus, we can perform
+                    the multiplication without taking into account the
+                    internal structure (which is an easier though less
+                    effective way).
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    IsUpperR    -   type of matrix R (upper or lower triangular).
+
+Result:
+    True, if the problem was reduced successfully.
+    False, if the error occurred during the Cholesky decomposition of
+        matrix B (the matrix is not positive-definite).
+
+  -- ALGLIB --
+     Copyright 1.28.2006 by Bochkanov Sergey
+*************************************************************************/
+bool smatrixgevdreduce(real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t problemtype, real_2d_array &r, bool &isupperr);
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a number to an element
+of matrix A.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    UpdRow  -   row where the element to be updated is stored.
+    UpdColumn - column where the element to be updated is stored.
+    UpdVal  -   a number to be added to the element.
+
+
+Output parameters:
+    InvA    -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdatesimple(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const ae_int_t updcolumn, const double updval);
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a vector to a row
+of matrix A.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    UpdRow  -   the row of A whose vector V was added.
+                0 <= Row <= N-1
+    V       -   the vector to be added to a row.
+                Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA    -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdaterow(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const real_1d_array &v);
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm updates matrix A^-1 when adding a vector to a column
+of matrix A.
+
+Input parameters:
+    InvA        -   inverse of matrix A.
+                    Array whose indexes range within [0..N-1, 0..N-1].
+    N           -   size of matrix A.
+    UpdColumn   -   the column of A whose vector U was added.
+                    0 <= UpdColumn <= N-1
+    U           -   the vector to be added to a column.
+                    Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA        -   inverse of modified matrix A.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdatecolumn(real_2d_array &inva, const ae_int_t n, const ae_int_t updcolumn, const real_1d_array &u);
+
+
+/*************************************************************************
+Inverse matrix update by the Sherman-Morrison formula
+
+The algorithm computes the inverse of matrix A+u*v’ by using the given matrix
+A^-1 and the vectors u and v.
+
+Input parameters:
+    InvA    -   inverse of matrix A.
+                Array whose indexes range within [0..N-1, 0..N-1].
+    N       -   size of matrix A.
+    U       -   the vector modifying the matrix.
+                Array whose index ranges within [0..N-1].
+    V       -   the vector modifying the matrix.
+                Array whose index ranges within [0..N-1].
+
+Output parameters:
+    InvA - inverse of matrix A + u*v'.
+
+  -- ALGLIB --
+     Copyright 2005 by Bochkanov Sergey
+*************************************************************************/
+void rmatrixinvupdateuv(real_2d_array &inva, const ae_int_t n, const real_1d_array &u, const real_1d_array &v);
+
+/*************************************************************************
+Subroutine performing the Schur decomposition of a general matrix by using
+the QR algorithm with multiple shifts.
+
+The source matrix A is represented as S'*A*S = T, where S is an orthogonal
+matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
+sizes 1x1 and 2x2 on the main diagonal).
+
+Input parameters:
+    A   -   matrix to be decomposed.
+            Array whose indexes range within [0..N-1, 0..N-1].
+    N   -   size of A, N>=0.
+
+
+Output parameters:
+    A   -   contains matrix T.
+            Array whose indexes range within [0..N-1, 0..N-1].
+    S   -   contains Schur vectors.
+            Array whose indexes range within [0..N-1, 0..N-1].
+
+Note 1:
+    The block structure of matrix T can be easily recognized: since all
+    the elements below the blocks are zeros, the elements a[i+1,i] which
+    are equal to 0 show the block border.
+
+Note 2:
+    The algorithm performance depends on the value of the internal parameter
+    NS of the InternalSchurDecomposition subroutine which defines the number
+    of shifts in the QR algorithm (similarly to the block width in block-matrix
+    algorithms in linear algebra). If you require maximum performance on
+    your machine, it is recommended to adjust this parameter manually.
+
+Result:
+    True,
+        if the algorithm has converged and parameters A and S contain the result.
+    False,
+        if the algorithm has not converged.
+
+Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
+*************************************************************************/
+bool rmatrixschur(real_2d_array &a, const ae_int_t n, real_2d_array &s);
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+void ablassplitlength(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state);
+void ablascomplexsplitlength(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* n1,
+     ae_int_t* n2,
+     ae_state *_state);
+ae_int_t ablasblocksize(/* Real    */ ae_matrix* a, ae_state *_state);
+ae_int_t ablascomplexblocksize(/* Complex */ ae_matrix* a,
+     ae_state *_state);
+ae_int_t ablasmicroblocksize(ae_state *_state);
+void cmatrixtranspose(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state);
+void rmatrixtranspose(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state);
+void cmatrixcopy(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state);
+void rmatrixcopy(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_state *_state);
+void cmatrixrank1(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Complex */ ae_vector* u,
+     ae_int_t iu,
+     /* Complex */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state);
+void rmatrixrank1(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     /* Real    */ ae_vector* u,
+     ae_int_t iu,
+     /* Real    */ ae_vector* v,
+     ae_int_t iv,
+     ae_state *_state);
+void cmatrixmv(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Complex */ ae_vector* x,
+     ae_int_t ix,
+     /* Complex */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state);
+void rmatrixmv(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t opa,
+     /* Real    */ ae_vector* x,
+     ae_int_t ix,
+     /* Real    */ ae_vector* y,
+     ae_int_t iy,
+     ae_state *_state);
+void cmatrixrighttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+void cmatrixlefttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Complex */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+void rmatrixrighttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+void rmatrixlefttrsm(ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_int_t i1,
+     ae_int_t j1,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t optype,
+     /* Real    */ ae_matrix* x,
+     ae_int_t i2,
+     ae_int_t j2,
+     ae_state *_state);
+void cmatrixsyrk(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state);
+void rmatrixsyrk(ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_bool isupper,
+     ae_state *_state);
+void cmatrixgemm(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     ae_complex alpha,
+     /* Complex */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Complex */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     ae_complex beta,
+     /* Complex */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state);
+void rmatrixgemm(ae_int_t m,
+     ae_int_t n,
+     ae_int_t k,
+     double alpha,
+     /* Real    */ ae_matrix* a,
+     ae_int_t ia,
+     ae_int_t ja,
+     ae_int_t optypea,
+     /* Real    */ ae_matrix* b,
+     ae_int_t ib,
+     ae_int_t jb,
+     ae_int_t optypeb,
+     double beta,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ic,
+     ae_int_t jc,
+     ae_state *_state);
+void rmatrixqr(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state);
+void rmatrixlq(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state);
+void cmatrixqr(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state);
+void cmatrixlq(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_state *_state);
+void rmatrixqrunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_int_t qcolumns,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state);
+void rmatrixqrunpackr(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* r,
+     ae_state *_state);
+void rmatrixlqunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_int_t qrows,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state);
+void rmatrixlqunpackl(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_matrix* l,
+     ae_state *_state);
+void cmatrixqrunpackq(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_int_t qcolumns,
+     /* Complex */ ae_matrix* q,
+     ae_state *_state);
+void cmatrixqrunpackr(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* r,
+     ae_state *_state);
+void cmatrixlqunpackq(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_vector* tau,
+     ae_int_t qrows,
+     /* Complex */ ae_matrix* q,
+     ae_state *_state);
+void cmatrixlqunpackl(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Complex */ ae_matrix* l,
+     ae_state *_state);
+void rmatrixbd(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tauq,
+     /* Real    */ ae_vector* taup,
+     ae_state *_state);
+void rmatrixbdunpackq(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tauq,
+     ae_int_t qcolumns,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state);
+void rmatrixbdmultiplybyq(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* tauq,
+     /* Real    */ ae_matrix* z,
+     ae_int_t zrows,
+     ae_int_t zcolumns,
+     ae_bool fromtheright,
+     ae_bool dotranspose,
+     ae_state *_state);
+void rmatrixbdunpackpt(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* taup,
+     ae_int_t ptrows,
+     /* Real    */ ae_matrix* pt,
+     ae_state *_state);
+void rmatrixbdmultiplybyp(/* Real    */ ae_matrix* qp,
+     ae_int_t m,
+     ae_int_t n,
+     /* Real    */ ae_vector* taup,
+     /* Real    */ ae_matrix* z,
+     ae_int_t zrows,
+     ae_int_t zcolumns,
+     ae_bool fromtheright,
+     ae_bool dotranspose,
+     ae_state *_state);
+void rmatrixbdunpackdiagonals(/* Real    */ ae_matrix* b,
+     ae_int_t m,
+     ae_int_t n,
+     ae_bool* isupper,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_state *_state);
+void rmatrixhessenberg(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     ae_state *_state);
+void rmatrixhessenbergunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state);
+void rmatrixhessenbergunpackh(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_matrix* h,
+     ae_state *_state);
+void smatrixtd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_state *_state);
+void smatrixtdunpackq(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tau,
+     /* Real    */ ae_matrix* q,
+     ae_state *_state);
+void hmatrixtd(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tau,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_state *_state);
+void hmatrixtdunpackq(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Complex */ ae_vector* tau,
+     /* Complex */ ae_matrix* q,
+     ae_state *_state);
+ae_bool smatrixevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool smatrixevdr(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     double b1,
+     double b2,
+     ae_int_t* m,
+     /* Real    */ ae_vector* w,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool smatrixevdi(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* w,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool hmatrixevd(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     /* Real    */ ae_vector* d,
+     /* Complex */ ae_matrix* z,
+     ae_state *_state);
+ae_bool hmatrixevdr(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     double b1,
+     double b2,
+     ae_int_t* m,
+     /* Real    */ ae_vector* w,
+     /* Complex */ ae_matrix* z,
+     ae_state *_state);
+ae_bool hmatrixevdi(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_bool isupper,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_vector* w,
+     /* Complex */ ae_matrix* z,
+     ae_state *_state);
+ae_bool smatrixtdevd(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool smatrixtdevdr(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     double a,
+     double b,
+     ae_int_t* m,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool smatrixtdevdi(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_int_t zneeded,
+     ae_int_t i1,
+     ae_int_t i2,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool rmatrixevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t vneeded,
+     /* Real    */ ae_vector* wr,
+     /* Real    */ ae_vector* wi,
+     /* Real    */ ae_matrix* vl,
+     /* Real    */ ae_matrix* vr,
+     ae_state *_state);
+void rmatrixrndorthogonal(ae_int_t n,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state);
+void rmatrixrndcond(ae_int_t n,
+     double c,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state);
+void cmatrixrndorthogonal(ae_int_t n,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state);
+void cmatrixrndcond(ae_int_t n,
+     double c,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state);
+void smatrixrndcond(ae_int_t n,
+     double c,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state);
+void spdmatrixrndcond(ae_int_t n,
+     double c,
+     /* Real    */ ae_matrix* a,
+     ae_state *_state);
+void hmatrixrndcond(ae_int_t n,
+     double c,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state);
+void hpdmatrixrndcond(ae_int_t n,
+     double c,
+     /* Complex */ ae_matrix* a,
+     ae_state *_state);
+void rmatrixrndorthogonalfromtheright(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+void rmatrixrndorthogonalfromtheleft(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+void cmatrixrndorthogonalfromtheright(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+void cmatrixrndorthogonalfromtheleft(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_state *_state);
+void smatrixrndmultiply(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+void hmatrixrndmultiply(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+void rmatrixlu(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state);
+void cmatrixlu(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state);
+ae_bool hpdmatrixcholesky(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool spdmatrixcholesky(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+void rmatrixlup(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state);
+void cmatrixlup(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state);
+void rmatrixplu(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state);
+void cmatrixplu(/* Complex */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     /* Integer */ ae_vector* pivots,
+     ae_state *_state);
+ae_bool spdmatrixcholeskyrec(/* Real    */ ae_matrix* a,
+     ae_int_t offs,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+double rmatrixrcond1(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+double rmatrixrcondinf(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+double spdmatrixrcond(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+double rmatrixtrrcond1(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state);
+double rmatrixtrrcondinf(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state);
+double hpdmatrixrcond(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+double cmatrixrcond1(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+double cmatrixrcondinf(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+double rmatrixlurcond1(/* Real    */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state);
+double rmatrixlurcondinf(/* Real    */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state);
+double spdmatrixcholeskyrcond(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+double hpdmatrixcholeskyrcond(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+double cmatrixlurcond1(/* Complex */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state);
+double cmatrixlurcondinf(/* Complex */ ae_matrix* lua,
+     ae_int_t n,
+     ae_state *_state);
+double cmatrixtrrcond1(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state);
+double cmatrixtrrcondinf(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_state *_state);
+double rcondthreshold(ae_state *_state);
+void rmatrixluinverse(/* Real    */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void rmatrixinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void cmatrixluinverse(/* Complex */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void cmatrixinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void spdmatrixcholeskyinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void spdmatrixinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void hpdmatrixcholeskyinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void hpdmatrixinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void rmatrixtrinverse(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+void cmatrixtrinverse(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isunit,
+     ae_int_t* info,
+     matinvreport* rep,
+     ae_state *_state);
+ae_bool _matinvreport_init(matinvreport* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _matinvreport_init_copy(matinvreport* dst, matinvreport* src, ae_state *_state, ae_bool make_automatic);
+void _matinvreport_clear(matinvreport* p);
+ae_bool rmatrixbdsvd(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isfractionalaccuracyrequired,
+     /* Real    */ ae_matrix* u,
+     ae_int_t nru,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ncc,
+     /* Real    */ ae_matrix* vt,
+     ae_int_t ncvt,
+     ae_state *_state);
+ae_bool bidiagonalsvddecomposition(/* Real    */ ae_vector* d,
+     /* Real    */ ae_vector* e,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_bool isfractionalaccuracyrequired,
+     /* Real    */ ae_matrix* u,
+     ae_int_t nru,
+     /* Real    */ ae_matrix* c,
+     ae_int_t ncc,
+     /* Real    */ ae_matrix* vt,
+     ae_int_t ncvt,
+     ae_state *_state);
+ae_bool rmatrixsvd(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     ae_int_t uneeded,
+     ae_int_t vtneeded,
+     ae_int_t additionalmemory,
+     /* Real    */ ae_vector* w,
+     /* Real    */ ae_matrix* u,
+     /* Real    */ ae_matrix* vt,
+     ae_state *_state);
+void fblscholeskysolve(/* Real    */ ae_matrix* cha,
+     double sqrtscalea,
+     ae_int_t n,
+     ae_bool isupper,
+     /* Real    */ ae_vector* xb,
+     /* Real    */ ae_vector* tmp,
+     ae_state *_state);
+void fblssolvecgx(/* Real    */ ae_matrix* a,
+     ae_int_t m,
+     ae_int_t n,
+     double alpha,
+     /* Real    */ ae_vector* b,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* buf,
+     ae_state *_state);
+void fblscgcreate(/* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* b,
+     ae_int_t n,
+     fblslincgstate* state,
+     ae_state *_state);
+ae_bool fblscgiteration(fblslincgstate* state, ae_state *_state);
+ae_bool _fblslincgstate_init(fblslincgstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _fblslincgstate_init_copy(fblslincgstate* dst, fblslincgstate* src, ae_state *_state, ae_bool make_automatic);
+void _fblslincgstate_clear(fblslincgstate* p);
+double rmatrixludet(/* Real    */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_state *_state);
+double rmatrixdet(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+ae_complex cmatrixludet(/* Complex */ ae_matrix* a,
+     /* Integer */ ae_vector* pivots,
+     ae_int_t n,
+     ae_state *_state);
+ae_complex cmatrixdet(/* Complex */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+double spdmatrixcholeskydet(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_state *_state);
+double spdmatrixdet(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool smatrixgevd(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isuppera,
+     /* Real    */ ae_matrix* b,
+     ae_bool isupperb,
+     ae_int_t zneeded,
+     ae_int_t problemtype,
+     /* Real    */ ae_vector* d,
+     /* Real    */ ae_matrix* z,
+     ae_state *_state);
+ae_bool smatrixgevdreduce(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     ae_bool isuppera,
+     /* Real    */ ae_matrix* b,
+     ae_bool isupperb,
+     ae_int_t problemtype,
+     /* Real    */ ae_matrix* r,
+     ae_bool* isupperr,
+     ae_state *_state);
+void rmatrixinvupdatesimple(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     ae_int_t updrow,
+     ae_int_t updcolumn,
+     double updval,
+     ae_state *_state);
+void rmatrixinvupdaterow(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     ae_int_t updrow,
+     /* Real    */ ae_vector* v,
+     ae_state *_state);
+void rmatrixinvupdatecolumn(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     ae_int_t updcolumn,
+     /* Real    */ ae_vector* u,
+     ae_state *_state);
+void rmatrixinvupdateuv(/* Real    */ ae_matrix* inva,
+     ae_int_t n,
+     /* Real    */ ae_vector* u,
+     /* Real    */ ae_vector* v,
+     ae_state *_state);
+ae_bool rmatrixschur(/* Real    */ ae_matrix* a,
+     ae_int_t n,
+     /* Real    */ ae_matrix* s,
+     ae_state *_state);
+
+}
+#endif
+
diff --git a/contrib/lbfgs/optimization.cpp b/contrib/lbfgs/optimization.cpp
new file mode 100755
index 0000000000..2f4c034023
--- /dev/null
+++ b/contrib/lbfgs/optimization.cpp
@@ -0,0 +1,11827 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#include "stdafx.h"
+#include "optimization.h"
+
+// disable some irrelevant warnings
+#if (AE_COMPILER==AE_MSVC)
+#pragma warning(disable:4100)
+#pragma warning(disable:4127)
+#pragma warning(disable:4702)
+#pragma warning(disable:4996)
+#endif
+using namespace std;
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+
+/*************************************************************************
+
+*************************************************************************/
+_minlbfgsstate_owner::_minlbfgsstate_owner()
+{
+    p_struct = (alglib_impl::minlbfgsstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlbfgsstate_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlbfgsstate_owner::_minlbfgsstate_owner(const _minlbfgsstate_owner &rhs)
+{
+    p_struct = (alglib_impl::minlbfgsstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlbfgsstate_init_copy(p_struct, const_cast<alglib_impl::minlbfgsstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlbfgsstate_owner& _minlbfgsstate_owner::operator=(const _minlbfgsstate_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minlbfgsstate_clear(p_struct);
+    if( !alglib_impl::_minlbfgsstate_init_copy(p_struct, const_cast<alglib_impl::minlbfgsstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minlbfgsstate_owner::~_minlbfgsstate_owner()
+{
+    alglib_impl::_minlbfgsstate_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minlbfgsstate* _minlbfgsstate_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minlbfgsstate* _minlbfgsstate_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minlbfgsstate*>(p_struct);
+}
+minlbfgsstate::minlbfgsstate() : _minlbfgsstate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+minlbfgsstate::minlbfgsstate(const minlbfgsstate &rhs):_minlbfgsstate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+minlbfgsstate& minlbfgsstate::operator=(const minlbfgsstate &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minlbfgsstate_owner::operator=(rhs);
+    return *this;
+}
+
+minlbfgsstate::~minlbfgsstate()
+{
+}
+
+
+/*************************************************************************
+
+*************************************************************************/
+_minlbfgsreport_owner::_minlbfgsreport_owner()
+{
+    p_struct = (alglib_impl::minlbfgsreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlbfgsreport_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlbfgsreport_owner::_minlbfgsreport_owner(const _minlbfgsreport_owner &rhs)
+{
+    p_struct = (alglib_impl::minlbfgsreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlbfgsreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlbfgsreport_init_copy(p_struct, const_cast<alglib_impl::minlbfgsreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlbfgsreport_owner& _minlbfgsreport_owner::operator=(const _minlbfgsreport_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minlbfgsreport_clear(p_struct);
+    if( !alglib_impl::_minlbfgsreport_init_copy(p_struct, const_cast<alglib_impl::minlbfgsreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minlbfgsreport_owner::~_minlbfgsreport_owner()
+{
+    alglib_impl::_minlbfgsreport_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minlbfgsreport* _minlbfgsreport_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minlbfgsreport* _minlbfgsreport_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minlbfgsreport*>(p_struct);
+}
+minlbfgsreport::minlbfgsreport() : _minlbfgsreport_owner() ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
+{
+}
+
+minlbfgsreport::minlbfgsreport(const minlbfgsreport &rhs):_minlbfgsreport_owner(rhs) ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
+{
+}
+
+minlbfgsreport& minlbfgsreport::operator=(const minlbfgsreport &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minlbfgsreport_owner::operator=(rhs);
+    return *this;
+}
+
+minlbfgsreport::~minlbfgsreport()
+{
+}
+
+/*************************************************************************
+        LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by  using  a  quasi-
+Newton method (LBFGS scheme) which is optimized to use  a  minimum  amount
+of memory.
+The subroutine generates the approximation of an inverse Hessian matrix by
+using information about the last M steps of the algorithm  (instead of N).
+It lessens a required amount of memory from a value  of  order  N^2  to  a
+value of order 2*N*M.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinLBFGSCreate() call
+2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax()
+   and other functions
+3. User calls MinLBFGSOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinLBFGSResults() to get solution
+5. Optionally user may call MinLBFGSRestartFrom() to solve another problem
+   with same N/M but another starting point and/or another function.
+   MinLBFGSRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension. N>0
+    M       -   number of corrections in the BFGS scheme of Hessian
+                approximation update. Recommended value:  3<=M<=7. The smaller
+                value causes worse convergence, the bigger will  not  cause  a
+                considerably better convergence, but will cause a fall in  the
+                performance. M<=N.
+    X       -   initial solution approximation, array[0..N-1].
+
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+
+NOTES:
+1. you may tune stopping conditions with MinLBFGSSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLBFGSSetStpMax() function to bound algorithm's  steps.  However,
+   L-BFGS rarely needs such a tuning.
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgscreate(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlbfgsstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgscreate(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+        LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by  using  a  quasi-
+Newton method (LBFGS scheme) which is optimized to use  a  minimum  amount
+of memory.
+The subroutine generates the approximation of an inverse Hessian matrix by
+using information about the last M steps of the algorithm  (instead of N).
+It lessens a required amount of memory from a value  of  order  N^2  to  a
+value of order 2*N*M.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinLBFGSCreate() call
+2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax()
+   and other functions
+3. User calls MinLBFGSOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinLBFGSResults() to get solution
+5. Optionally user may call MinLBFGSRestartFrom() to solve another problem
+   with same N/M but another starting point and/or another function.
+   MinLBFGSRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension. N>0
+    M       -   number of corrections in the BFGS scheme of Hessian
+                approximation update. Recommended value:  3<=M<=7. The smaller
+                value causes worse convergence, the bigger will  not  cause  a
+                considerably better convergence, but will cause a fall in  the
+                performance. M<=N.
+    X       -   initial solution approximation, array[0..N-1].
+
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+
+NOTES:
+1. you may tune stopping conditions with MinLBFGSSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLBFGSSetStpMax() function to bound algorithm's  steps.  However,
+   L-BFGS rarely needs such a tuning.
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgscreate(const ae_int_t m, const real_1d_array &x, minlbfgsstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgscreate(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets stopping conditions for L-BFGS optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetcond(const minlbfgsstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgssetcond(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinLBFGSOptimize().
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetxrep(const minlbfgsstate &state, const bool needxrep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgssetxrep(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), needxrep, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0 (default),  if
+                you don't want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetstpmax(const minlbfgsstate &state, const double stpmax)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgssetstpmax(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), stpmax, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Modification of the preconditioner:
+default preconditioner (simple scaling) is used.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+After call to this function preconditioner is changed to the default one.
+
+NOTE:  you  can  change  preconditioner  "on  the  fly",  during algorithm
+iterations.
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetdefaultpreconditioner(const minlbfgsstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgssetdefaultpreconditioner(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Modification of the preconditioner:
+Cholesky factorization of approximate Hessian is used.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    P       -   triangular preconditioner, Cholesky factorization of
+                the approximate Hessian. array[0..N-1,0..N-1],
+                (if larger, only leading N elements are used).
+    IsUpper -   whether upper or lower triangle of P is given
+                (other triangle is not referenced)
+
+After call to this function preconditioner is changed to P  (P  is  copied
+into the internal buffer).
+
+NOTE:  you  can  change  preconditioner  "on  the  fly",  during algorithm
+iterations.
+
+NOTE 2:  P  should  be nonsingular. Exception will be thrown otherwise. It
+also should be well conditioned, although only strict  non-singularity  is
+tested.
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetcholeskypreconditioner(const minlbfgsstate &state, const real_2d_array &p, const bool isupper)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgssetcholeskypreconditioner(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(p.c_ptr()), isupper, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minlbfgsiteration(const minlbfgsstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::minlbfgsiteration(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minlbfgsoptimize(minlbfgsstate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( grad==NULL )
+        throw ap_error("ALGLIB: error in 'minlbfgsoptimize()' (grad is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minlbfgsiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needfg )
+            {
+                grad(state.x, state.f, state.g, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minlbfgsoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+
+/*************************************************************************
+L-BFGS algorithm results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    * -1    incorrect parameters were specified
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsresults(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgsresults(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+L-BFGS algorithm results
+
+Buffered implementation of MinLBFGSResults which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.08.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsresultsbuf(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgsresultsbuf(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlbfgsreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This  subroutine restarts LBFGS algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsrestartfrom(const minlbfgsstate &state, const real_1d_array &x)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlbfgsrestartfrom(const_cast<alglib_impl::minlbfgsstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Levenberg-Marquardt optimizer.
+
+This structure should be created using one of the MinLMCreate???()
+functions. You should not access its fields directly; use ALGLIB functions
+to work with it.
+*************************************************************************/
+_minlmstate_owner::_minlmstate_owner()
+{
+    p_struct = (alglib_impl::minlmstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlmstate_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlmstate_owner::_minlmstate_owner(const _minlmstate_owner &rhs)
+{
+    p_struct = (alglib_impl::minlmstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlmstate_init_copy(p_struct, const_cast<alglib_impl::minlmstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlmstate_owner& _minlmstate_owner::operator=(const _minlmstate_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minlmstate_clear(p_struct);
+    if( !alglib_impl::_minlmstate_init_copy(p_struct, const_cast<alglib_impl::minlmstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minlmstate_owner::~_minlmstate_owner()
+{
+    alglib_impl::_minlmstate_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minlmstate* _minlmstate_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minlmstate* _minlmstate_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minlmstate*>(p_struct);
+}
+minlmstate::minlmstate() : _minlmstate_owner() ,needf(p_struct->needf),needfg(p_struct->needfg),needfgh(p_struct->needfgh),needfi(p_struct->needfi),needfij(p_struct->needfij),xupdated(p_struct->xupdated),f(p_struct->f),fi(&p_struct->fi),g(&p_struct->g),h(&p_struct->h),j(&p_struct->j),x(&p_struct->x)
+{
+}
+
+minlmstate::minlmstate(const minlmstate &rhs):_minlmstate_owner(rhs) ,needf(p_struct->needf),needfg(p_struct->needfg),needfgh(p_struct->needfgh),needfi(p_struct->needfi),needfij(p_struct->needfij),xupdated(p_struct->xupdated),f(p_struct->f),fi(&p_struct->fi),g(&p_struct->g),h(&p_struct->h),j(&p_struct->j),x(&p_struct->x)
+{
+}
+
+minlmstate& minlmstate::operator=(const minlmstate &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minlmstate_owner::operator=(rhs);
+    return *this;
+}
+
+minlmstate::~minlmstate()
+{
+}
+
+
+/*************************************************************************
+Optimization report, filled by MinLMResults() function
+
+FIELDS:
+* TerminationType, completetion code:
+    * -9    derivative correctness check failed;
+            see Rep.WrongNum, Rep.WrongI, Rep.WrongJ for
+            more information.
+    *  1    relative function improvement is no more than
+            EpsF.
+    *  2    relative step is no more than EpsX.
+    *  4    gradient is no more than EpsG.
+    *  5    MaxIts steps was taken
+    *  7    stopping conditions are too stringent,
+            further improvement is impossible
+* IterationsCount, contains iterations count
+* NFunc, number of function calculations
+* NJac, number of Jacobi matrix calculations
+* NGrad, number of gradient calculations
+* NHess, number of Hessian calculations
+* NCholesky, number of Cholesky decomposition calculations
+*************************************************************************/
+_minlmreport_owner::_minlmreport_owner()
+{
+    p_struct = (alglib_impl::minlmreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlmreport_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlmreport_owner::_minlmreport_owner(const _minlmreport_owner &rhs)
+{
+    p_struct = (alglib_impl::minlmreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minlmreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minlmreport_init_copy(p_struct, const_cast<alglib_impl::minlmreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minlmreport_owner& _minlmreport_owner::operator=(const _minlmreport_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minlmreport_clear(p_struct);
+    if( !alglib_impl::_minlmreport_init_copy(p_struct, const_cast<alglib_impl::minlmreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minlmreport_owner::~_minlmreport_owner()
+{
+    alglib_impl::_minlmreport_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minlmreport* _minlmreport_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minlmreport* _minlmreport_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minlmreport*>(p_struct);
+}
+minlmreport::minlmreport() : _minlmreport_owner() ,iterationscount(p_struct->iterationscount),terminationtype(p_struct->terminationtype),nfunc(p_struct->nfunc),njac(p_struct->njac),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky)
+{
+}
+
+minlmreport::minlmreport(const minlmreport &rhs):_minlmreport_owner(rhs) ,iterationscount(p_struct->iterationscount),terminationtype(p_struct->terminationtype),nfunc(p_struct->nfunc),njac(p_struct->njac),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky)
+{
+}
+
+minlmreport& minlmreport::operator=(const minlmreport &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minlmreport_owner::operator=(rhs);
+    return *this;
+}
+
+minlmreport::~minlmreport()
+{
+}
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] and Jacobian of f[].
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec()  and jac() callbacks.
+First  one  is used to calculate f[] at given point, second one calculates
+f[] and Jacobian df[i]/dx[j].
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatevj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] and Jacobian of f[].
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec()  and jac() callbacks.
+First  one  is used to calculate f[] at given point, second one calculates
+f[] and Jacobian df[i]/dx[j].
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevj(const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatevj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] only. Finite differences  are  used  to
+calculate Jacobian.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+* function vector f[] at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec() callback.
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works with general form function and does not accept function vector), but
+it will  lead  to  exception being thrown after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateV() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+    DiffStep-   differentiation step, >0
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+See also MinLMIteration, MinLMResults.
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatev(const ae_int_t n, const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatev(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), diffstep, const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] only. Finite differences  are  used  to
+calculate Jacobian.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+* function vector f[] at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec() callback.
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works with general form function and does not accept function vector), but
+it will  lead  to  exception being thrown after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateV() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+    DiffStep-   differentiation step, >0
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+See also MinLMIteration, MinLMResults.
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatev(const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatev(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), diffstep, const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+    LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This  function  is  used  to  find  minimum  of general form (not "sum-of-
+-squares") function
+    F = F(x[0], ..., x[n-1])
+using  its  gradient  and  Hessian.  Levenberg-Marquardt modification with
+L-BFGS pre-optimization and internal pre-conditioned  L-BFGS  optimization
+after each Levenberg-Marquardt step is used.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function value F at given point X
+* F and gradient G (simultaneously) at given point X
+* F, G and Hessian H (simultaneously) at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts func(),  grad()  and  hess()
+function pointers. First pointer is used to calculate F  at  given  point,
+second  one  calculates  F(x)  and  grad F(x),  third one calculates F(x),
+grad F(x), hess F(x).
+
+You can try to initialize MinLMState structure with FGH-function and  then
+use incorrect version of MinLMOptimize() (for example, version which  does
+not provide Hessian matrix), but it will lead to  exception  being  thrown
+after first attempt to calculate Hessian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateFGH() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   pointers (delegates, etc.) to callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgh(const ae_int_t n, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatefgh(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+    LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This  function  is  used  to  find  minimum  of general form (not "sum-of-
+-squares") function
+    F = F(x[0], ..., x[n-1])
+using  its  gradient  and  Hessian.  Levenberg-Marquardt modification with
+L-BFGS pre-optimization and internal pre-conditioned  L-BFGS  optimization
+after each Levenberg-Marquardt step is used.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function value F at given point X
+* F and gradient G (simultaneously) at given point X
+* F, G and Hessian H (simultaneously) at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts func(),  grad()  and  hess()
+function pointers. First pointer is used to calculate F  at  given  point,
+second  one  calculates  F(x)  and  grad F(x),  third one calculates F(x),
+grad F(x), hess F(x).
+
+You can try to initialize MinLMState structure with FGH-function and  then
+use incorrect version of MinLMOptimize() (for example, version which  does
+not provide Hessian matrix), but it will lead to  exception  being  thrown
+after first attempt to calculate Hessian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateFGH() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   pointers (delegates, etc.) to callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgh(const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatefgh(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using:
+* value of function vector f[]
+* value of Jacobian of f[]
+* gradient of merit function F(x)
+
+This function creates optimizer which uses acceleration strategy 2.  Cheap
+gradient of merit function (which is twice the product of function  vector
+and Jacobian) is used for accelerated iterations (see User Guide for  more
+info on this subject).
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+* gradient of
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts  fvec(),  jac()  and  grad()
+callbacks. First one is used to calculate f[] at given point,  second  one
+calculates f[] and Jacobian df[i]/dx[j], last one calculates  gradient  of
+merit function F(x).
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVGJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatevgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using:
+* value of function vector f[]
+* value of Jacobian of f[]
+* gradient of merit function F(x)
+
+This function creates optimizer which uses acceleration strategy 2.  Cheap
+gradient of merit function (which is twice the product of function  vector
+and Jacobian) is used for accelerated iterations (see User Guide for  more
+info on this subject).
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+* gradient of
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts  fvec(),  jac()  and  grad()
+callbacks. First one is used to calculate f[] at given point,  second  one
+calculates f[] and Jacobian df[i]/dx[j], last one calculates  gradient  of
+merit function F(x).
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVGJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevgj(const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatevgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                   LEVENBERG-MARQUARDT-LIKE METHOD FOR
+                  NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of F(), gradient of F(), function vector f[]  and  Jacobian of
+f[].
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend to use MinLMCreateVGJ, which
+provides similar, but more consistent interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatefgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                   LEVENBERG-MARQUARDT-LIKE METHOD FOR
+                  NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of F(), gradient of F(), function vector f[]  and  Jacobian of
+f[].
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend to use MinLMCreateVGJ, which
+provides similar, but more consistent interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgj(const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatefgj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+    CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using  value  of  F(),  function  vector  f[] and Jacobian of f[]. Classic
+Levenberg-Marquardt method is used.
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend  to use MinLMCreateVJ, which
+provides similar, but more consistent and feature-rich interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatefj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+    CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using  value  of  F(),  function  vector  f[] and Jacobian of f[]. Classic
+Levenberg-Marquardt method is used.
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend  to use MinLMCreateVJ, which
+provides similar, but more consistent and feature-rich interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefj(const ae_int_t m, const real_1d_array &x, minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmcreatefj(n, m, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets stopping conditions for Levenberg-Marquardt optimization
+algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations   is    unlimited.   Only   Levenberg-Marquardt
+                iterations  are  counted  (L-BFGS/CG  iterations  are  NOT
+                counted because their cost is very low compared to that of
+                LM).
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetcond(const minlmstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmsetcond(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinLMOptimize(). Both Levenberg-Marquardt and internal  L-BFGS
+iterations are reported.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetxrep(const minlmstate &state, const bool needxrep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmsetxrep(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), needxrep, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+NOTE: non-zero StpMax leads to moderate  performance  degradation  because
+intermediate  step  of  preconditioned L-BFGS optimization is incompatible
+with limits on step size.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetstpmax(const minlmstate &state, const double stpmax)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmsetstpmax(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), stpmax, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function is used to change acceleration settings
+
+You can choose between three acceleration strategies:
+* AccType=0, no acceleration.
+* AccType=1, secant updates are used to update quadratic model after  each
+  iteration. After fixed number of iterations (or after  model  breakdown)
+  we  recalculate  quadratic  model  using  analytic  Jacobian  or  finite
+  differences. Number of secant-based iterations depends  on  optimization
+  settings: about 3 iterations - when we have analytic Jacobian, up to 2*N
+  iterations - when we use finite differences to calculate Jacobian.
+* AccType=2, after quadratic model is built and LM step is made, we use it
+  as preconditioner for several (5-10) iterations of L-BFGS algorithm.
+
+AccType=1 is recommended when Jacobian  calculation  cost  is  prohibitive
+high (several Mx1 function vector calculations  followed  by  several  NxN
+Cholesky factorizations are faster than calculation of one M*N  Jacobian).
+It should also be used when we have no Jacobian, because finite difference
+approximation takes too much time to compute.
+
+AccType=2 is recommended when Jacobian is cheap - much more  cheaper  than
+one  Cholesky  factorization.   We   can   reduce   number   of   Cholesky
+factorizations at the cost of increased number of  Jacobian  calculations.
+Sometimes it helps.
+
+Table below list  optimization  protocols  (XYZ  protocol  corresponds  to
+MinLMCreateXYZ) and acceleration types they support (and use by  default).
+
+ACCELERATION TYPES SUPPORTED BY OPTIMIZATION PROTOCOLS:
+
+protocol    0   1   2   comment
+V           +   +
+VJ          +   +   +
+FGH         +       +
+VGJ         +   +   +   special protocol, not for widespread use
+FJ          +       +   obsolete protocol, not recommended
+FGJ         +       +   obsolete protocol, not recommended
+
+DAFAULT VALUES:
+
+protocol    0   1   2   comment
+V               x       without acceleration it is so slooooooooow
+VJ          x
+FGH         x
+VGJ                 x   we've implicitly turned (2) by passing gradient
+FJ          x           obsolete protocol, not recommended
+FGJ                 x   obsolete protocol, not recommended
+
+NOTE: this  function should be called before optimization. Attempt to call
+it during algorithm iterations may result in unexpected behavior.
+
+NOTE: attempt to call this function with unsupported protocol/acceleration
+combination will result in exception being thrown.
+
+  -- ALGLIB --
+     Copyright 14.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetacctype(const minlmstate &state, const ae_int_t acctype)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmsetacctype(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), acctype, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minlmiteration(const minlmstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::minlmiteration(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minlmoptimize(minlmstate &state,
+    void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( fvec==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (fvec is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needfi )
+            {
+                fvec(state.x, state.fi, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minlmoptimize(minlmstate &state,
+    void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr),
+    void  (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( fvec==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (fvec is NULL)");
+    if( jac==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (jac is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needfi )
+            {
+                fvec(state.x, state.fi, ptr);
+                continue;
+            }
+            if( state.needfij )
+            {
+                jac(state.x, state.fi, state.j, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minlmoptimize(minlmstate &state,
+    void (*func)(const real_1d_array &x, double &func, void *ptr),
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void (*hess)(const real_1d_array &x, double &func, real_1d_array &grad, real_2d_array &hess, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( func==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (func is NULL)");
+    if( grad==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (grad is NULL)");
+    if( hess==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (hess is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needf )
+            {
+                func(state.x, state.f, ptr);
+                continue;
+            }
+            if( state.needfg )
+            {
+                grad(state.x, state.f, state.g, ptr);
+                continue;
+            }
+            if( state.needfgh )
+            {
+                hess(state.x, state.f, state.g, state.h, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minlmoptimize(minlmstate &state,
+    void (*func)(const real_1d_array &x, double &func, void *ptr),
+    void  (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( func==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (func is NULL)");
+    if( jac==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (jac is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needf )
+            {
+                func(state.x, state.f, ptr);
+                continue;
+            }
+            if( state.needfij )
+            {
+                jac(state.x, state.fi, state.j, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minlmoptimize(minlmstate &state,
+    void (*func)(const real_1d_array &x, double &func, void *ptr),
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( func==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (func is NULL)");
+    if( grad==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (grad is NULL)");
+    if( jac==NULL )
+        throw ap_error("ALGLIB: error in 'minlmoptimize()' (jac is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minlmiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needf )
+            {
+                func(state.x, state.f, ptr);
+                continue;
+            }
+            if( state.needfg )
+            {
+                grad(state.x, state.f, state.g, ptr);
+                continue;
+            }
+            if( state.needfij )
+            {
+                jac(state.x, state.fi, state.j, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minlmoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+
+/*************************************************************************
+Levenberg-Marquardt algorithm results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report;
+                see comments for this structure for more info.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmresults(const minlmstate &state, real_1d_array &x, minlmreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmresults(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Levenberg-Marquardt algorithm results
+
+Buffered implementation of MinLMResults(), which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmresultsbuf(const minlmstate &state, real_1d_array &x, minlmreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmresultsbuf(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minlmreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This  subroutine  restarts  LM  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used for reverse communication previously
+                allocated with MinLMCreateXXX call.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmrestartfrom(const minlmstate &state, const real_1d_array &x)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minlmrestartfrom(const_cast<alglib_impl::minlmstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+
+*************************************************************************/
+_minasastate_owner::_minasastate_owner()
+{
+    p_struct = (alglib_impl::minasastate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasastate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minasastate_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minasastate_owner::_minasastate_owner(const _minasastate_owner &rhs)
+{
+    p_struct = (alglib_impl::minasastate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasastate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minasastate_init_copy(p_struct, const_cast<alglib_impl::minasastate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minasastate_owner& _minasastate_owner::operator=(const _minasastate_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minasastate_clear(p_struct);
+    if( !alglib_impl::_minasastate_init_copy(p_struct, const_cast<alglib_impl::minasastate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minasastate_owner::~_minasastate_owner()
+{
+    alglib_impl::_minasastate_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minasastate* _minasastate_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minasastate* _minasastate_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minasastate*>(p_struct);
+}
+minasastate::minasastate() : _minasastate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+minasastate::minasastate(const minasastate &rhs):_minasastate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+minasastate& minasastate::operator=(const minasastate &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minasastate_owner::operator=(rhs);
+    return *this;
+}
+
+minasastate::~minasastate()
+{
+}
+
+
+/*************************************************************************
+
+*************************************************************************/
+_minasareport_owner::_minasareport_owner()
+{
+    p_struct = (alglib_impl::minasareport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasareport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minasareport_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minasareport_owner::_minasareport_owner(const _minasareport_owner &rhs)
+{
+    p_struct = (alglib_impl::minasareport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minasareport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minasareport_init_copy(p_struct, const_cast<alglib_impl::minasareport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minasareport_owner& _minasareport_owner::operator=(const _minasareport_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minasareport_clear(p_struct);
+    if( !alglib_impl::_minasareport_init_copy(p_struct, const_cast<alglib_impl::minasareport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minasareport_owner::~_minasareport_owner()
+{
+    alglib_impl::_minasareport_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minasareport* _minasareport_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minasareport* _minasareport_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minasareport*>(p_struct);
+}
+minasareport::minasareport() : _minasareport_owner() ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),activeconstraints(p_struct->activeconstraints)
+{
+}
+
+minasareport::minasareport(const minasareport &rhs):_minasareport_owner(rhs) ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),activeconstraints(p_struct->activeconstraints)
+{
+}
+
+minasareport& minasareport::operator=(const minasareport &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minasareport_owner::operator=(rhs);
+    return *this;
+}
+
+minasareport::~minasareport()
+{
+}
+
+/*************************************************************************
+              NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING
+                      MODIFIED ACTIVE SET ALGORITHM
+                   WILLIAM W. HAGER AND HONGCHAO ZHANG
+
+DESCRIPTION:
+The  subroutine  minimizes  function  F(x)  of  N  arguments  with   bound
+constraints: BndL[i] <= x[i] <= BndU[i]
+
+This method is  globally  convergent  as  long  as  grad(f)  is  Lipschitz
+continuous on a level set: L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinASACreate() call
+2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and
+   other functions
+3. User calls MinASAOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinASAResults() to get solution
+5. Optionally, user may call MinASARestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinASARestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from sizes of
+                  X/BndL/BndU.
+    X       -   starting point, array[0..N-1].
+    BndL    -   lower bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very small number as bound: -1000,  -1.0E6
+                or -1.0E300, or something like that.
+    BndU    -   upper bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very large number as bound: +1000,  +1.0E6
+                or +1.0E300, or something like that.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+NOTES:
+
+1. you may tune stopping conditions with MinASASetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinASASetStpMax() function to bound algorithm's steps.
+3. this function does NOT support infinite/NaN values in X, BndL, BndU.
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasacreate(const ae_int_t n, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasacreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), const_cast<alglib_impl::minasastate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+              NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING
+                      MODIFIED ACTIVE SET ALGORITHM
+                   WILLIAM W. HAGER AND HONGCHAO ZHANG
+
+DESCRIPTION:
+The  subroutine  minimizes  function  F(x)  of  N  arguments  with   bound
+constraints: BndL[i] <= x[i] <= BndU[i]
+
+This method is  globally  convergent  as  long  as  grad(f)  is  Lipschitz
+continuous on a level set: L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinASACreate() call
+2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and
+   other functions
+3. User calls MinASAOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinASAResults() to get solution
+5. Optionally, user may call MinASARestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinASARestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from sizes of
+                  X/BndL/BndU.
+    X       -   starting point, array[0..N-1].
+    BndL    -   lower bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very small number as bound: -1000,  -1.0E6
+                or -1.0E300, or something like that.
+    BndU    -   upper bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very large number as bound: +1000,  +1.0E6
+                or +1.0E300, or something like that.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+NOTES:
+
+1. you may tune stopping conditions with MinASASetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinASASetStpMax() function to bound algorithm's steps.
+3. this function does NOT support infinite/NaN values in X, BndL, BndU.
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasacreate(const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+    if( (x.length()!=bndl.length()) || (x.length()!=bndu.length()))
+        throw ap_error("Error while calling 'minasacreate': looks like one of arguments has wrong size");
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasacreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), const_cast<alglib_impl::minasastate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets stopping conditions for the ASA optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetcond(const minasastate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasasetcond(const_cast<alglib_impl::minasastate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinASAOptimize().
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetxrep(const minasastate &state, const bool needxrep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasasetxrep(const_cast<alglib_impl::minasastate*>(state.c_ptr()), needxrep, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm stat
+    UAType  -   algorithm type:
+                * -1    automatic selection of the best algorithm
+                * 0     DY (Dai and Yuan) algorithm
+                * 1     Hybrid DY-HS algorithm
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetalgorithm(const minasastate &state, const ae_int_t algotype)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasasetalgorithm(const_cast<alglib_impl::minasastate*>(state.c_ptr()), algotype, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length (zero by default).
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetstpmax(const minasastate &state, const double stpmax)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasasetstpmax(const_cast<alglib_impl::minasastate*>(state.c_ptr()), stpmax, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minasaiteration(const minasastate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::minasaiteration(const_cast<alglib_impl::minasastate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minasaoptimize(minasastate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( grad==NULL )
+        throw ap_error("ALGLIB: error in 'minasaoptimize()' (grad is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minasaiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needfg )
+            {
+                grad(state.x, state.f, state.g, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minasaoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+
+/*************************************************************************
+ASA results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    * -1    incorrect parameters were specified
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+                * ActiveConstraints contains number of active constraints
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minasaresults(const minasastate &state, real_1d_array &x, minasareport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasaresults(const_cast<alglib_impl::minasastate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minasareport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+ASA results
+
+Buffered implementation of MinASAResults() which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minasaresultsbuf(const minasastate &state, real_1d_array &x, minasareport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasaresultsbuf(const_cast<alglib_impl::minasastate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minasareport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This  subroutine  restarts  CG  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinCGCreate call.
+    X       -   new starting point.
+    BndL    -   new lower bounds
+    BndU    -   new upper bounds
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasarestartfrom(const minasastate &state, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minasarestartfrom(const_cast<alglib_impl::minasastate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This object stores state of the nonlinear CG optimizer.
+
+You should use ALGLIB functions to work with this object.
+*************************************************************************/
+_mincgstate_owner::_mincgstate_owner()
+{
+    p_struct = (alglib_impl::mincgstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_mincgstate_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_mincgstate_owner::_mincgstate_owner(const _mincgstate_owner &rhs)
+{
+    p_struct = (alglib_impl::mincgstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_mincgstate_init_copy(p_struct, const_cast<alglib_impl::mincgstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_mincgstate_owner& _mincgstate_owner::operator=(const _mincgstate_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_mincgstate_clear(p_struct);
+    if( !alglib_impl::_mincgstate_init_copy(p_struct, const_cast<alglib_impl::mincgstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_mincgstate_owner::~_mincgstate_owner()
+{
+    alglib_impl::_mincgstate_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::mincgstate* _mincgstate_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::mincgstate* _mincgstate_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::mincgstate*>(p_struct);
+}
+mincgstate::mincgstate() : _mincgstate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+mincgstate::mincgstate(const mincgstate &rhs):_mincgstate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+mincgstate& mincgstate::operator=(const mincgstate &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _mincgstate_owner::operator=(rhs);
+    return *this;
+}
+
+mincgstate::~mincgstate()
+{
+}
+
+
+/*************************************************************************
+
+*************************************************************************/
+_mincgreport_owner::_mincgreport_owner()
+{
+    p_struct = (alglib_impl::mincgreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_mincgreport_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_mincgreport_owner::_mincgreport_owner(const _mincgreport_owner &rhs)
+{
+    p_struct = (alglib_impl::mincgreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mincgreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_mincgreport_init_copy(p_struct, const_cast<alglib_impl::mincgreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_mincgreport_owner& _mincgreport_owner::operator=(const _mincgreport_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_mincgreport_clear(p_struct);
+    if( !alglib_impl::_mincgreport_init_copy(p_struct, const_cast<alglib_impl::mincgreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_mincgreport_owner::~_mincgreport_owner()
+{
+    alglib_impl::_mincgreport_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::mincgreport* _mincgreport_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::mincgreport* _mincgreport_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::mincgreport*>(p_struct);
+}
+mincgreport::mincgreport() : _mincgreport_owner() ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
+{
+}
+
+mincgreport::mincgreport(const mincgreport &rhs):_mincgreport_owner(rhs) ,iterationscount(p_struct->iterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
+{
+}
+
+mincgreport& mincgreport::operator=(const mincgreport &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _mincgreport_owner::operator=(rhs);
+    return *this;
+}
+
+mincgreport::~mincgreport()
+{
+}
+
+/*************************************************************************
+        NONLINEAR CONJUGATE GRADIENT METHOD
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by using one of  the
+nonlinear conjugate gradient methods.
+
+These CG methods are globally convergent (even on non-convex functions) as
+long as grad(f) is Lipschitz continuous in  a  some  neighborhood  of  the
+L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinCGCreate() call
+2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and
+   other functions
+3. User calls MinCGOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinCGResults() to get solution
+5. Optionally, user may call MinCGRestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinCGRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   starting point, array[0..N-1].
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgcreate(const ae_int_t n, const real_1d_array &x, mincgstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgcreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+        NONLINEAR CONJUGATE GRADIENT METHOD
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by using one of  the
+nonlinear conjugate gradient methods.
+
+These CG methods are globally convergent (even on non-convex functions) as
+long as grad(f) is Lipschitz continuous in  a  some  neighborhood  of  the
+L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinCGCreate() call
+2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and
+   other functions
+3. User calls MinCGOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinCGResults() to get solution
+5. Optionally, user may call MinCGRestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinCGRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   starting point, array[0..N-1].
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgcreate(const real_1d_array &x, mincgstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgcreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets stopping conditions for CG optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetcond(const mincgstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgsetcond(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), epsg, epsf, epsx, maxits, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinCGOptimize().
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetxrep(const mincgstate &state, const bool needxrep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgsetxrep(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), needxrep, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets CG algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    CGType  -   algorithm type:
+                * -1    automatic selection of the best algorithm
+                * 0     DY (Dai and Yuan) algorithm
+                * 1     Hybrid DY-HS algorithm
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetcgtype(const mincgstate &state, const ae_int_t cgtype)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgsetcgtype(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), cgtype, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetstpmax(const mincgstate &state, const double stpmax)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgsetstpmax(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), stpmax, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function allows to suggest initial step length to the CG algorithm.
+
+Suggested  step  length  is used as starting point for the line search. It
+can be useful when you have  badly  scaled  problem,  i.e.  when  ||grad||
+(which is used as initial estimate for the first step) is many  orders  of
+magnitude different from the desired step.
+
+Line search  may  fail  on  such problems without good estimate of initial
+step length. Imagine, for example, problem with ||grad||=10^50 and desired
+step equal to 0.1 Line  search function will use 10^50  as  initial  step,
+then  it  will  decrease step length by 2 (up to 20 attempts) and will get
+10^44, which is still too large.
+
+This function allows us to tell than line search should  be  started  from
+some moderate step length, like 1.0, so algorithm will be able  to  detect
+desired step length in a several searches.
+
+This function influences only first iteration of algorithm. It  should  be
+called between MinCGCreate/MinCGRestartFrom() call and MinCGOptimize call.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state.
+    Stp     -   initial estimate of the step length.
+                Can be zero (no estimate).
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsuggeststep(const mincgstate &state, const double stp)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgsuggeststep(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), stp, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool mincgiteration(const mincgstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::mincgiteration(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void mincgoptimize(mincgstate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( grad==NULL )
+        throw ap_error("ALGLIB: error in 'mincgoptimize()' (grad is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::mincgiteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needfg )
+            {
+                grad(state.x, state.f, state.g, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'mincgoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+
+/*************************************************************************
+Conjugate gradient results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible,
+                            we return best X found so far
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+void mincgresults(const mincgstate &state, real_1d_array &x, mincgreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgresults(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+Conjugate gradient results
+
+Buffered implementation of MinCGResults(), which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+void mincgresultsbuf(const mincgstate &state, real_1d_array &x, mincgreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgresultsbuf(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::mincgreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This  subroutine  restarts  CG  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgrestartfrom(const mincgstate &state, const real_1d_array &x)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::mincgrestartfrom(const_cast<alglib_impl::mincgstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This object stores nonlinear optimizer state.
+You should use functions provided by MinBLEIC subpackage to work with this
+object
+*************************************************************************/
+_minbleicstate_owner::_minbleicstate_owner()
+{
+    p_struct = (alglib_impl::minbleicstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minbleicstate_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minbleicstate_owner::_minbleicstate_owner(const _minbleicstate_owner &rhs)
+{
+    p_struct = (alglib_impl::minbleicstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicstate), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minbleicstate_init_copy(p_struct, const_cast<alglib_impl::minbleicstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minbleicstate_owner& _minbleicstate_owner::operator=(const _minbleicstate_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minbleicstate_clear(p_struct);
+    if( !alglib_impl::_minbleicstate_init_copy(p_struct, const_cast<alglib_impl::minbleicstate*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minbleicstate_owner::~_minbleicstate_owner()
+{
+    alglib_impl::_minbleicstate_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minbleicstate* _minbleicstate_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minbleicstate* _minbleicstate_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minbleicstate*>(p_struct);
+}
+minbleicstate::minbleicstate() : _minbleicstate_owner() ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+minbleicstate::minbleicstate(const minbleicstate &rhs):_minbleicstate_owner(rhs) ,needfg(p_struct->needfg),xupdated(p_struct->xupdated),f(p_struct->f),g(&p_struct->g),x(&p_struct->x)
+{
+}
+
+minbleicstate& minbleicstate::operator=(const minbleicstate &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minbleicstate_owner::operator=(rhs);
+    return *this;
+}
+
+minbleicstate::~minbleicstate()
+{
+}
+
+
+/*************************************************************************
+This structure stores optimization report:
+* InnerIterationsCount      number of inner iterations
+* OuterIterationsCount      number of outer iterations
+* NFEV                      number of gradient evaluations
+
+There are additional fields which can be used for debugging:
+* DebugEqErr                error in the equality constraints (2-norm)
+* DebugFS                   f, calculated at projection of initial point
+                            to the feasible set
+* DebugFF                   f, calculated at the final point
+* DebugDX                   |X_start-X_final|
+*************************************************************************/
+_minbleicreport_owner::_minbleicreport_owner()
+{
+    p_struct = (alglib_impl::minbleicreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minbleicreport_init(p_struct, NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minbleicreport_owner::_minbleicreport_owner(const _minbleicreport_owner &rhs)
+{
+    p_struct = (alglib_impl::minbleicreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::minbleicreport), NULL);
+    if( p_struct==NULL )
+        throw ap_error("ALGLIB: malloc error");
+    if( !alglib_impl::_minbleicreport_init_copy(p_struct, const_cast<alglib_impl::minbleicreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+}
+
+_minbleicreport_owner& _minbleicreport_owner::operator=(const _minbleicreport_owner &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    alglib_impl::_minbleicreport_clear(p_struct);
+    if( !alglib_impl::_minbleicreport_init_copy(p_struct, const_cast<alglib_impl::minbleicreport*>(rhs.p_struct), NULL, ae_false) )
+        throw ap_error("ALGLIB: malloc error");
+    return *this;
+}
+
+_minbleicreport_owner::~_minbleicreport_owner()
+{
+    alglib_impl::_minbleicreport_clear(p_struct);
+    ae_free(p_struct);
+}
+
+alglib_impl::minbleicreport* _minbleicreport_owner::c_ptr()
+{
+    return p_struct;
+}
+
+alglib_impl::minbleicreport* _minbleicreport_owner::c_ptr() const
+{
+    return const_cast<alglib_impl::minbleicreport*>(p_struct);
+}
+minbleicreport::minbleicreport() : _minbleicreport_owner() ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),debugeqerr(p_struct->debugeqerr),debugfs(p_struct->debugfs),debugff(p_struct->debugff),debugdx(p_struct->debugdx)
+{
+}
+
+minbleicreport::minbleicreport(const minbleicreport &rhs):_minbleicreport_owner(rhs) ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype),debugeqerr(p_struct->debugeqerr),debugfs(p_struct->debugfs),debugff(p_struct->debugff),debugdx(p_struct->debugdx)
+{
+}
+
+minbleicreport& minbleicreport::operator=(const minbleicreport &rhs)
+{
+    if( this==&rhs )
+        return *this;
+    _minbleicreport_owner::operator=(rhs);
+    return *this;
+}
+
+minbleicreport::~minbleicreport()
+{
+}
+
+/*************************************************************************
+                     BOUND CONSTRAINED OPTIMIZATION
+       WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS
+
+DESCRIPTION:
+The  subroutine  minimizes  function   F(x)  of N arguments subject to any
+combination of:
+* bound constraints
+* linear inequality constraints
+* linear equality constraints
+
+REQUIREMENTS:
+* function value and gradient
+* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) }
+* function must be defined even in the infeasible points (algorithm make take
+  steps in the infeasible area before converging to the feasible point)
+* starting point X0 must be feasible or not too far away from the feasible set
+* problem must satisfy strict complementary conditions
+
+USAGE:
+
+Constrained optimization if far more complex than the unconstrained one.
+Here we give very brief outline of the BLEIC optimizer. We strongly recommend
+you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide
+on optimization, which is available at http://www.alglib.net/optimization/
+
+1. User initializes algorithm state with MinBLEICCreate() call
+
+2. USer adds boundary and/or linear constraints by calling
+   MinBLEICSetBC() and MinBLEICSetLC() functions.
+
+3. User sets stopping conditions for underlying unconstrained solver
+   with MinBLEICSetInnerCond() call.
+   This function controls accuracy of underlying optimization algorithm.
+
+4. User sets stopping conditions for outer iteration by calling
+   MinBLEICSetOuterCond() function.
+   This function controls handling of boundary and inequality constraints.
+
+5. User tunes barrier parameters:
+   * barrier width with MinBLEICSetBarrierWidth() call
+   * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call
+   These functions control handling of boundary and inequality constraints.
+
+6. Additionally, user may set limit on number of internal iterations
+   by MinBLEICSetMaxIts() call.
+   This function allows to prevent algorithm from looping forever.
+
+7. User calls MinBLEICOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+
+8. User calls MinBLEICResults() to get solution
+
+9. Optionally user may call MinBLEICRestartFrom() to solve another problem
+   with same N but another starting point.
+   MinBLEICRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size ofX
+    X       -   starting point, array[N]:
+                * it is better to set X to a feasible point
+                * but X can be infeasible, in which case algorithm will try
+                  to find feasible point first, using X as initial
+                  approximation.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleiccreate(const ae_int_t n, const real_1d_array &x, minbleicstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleiccreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+                     BOUND CONSTRAINED OPTIMIZATION
+       WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS
+
+DESCRIPTION:
+The  subroutine  minimizes  function   F(x)  of N arguments subject to any
+combination of:
+* bound constraints
+* linear inequality constraints
+* linear equality constraints
+
+REQUIREMENTS:
+* function value and gradient
+* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) }
+* function must be defined even in the infeasible points (algorithm make take
+  steps in the infeasible area before converging to the feasible point)
+* starting point X0 must be feasible or not too far away from the feasible set
+* problem must satisfy strict complementary conditions
+
+USAGE:
+
+Constrained optimization if far more complex than the unconstrained one.
+Here we give very brief outline of the BLEIC optimizer. We strongly recommend
+you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide
+on optimization, which is available at http://www.alglib.net/optimization/
+
+1. User initializes algorithm state with MinBLEICCreate() call
+
+2. USer adds boundary and/or linear constraints by calling
+   MinBLEICSetBC() and MinBLEICSetLC() functions.
+
+3. User sets stopping conditions for underlying unconstrained solver
+   with MinBLEICSetInnerCond() call.
+   This function controls accuracy of underlying optimization algorithm.
+
+4. User sets stopping conditions for outer iteration by calling
+   MinBLEICSetOuterCond() function.
+   This function controls handling of boundary and inequality constraints.
+
+5. User tunes barrier parameters:
+   * barrier width with MinBLEICSetBarrierWidth() call
+   * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call
+   These functions control handling of boundary and inequality constraints.
+
+6. Additionally, user may set limit on number of internal iterations
+   by MinBLEICSetMaxIts() call.
+   This function allows to prevent algorithm from looping forever.
+
+7. User calls MinBLEICOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+
+8. User calls MinBLEICResults() to get solution
+
+9. Optionally user may call MinBLEICRestartFrom() to solve another problem
+   with same N but another starting point.
+   MinBLEICRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size ofX
+    X       -   starting point, array[N]:
+                * it is better to set X to a feasible point
+                * but X can be infeasible, in which case algorithm will try
+                  to find feasible point first, using X as initial
+                  approximation.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleiccreate(const real_1d_array &x, minbleicstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t n;
+
+    n = x.length();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleiccreate(n, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets boundary constraints for BLEIC optimizer.
+
+Boundary constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure stores algorithm state
+    BndL    -   lower bounds, array[N].
+                If some (all) variables are unbounded, you may specify
+                very small number or -INF.
+    BndU    -   upper bounds, array[N].
+                If some (all) variables are unbounded, you may specify
+                very large number or +INF.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbc(const minbleicstate &state, const real_1d_array &bndl, const real_1d_array &bndu)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetbc(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndl.c_ptr()), const_cast<alglib_impl::ae_vector*>(bndu.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets linear constraints for BLEIC optimizer.
+
+Linear constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    C       -   linear constraints, array[K,N+1].
+                Each row of C represents one constraint, either equality
+                or inequality (see below):
+                * first N elements correspond to coefficients,
+                * last element corresponds to the right part.
+                All elements of C (including right part) must be finite.
+    CT      -   type of constraints, array[K]:
+                * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1]
+                * if CT[i]=0, then I-th constraint is C[i,*]*x  = C[i,n+1]
+                * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1]
+    K       -   number of equality/inequality constraints, K>=0:
+                * if given, only leading K elements of C/CT are used
+                * if not given, automatically determined from sizes of C/CT
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetlc(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets linear constraints for BLEIC optimizer.
+
+Linear constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    C       -   linear constraints, array[K,N+1].
+                Each row of C represents one constraint, either equality
+                or inequality (see below):
+                * first N elements correspond to coefficients,
+                * last element corresponds to the right part.
+                All elements of C (including right part) must be finite.
+    CT      -   type of constraints, array[K]:
+                * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1]
+                * if CT[i]=0, then I-th constraint is C[i,*]*x  = C[i,n+1]
+                * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1]
+    K       -   number of equality/inequality constraints, K>=0:
+                * if given, only leading K elements of C/CT are used
+                * if not given, automatically determined from sizes of C/CT
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct)
+{
+    alglib_impl::ae_state _alglib_env_state;    
+    ae_int_t k;
+    if( (c.rows()!=ct.length()))
+        throw ap_error("Error while calling 'minbleicsetlc': looks like one of arguments has wrong size");
+    k = c.rows();
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetlc(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state);
+
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets stopping conditions for the underlying nonlinear CG
+optimizer. It controls overall accuracy of solution. These conditions
+should be strict enough in order for algorithm to converge.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                Algorithm finishes its work if 2-norm of the Lagrangian
+                gradient is less than or equal to EpsG.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+
+Passing EpsG=0, EpsF=0 and EpsX=0 (simultaneously) will lead to
+automatic stopping criterion selection.
+
+These conditions are used to terminate inner iterations. However, you
+need to tune termination conditions for outer iterations too.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetinnercond(const minbleicstate &state, const double epsg, const double epsf, const double epsx)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetinnercond(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), epsg, epsf, epsx, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets stopping conditions for outer iteration of BLEIC algo.
+
+These conditions control accuracy of constraint handling and amount of
+infeasibility allowed in the solution.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsX    -   >0, stopping condition on outer iteration step length
+    EpsI    -   >0, stopping condition on infeasibility
+
+Both EpsX and EpsI must be non-zero.
+
+MEANING OF EpsX
+
+EpsX  is  a  stopping  condition for outer iterations. Algorithm will stop
+when  solution  of  the  current  modified  subproblem will be within EpsX
+(using 2-norm) of the previous solution.
+
+MEANING OF EpsI
+
+EpsI controls feasibility properties -  algorithm  won't  stop  until  all
+inequality constraints will be satisfied with error (distance from current
+point to the feasible area) at most EpsI.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetoutercond(const minbleicstate &state, const double epsx, const double epsi)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetoutercond(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), epsx, epsi, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets initial barrier width.
+
+BLEIC optimizer uses  modified  barrier  functions  to  handle  inequality
+constraints. These functions are almost constant in the inner parts of the
+feasible  area,  but  grow rapidly to the infinity OUTSIDE of the feasible
+area. Barrier width is a distance from feasible area to  the  point  where
+modified barrier function becomes infinite.
+
+Barrier width must be:
+* small enough (below some problem-dependent value) in order for algorithm
+  to  converge.  Necessary  condition  is that the target function must be
+  well described by linear model in the areas as small as barrier width.
+* not VERY small (in order to avoid  difficulties  associated  with  rapid
+  changes in the modified function, ill-conditioning, round-off issues).
+
+Choosing  appropriate  barrier  width  is  very  important  for  efficient
+optimization, and it often requires error  and  trial.  You  can  use  two
+strategies when choosing barrier width:
+* set barrier width with MinBLEICSetBarrierWidth() call. In this case  you
+  should try different barrier widths and examine results.
+* set decreasing barrier width by combining  MinBLEICSetBarrierWidth() and
+  MinBLEICSetBarrierDecay()  calls.  In  this case algorithm will decrease
+  barrier  width  after  each  outer iteration until it encounters optimal
+  barrier width.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    Mu      -   >0, initial barrier width
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbarrierwidth(const minbleicstate &state, const double mu)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetbarrierwidth(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), mu, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets decay coefficient for barrier width.
+
+By default, no barrier decay is used (Decay=1.0).
+
+BLEIC optimizer uses  modified  barrier  functions  to  handle  inequality
+constraints. These functions are almost constant in the inner parts of the
+feasible  area,  but  grow rapidly to the infinity OUTSIDE of the feasible
+area. Barrier width is a distance from feasible area to  the  point  where
+modified barrier function becomes infinite. Decay coefficient allows us to
+decrease  barrier  width  from  the  initial  (suboptimial)  value   until
+optimal value will be met.
+
+We recommend you either to set MuDecay=1.0 (no decay) or use some moderate
+value like 0.5-0.7
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    MuDecay -   0<MuDecay<=1, decay coefficient
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbarrierdecay(const minbleicstate &state, const double mudecay)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetbarrierdecay(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), mudecay, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function allows to stop algorithm after specified number of inner
+iterations.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    MaxIts  -   maximum number of inner iterations.
+                If MaxIts=0, the number of iterations is unlimited.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetmaxits(const minbleicstate &state, const ae_int_t maxits)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetmaxits(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), maxits, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinBLEICOptimize().
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetxrep(const minbleicstate &state, const bool needxrep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetxrep(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), needxrep, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  lead   to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetstpmax(const minbleicstate &state, const double stpmax)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicsetstpmax(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), stpmax, &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minbleiciteration(const minbleicstate &state)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        ae_bool result = alglib_impl::minbleiciteration(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return *(reinterpret_cast<bool*>(&result));
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+void minbleicoptimize(minbleicstate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr), 
+    void *ptr)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    if( grad==NULL )
+        throw ap_error("ALGLIB: error in 'minbleicoptimize()' (grad is NULL)");
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        while( alglib_impl::minbleiciteration(state.c_ptr(), &_alglib_env_state) )
+        {
+            if( state.needfg )
+            {
+                grad(state.x, state.f, state.g, ptr);
+                continue;
+            }
+            if( state.xupdated )
+            {
+                if( rep!=NULL )
+                    rep(state.x, state.f, ptr);
+                continue;
+            }
+            throw ap_error("ALGLIB: error in 'minbleicoptimize' (some derivatives were not provided?)");
+        }
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+
+
+/*************************************************************************
+BLEIC results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -3    inconsistent constraints. Feasible point is
+                            either nonexistent or too hard to find. Try to
+                            restart optimizer with better initial
+                            approximation
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    *  4    conditions on constraints are fulfilled
+                            with error less than or equal to EpsC
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible,
+                            X contains best point found so far.
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicresults(const minbleicstate &state, real_1d_array &x, minbleicreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicresults(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+BLEIC results
+
+Buffered implementation of MinBLEICResults() which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicresultsbuf(const minbleicstate &state, real_1d_array &x, minbleicreport &rep)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicresultsbuf(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::minbleicreport*>(rep.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+
+/*************************************************************************
+This subroutine restarts algorithm from new point.
+All optimization parameters (including constraints) are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have  same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicrestartfrom(const minbleicstate &state, const real_1d_array &x)
+{
+    alglib_impl::ae_state _alglib_env_state;
+    alglib_impl::ae_state_init(&_alglib_env_state);
+    try
+    {
+        alglib_impl::minbleicrestartfrom(const_cast<alglib_impl::minbleicstate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
+        alglib_impl::ae_state_clear(&_alglib_env_state);
+        return;
+    }
+    catch(alglib_impl::ae_error_type)
+    {
+        throw ap_error(_alglib_env_state.error_msg);
+    }
+    catch(...)
+    {
+        throw;
+    }
+}
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+static void minlbfgs_clearrequestfields(minlbfgsstate* state,
+     ae_state *_state);
+
+
+static ae_int_t minlm_lmmodefj = 0;
+static ae_int_t minlm_lmmodefgj = 1;
+static ae_int_t minlm_lmmodefgh = 2;
+static ae_int_t minlm_lmflagnoprelbfgs = 1;
+static ae_int_t minlm_lmflagnointlbfgs = 2;
+static ae_int_t minlm_lmprelbfgsm = 5;
+static ae_int_t minlm_lmintlbfgsits = 5;
+static ae_int_t minlm_lbfgsnorealloc = 1;
+static double minlm_lambdaup = 2.0;
+static double minlm_lambdadown = 0.33;
+static double minlm_suspiciousnu = 16;
+static ae_int_t minlm_smallmodelage = 3;
+static ae_int_t minlm_additers = 5;
+static void minlm_lmprepare(ae_int_t n,
+     ae_int_t m,
+     ae_bool havegrad,
+     minlmstate* state,
+     ae_state *_state);
+static void minlm_clearrequestfields(minlmstate* state, ae_state *_state);
+static ae_bool minlm_increaselambda(double* lambdav,
+     double* nu,
+     ae_state *_state);
+static void minlm_decreaselambda(double* lambdav,
+     double* nu,
+     ae_state *_state);
+
+
+static ae_int_t minasa_n1 = 2;
+static ae_int_t minasa_n2 = 2;
+static double minasa_stpmin = 1.0E-300;
+static double minasa_gpaftol = 0.0001;
+static double minasa_gpadecay = 0.5;
+static double minasa_asarho = 0.5;
+static double minasa_asaboundedantigradnorm(minasastate* state,
+     ae_state *_state);
+static double minasa_asaginorm(minasastate* state, ae_state *_state);
+static double minasa_asad1norm(minasastate* state, ae_state *_state);
+static ae_bool minasa_asauisempty(minasastate* state, ae_state *_state);
+static void minasa_clearrequestfields(minasastate* state,
+     ae_state *_state);
+
+
+static ae_int_t mincg_rscountdownlen = 10;
+static void mincg_clearrequestfields(mincgstate* state, ae_state *_state);
+
+
+static double minbleic_svdtol = 100;
+static double minbleic_lmtol = 100;
+static double minbleic_maxlmgrowth = 1000;
+static double minbleic_minlagrangemul = 1.0E-50;
+static double minbleic_maxlagrangemul = 1.0E+50;
+static double minbleic_maxouterits = 20;
+static ae_int_t minbleic_mucountdownlen = 15;
+static ae_int_t minbleic_cscountdownlen = 5;
+static void minbleic_clearrequestfields(minbleicstate* state,
+     ae_state *_state);
+static void minbleic_makeprojection(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* r,
+     double* rnorm2,
+     ae_state *_state);
+static void minbleic_modifytargetfunction(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* r,
+     double rnorm2,
+     double* f,
+     /* Real    */ ae_vector* g,
+     double* gnorm,
+     double* mpgnorm,
+     double* mba,
+     double* fierr,
+     double* cserr,
+     ae_state *_state);
+static void minbleic_penaltyfunction(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     double* f,
+     /* Real    */ ae_vector* g,
+     /* Real    */ ae_vector* r,
+     double* mba,
+     double* fierr,
+     ae_state *_state);
+
+
+
+
+
+/*************************************************************************
+        LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by  using  a  quasi-
+Newton method (LBFGS scheme) which is optimized to use  a  minimum  amount
+of memory.
+The subroutine generates the approximation of an inverse Hessian matrix by
+using information about the last M steps of the algorithm  (instead of N).
+It lessens a required amount of memory from a value  of  order  N^2  to  a
+value of order 2*N*M.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinLBFGSCreate() call
+2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax()
+   and other functions
+3. User calls MinLBFGSOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinLBFGSResults() to get solution
+5. Optionally user may call MinLBFGSRestartFrom() to solve another problem
+   with same N/M but another starting point and/or another function.
+   MinLBFGSRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension. N>0
+    M       -   number of corrections in the BFGS scheme of Hessian
+                approximation update. Recommended value:  3<=M<=7. The smaller
+                value causes worse convergence, the bigger will  not  cause  a
+                considerably better convergence, but will cause a fall in  the
+                performance. M<=N.
+    X       -   initial solution approximation, array[0..N-1].
+
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    
+
+NOTES:
+1. you may tune stopping conditions with MinLBFGSSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLBFGSSetStpMax() function to bound algorithm's  steps.  However,
+   L-BFGS rarely needs such a tuning.
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgscreate(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlbfgsstate* state,
+     ae_state *_state)
+{
+
+    _minlbfgsstate_clear(state);
+
+    ae_assert(n>=1, "MinLBFGSCreate: N<1!", _state);
+    ae_assert(m>=1, "MinLBFGSCreate: M<1", _state);
+    ae_assert(m<=n, "MinLBFGSCreate: M>N", _state);
+    ae_assert(x->cnt>=n, "MinLBFGSCreate: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLBFGSCreate: X contains infinite or NaN values!", _state);
+    minlbfgscreatex(n, m, x, 0, state, _state);
+}
+
+
+/*************************************************************************
+This function sets stopping conditions for L-BFGS optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetcond(minlbfgsstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(epsg, _state), "MinLBFGSSetCond: EpsG is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsg,0), "MinLBFGSSetCond: negative EpsG!", _state);
+    ae_assert(ae_isfinite(epsf, _state), "MinLBFGSSetCond: EpsF is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsf,0), "MinLBFGSSetCond: negative EpsF!", _state);
+    ae_assert(ae_isfinite(epsx, _state), "MinLBFGSSetCond: EpsX is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsx,0), "MinLBFGSSetCond: negative EpsX!", _state);
+    ae_assert(maxits>=0, "MinLBFGSSetCond: negative MaxIts!", _state);
+    if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 )
+    {
+        epsx = 1.0E-6;
+    }
+    state->epsg = epsg;
+    state->epsf = epsf;
+    state->epsx = epsx;
+    state->maxits = maxits;
+}
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinLBFGSOptimize().
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetxrep(minlbfgsstate* state,
+     ae_bool needxrep,
+     ae_state *_state)
+{
+
+
+    state->xrep = needxrep;
+}
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0 (default),  if
+                you don't want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetstpmax(minlbfgsstate* state,
+     double stpmax,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(stpmax, _state), "MinLBFGSSetStpMax: StpMax is not finite!", _state);
+    ae_assert(ae_fp_greater_eq(stpmax,0), "MinLBFGSSetStpMax: StpMax<0!", _state);
+    state->stpmax = stpmax;
+}
+
+
+/*************************************************************************
+Extended subroutine for internal use only.
+
+Accepts additional parameters:
+
+    Flags - additional settings:
+            * Flags = 0     means no additional settings
+            * Flags = 1     "do not allocate memory". used when solving
+                            a many subsequent tasks with  same N/M  values.
+                            First  call MUST  be without this flag bit set,
+                            subsequent  calls   of   MinLBFGS   with   same
+                            MinLBFGSState structure can set Flags to 1.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgscreatex(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     ae_int_t flags,
+     minlbfgsstate* state,
+     ae_state *_state)
+{
+    ae_bool allocatemem;
+
+
+    ae_assert(n>=1, "MinLBFGS: N too small!", _state);
+    ae_assert(m>=1, "MinLBFGS: M too small!", _state);
+    ae_assert(m<=n, "MinLBFGS: M too large!", _state);
+    
+    /*
+     * Initialize
+     */
+    state->n = n;
+    state->m = m;
+    state->flags = flags;
+    allocatemem = flags%2==0;
+    flags = flags/2;
+    if( allocatemem )
+    {
+        ae_vector_set_length(&state->rho, m-1+1, _state);
+        ae_vector_set_length(&state->theta, m-1+1, _state);
+        ae_matrix_set_length(&state->y, m-1+1, n-1+1, _state);
+        ae_matrix_set_length(&state->s, m-1+1, n-1+1, _state);
+        ae_vector_set_length(&state->d, n-1+1, _state);
+        ae_vector_set_length(&state->x, n-1+1, _state);
+        ae_vector_set_length(&state->g, n-1+1, _state);
+        ae_vector_set_length(&state->work, n-1+1, _state);
+    }
+    minlbfgssetcond(state, 0, 0, 0, 0, _state);
+    minlbfgssetxrep(state, ae_false, _state);
+    minlbfgssetstpmax(state, 0, _state);
+    minlbfgsrestartfrom(state, x, _state);
+    state->prectype = 0;
+}
+
+
+/*************************************************************************
+Modification of the preconditioner:
+default preconditioner (simple scaling) is used.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+After call to this function preconditioner is changed to the default one.
+
+NOTE:  you  can  change  preconditioner  "on  the  fly",  during algorithm
+iterations.
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetdefaultpreconditioner(minlbfgsstate* state,
+     ae_state *_state)
+{
+
+
+    state->prectype = 0;
+}
+
+
+/*************************************************************************
+Modification of the preconditioner:
+Cholesky factorization of approximate Hessian is used.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    P       -   triangular preconditioner, Cholesky factorization of
+                the approximate Hessian. array[0..N-1,0..N-1],
+                (if larger, only leading N elements are used).
+    IsUpper -   whether upper or lower triangle of P is given
+                (other triangle is not referenced)
+
+After call to this function preconditioner is changed to P  (P  is  copied
+into the internal buffer).
+
+NOTE:  you  can  change  preconditioner  "on  the  fly",  during algorithm
+iterations.
+
+NOTE 2:  P  should  be nonsingular. Exception will be thrown otherwise. It
+also should be well conditioned, although only strict  non-singularity  is
+tested.
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetcholeskypreconditioner(minlbfgsstate* state,
+     /* Real    */ ae_matrix* p,
+     ae_bool isupper,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double mx;
+
+
+    ae_assert(isfinitertrmatrix(p, state->n, isupper, _state), "MinLBFGSSetCholeskyPreconditioner: P contains infinite or NAN values!", _state);
+    mx = 0;
+    for(i=0; i<=state->n-1; i++)
+    {
+        mx = ae_maxreal(mx, ae_fabs(p->ptr.pp_double[i][i], _state), _state);
+    }
+    ae_assert(ae_fp_greater(mx,0), "MinLBFGSSetCholeskyPreconditioner: P is strictly singular!", _state);
+    if( state->denseh.rows<state->n||state->denseh.cols<state->n )
+    {
+        ae_matrix_set_length(&state->denseh, state->n, state->n, _state);
+    }
+    state->prectype = 1;
+    if( isupper )
+    {
+        rmatrixcopy(state->n, state->n, p, 0, 0, &state->denseh, 0, 0, _state);
+    }
+    else
+    {
+        rmatrixtranspose(state->n, state->n, p, 0, 0, &state->denseh, 0, 0, _state);
+    }
+}
+
+
+/*************************************************************************
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool minlbfgsiteration(minlbfgsstate* state, ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t m;
+    ae_int_t maxits;
+    double epsf;
+    double epsg;
+    double epsx;
+    ae_int_t i;
+    ae_int_t j;
+    ae_int_t ic;
+    ae_int_t mcinfo;
+    double v;
+    double vv;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        m = state->rstate.ia.ptr.p_int[1];
+        maxits = state->rstate.ia.ptr.p_int[2];
+        i = state->rstate.ia.ptr.p_int[3];
+        j = state->rstate.ia.ptr.p_int[4];
+        ic = state->rstate.ia.ptr.p_int[5];
+        mcinfo = state->rstate.ia.ptr.p_int[6];
+        epsf = state->rstate.ra.ptr.p_double[0];
+        epsg = state->rstate.ra.ptr.p_double[1];
+        epsx = state->rstate.ra.ptr.p_double[2];
+        v = state->rstate.ra.ptr.p_double[3];
+        vv = state->rstate.ra.ptr.p_double[4];
+    }
+    else
+    {
+        n = -983;
+        m = -989;
+        maxits = -834;
+        i = 900;
+        j = -287;
+        ic = 364;
+        mcinfo = 214;
+        epsf = -338;
+        epsg = -686;
+        epsx = 912;
+        v = 585;
+        vv = 497;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    if( state->rstate.stage==3 )
+    {
+        goto lbl_3;
+    }
+    
+    /*
+     * Routine body
+     */
+    
+    /*
+     * Unload frequently used variables from State structure
+     * (just for typing convinience)
+     */
+    n = state->n;
+    m = state->m;
+    epsg = state->epsg;
+    epsf = state->epsf;
+    epsx = state->epsx;
+    maxits = state->maxits;
+    state->repterminationtype = 0;
+    state->repiterationscount = 0;
+    state->repnfev = 0;
+    
+    /*
+     * Calculate F/G at the initial point
+     */
+    minlbfgs_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    state->needfg = ae_false;
+    if( !state->xrep )
+    {
+        goto lbl_4;
+    }
+    minlbfgs_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    state->xupdated = ae_false;
+lbl_4:
+    state->repnfev = 1;
+    state->fold = state->f;
+    v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    v = ae_sqrt(v, _state);
+    if( ae_fp_less_eq(v,epsg) )
+    {
+        state->repterminationtype = 4;
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Choose initial step and direction.
+     * Apply preconditioner, if we have something other than default.
+     */
+    if( ae_fp_eq(state->stpmax,0) )
+    {
+        state->stp = ae_minreal(1.0/v, 1, _state);
+    }
+    else
+    {
+        state->stp = ae_minreal(1.0/v, state->stpmax, _state);
+    }
+    ae_v_moveneg(&state->d.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( state->prectype==1 )
+    {
+        
+        /*
+         * Cholesky preconditioner is used
+         */
+        fblscholeskysolve(&state->denseh, 1.0, n, ae_true, &state->d, &state->autobuf, _state);
+    }
+    
+    /*
+     * Main cycle
+     */
+    state->k = 0;
+lbl_6:
+    if( ae_false )
+    {
+        goto lbl_7;
+    }
+    
+    /*
+     * Main cycle: prepare to 1-D line search
+     */
+    state->p = state->k%m;
+    state->q = ae_minint(state->k, m-1, _state);
+    
+    /*
+     * Store X[k], G[k]
+     */
+    ae_v_moveneg(&state->s.ptr.pp_double[state->p][0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_moveneg(&state->y.ptr.pp_double[state->p][0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * Minimize F(x+alpha*d)
+     * Calculate S[k], Y[k]
+     */
+    state->mcstage = 0;
+    if( state->k!=0 )
+    {
+        state->stp = 1.0;
+    }
+    linminnormalized(&state->d, &state->stp, n, _state);
+    mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state);
+lbl_8:
+    if( state->mcstage==0 )
+    {
+        goto lbl_9;
+    }
+    minlbfgs_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    state->needfg = ae_false;
+    mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state);
+    goto lbl_8;
+lbl_9:
+    if( !state->xrep )
+    {
+        goto lbl_10;
+    }
+    
+    /*
+     * report
+     */
+    minlbfgs_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 3;
+    goto lbl_rcomm;
+lbl_3:
+    state->xupdated = ae_false;
+lbl_10:
+    state->repnfev = state->repnfev+state->nfev;
+    state->repiterationscount = state->repiterationscount+1;
+    ae_v_add(&state->s.ptr.pp_double[state->p][0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_add(&state->y.ptr.pp_double[state->p][0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * Stopping conditions
+     */
+    if( state->repiterationscount>=maxits&&maxits>0 )
+    {
+        
+        /*
+         * Too many iterations
+         */
+        state->repterminationtype = 5;
+        result = ae_false;
+        return result;
+    }
+    v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( ae_fp_less_eq(ae_sqrt(v, _state),epsg) )
+    {
+        
+        /*
+         * Gradient is small enough
+         */
+        state->repterminationtype = 4;
+        result = ae_false;
+        return result;
+    }
+    if( ae_fp_less_eq(state->fold-state->f,epsf*ae_maxreal(ae_fabs(state->fold, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) )
+    {
+        
+        /*
+         * F(k+1)-F(k) is small enough
+         */
+        state->repterminationtype = 1;
+        result = ae_false;
+        return result;
+    }
+    v = ae_v_dotproduct(&state->s.ptr.pp_double[state->p][0], 1, &state->s.ptr.pp_double[state->p][0], 1, ae_v_len(0,n-1));
+    if( ae_fp_less_eq(ae_sqrt(v, _state),epsx) )
+    {
+        
+        /*
+         * X(k+1)-X(k) is small enough
+         */
+        state->repterminationtype = 2;
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * If Wolfe conditions are satisfied, we can update
+     * limited memory model.
+     *
+     * However, if conditions are not satisfied (NFEV limit is met,
+     * function is too wild, ...), we'll skip L-BFGS update
+     */
+    if( mcinfo!=1 )
+    {
+        
+        /*
+         * Skip update.
+         *
+         * In such cases we'll initialize search direction by
+         * antigradient vector, because it  leads to more
+         * transparent code with less number of special cases
+         */
+        state->fold = state->f;
+        ae_v_moveneg(&state->d.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    }
+    else
+    {
+        
+        /*
+         * Calculate Rho[k], GammaK
+         */
+        v = ae_v_dotproduct(&state->y.ptr.pp_double[state->p][0], 1, &state->s.ptr.pp_double[state->p][0], 1, ae_v_len(0,n-1));
+        vv = ae_v_dotproduct(&state->y.ptr.pp_double[state->p][0], 1, &state->y.ptr.pp_double[state->p][0], 1, ae_v_len(0,n-1));
+        if( ae_fp_eq(v,0)||ae_fp_eq(vv,0) )
+        {
+            
+            /*
+             * Rounding errors make further iterations impossible.
+             */
+            state->repterminationtype = -2;
+            result = ae_false;
+            return result;
+        }
+        state->rho.ptr.p_double[state->p] = 1/v;
+        state->gammak = v/vv;
+        
+        /*
+         *  Calculate d(k+1) = -H(k+1)*g(k+1)
+         *
+         *  for I:=K downto K-Q do
+         *      V = s(i)^T * work(iteration:I)
+         *      theta(i) = V
+         *      work(iteration:I+1) = work(iteration:I) - V*Rho(i)*y(i)
+         *  work(last iteration) = H0*work(last iteration) - preconditioner
+         *  for I:=K-Q to K do
+         *      V = y(i)^T*work(iteration:I)
+         *      work(iteration:I+1) = work(iteration:I) +(-V+theta(i))*Rho(i)*s(i)
+         *
+         *  NOW WORK CONTAINS d(k+1)
+         */
+        ae_v_move(&state->work.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        for(i=state->k; i>=state->k-state->q; i--)
+        {
+            ic = i%m;
+            v = ae_v_dotproduct(&state->s.ptr.pp_double[ic][0], 1, &state->work.ptr.p_double[0], 1, ae_v_len(0,n-1));
+            state->theta.ptr.p_double[ic] = v;
+            vv = v*state->rho.ptr.p_double[ic];
+            ae_v_subd(&state->work.ptr.p_double[0], 1, &state->y.ptr.pp_double[ic][0], 1, ae_v_len(0,n-1), vv);
+        }
+        if( state->prectype==0 )
+        {
+            
+            /*
+             * Simple preconditioner is used
+             */
+            v = state->gammak;
+            ae_v_muld(&state->work.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+        }
+        if( state->prectype==1 )
+        {
+            
+            /*
+             * Cholesky preconditioner is used
+             */
+            fblscholeskysolve(&state->denseh, 1, n, ae_true, &state->work, &state->autobuf, _state);
+        }
+        for(i=state->k-state->q; i<=state->k; i++)
+        {
+            ic = i%m;
+            v = ae_v_dotproduct(&state->y.ptr.pp_double[ic][0], 1, &state->work.ptr.p_double[0], 1, ae_v_len(0,n-1));
+            vv = state->rho.ptr.p_double[ic]*(-v+state->theta.ptr.p_double[ic]);
+            ae_v_addd(&state->work.ptr.p_double[0], 1, &state->s.ptr.pp_double[ic][0], 1, ae_v_len(0,n-1), vv);
+        }
+        ae_v_moveneg(&state->d.ptr.p_double[0], 1, &state->work.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        
+        /*
+         * Next step
+         */
+        state->fold = state->f;
+        state->k = state->k+1;
+    }
+    goto lbl_6;
+lbl_7:
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ia.ptr.p_int[1] = m;
+    state->rstate.ia.ptr.p_int[2] = maxits;
+    state->rstate.ia.ptr.p_int[3] = i;
+    state->rstate.ia.ptr.p_int[4] = j;
+    state->rstate.ia.ptr.p_int[5] = ic;
+    state->rstate.ia.ptr.p_int[6] = mcinfo;
+    state->rstate.ra.ptr.p_double[0] = epsf;
+    state->rstate.ra.ptr.p_double[1] = epsg;
+    state->rstate.ra.ptr.p_double[2] = epsx;
+    state->rstate.ra.ptr.p_double[3] = v;
+    state->rstate.ra.ptr.p_double[4] = vv;
+    return result;
+}
+
+
+/*************************************************************************
+L-BFGS algorithm results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    * -1    incorrect parameters were specified
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsresults(minlbfgsstate* state,
+     /* Real    */ ae_vector* x,
+     minlbfgsreport* rep,
+     ae_state *_state)
+{
+
+    ae_vector_clear(x);
+    _minlbfgsreport_clear(rep);
+
+    minlbfgsresultsbuf(state, x, rep, _state);
+}
+
+
+/*************************************************************************
+L-BFGS algorithm results
+
+Buffered implementation of MinLBFGSResults which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.08.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsresultsbuf(minlbfgsstate* state,
+     /* Real    */ ae_vector* x,
+     minlbfgsreport* rep,
+     ae_state *_state)
+{
+
+
+    if( x->cnt<state->n )
+    {
+        ae_vector_set_length(x, state->n, _state);
+    }
+    ae_v_move(&x->ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    rep->iterationscount = state->repiterationscount;
+    rep->nfev = state->repnfev;
+    rep->terminationtype = state->repterminationtype;
+}
+
+
+/*************************************************************************
+This  subroutine restarts LBFGS algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsrestartfrom(minlbfgsstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state)
+{
+
+
+    ae_assert(x->cnt>=state->n, "MinLBFGSRestartFrom: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, state->n, _state), "MinLBFGSRestartFrom: X contains infinite or NaN values!", _state);
+    ae_v_move(&state->x.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    ae_vector_set_length(&state->rstate.ia, 6+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 4+1, _state);
+    state->rstate.stage = -1;
+    minlbfgs_clearrequestfields(state, _state);
+}
+
+
+/*************************************************************************
+Clears request fileds (to be sure that we don't forgot to clear something)
+*************************************************************************/
+static void minlbfgs_clearrequestfields(minlbfgsstate* state,
+     ae_state *_state)
+{
+
+
+    state->needfg = ae_false;
+    state->xupdated = ae_false;
+}
+
+
+ae_bool _minlbfgsstate_init(minlbfgsstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->rho, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->y, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->s, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->theta, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->d, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->work, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->denseh, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->autobuf, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !_linminstate_init(&p->lstate, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _minlbfgsstate_init_copy(minlbfgsstate* dst, minlbfgsstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->n = src->n;
+    dst->m = src->m;
+    dst->epsg = src->epsg;
+    dst->epsf = src->epsf;
+    dst->epsx = src->epsx;
+    dst->maxits = src->maxits;
+    dst->flags = src->flags;
+    dst->xrep = src->xrep;
+    dst->stpmax = src->stpmax;
+    dst->nfev = src->nfev;
+    dst->mcstage = src->mcstage;
+    dst->k = src->k;
+    dst->q = src->q;
+    dst->p = src->p;
+    if( !ae_vector_init_copy(&dst->rho, &src->rho, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->y, &src->y, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->s, &src->s, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->theta, &src->theta, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic) )
+        return ae_false;
+    dst->stp = src->stp;
+    if( !ae_vector_init_copy(&dst->work, &src->work, _state, make_automatic) )
+        return ae_false;
+    dst->fold = src->fold;
+    dst->prectype = src->prectype;
+    dst->gammak = src->gammak;
+    if( !ae_matrix_init_copy(&dst->denseh, &src->denseh, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->autobuf, &src->autobuf, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->f = src->f;
+    if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) )
+        return ae_false;
+    dst->needfg = src->needfg;
+    dst->xupdated = src->xupdated;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    dst->repiterationscount = src->repiterationscount;
+    dst->repnfev = src->repnfev;
+    dst->repterminationtype = src->repterminationtype;
+    if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _minlbfgsstate_clear(minlbfgsstate* p)
+{
+    ae_vector_clear(&p->rho);
+    ae_matrix_clear(&p->y);
+    ae_matrix_clear(&p->s);
+    ae_vector_clear(&p->theta);
+    ae_vector_clear(&p->d);
+    ae_vector_clear(&p->work);
+    ae_matrix_clear(&p->denseh);
+    ae_vector_clear(&p->autobuf);
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->g);
+    _rcommstate_clear(&p->rstate);
+    _linminstate_clear(&p->lstate);
+}
+
+
+ae_bool _minlbfgsreport_init(minlbfgsreport* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _minlbfgsreport_init_copy(minlbfgsreport* dst, minlbfgsreport* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->iterationscount = src->iterationscount;
+    dst->nfev = src->nfev;
+    dst->terminationtype = src->terminationtype;
+    return ae_true;
+}
+
+
+void _minlbfgsreport_clear(minlbfgsreport* p)
+{
+}
+
+
+
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] and Jacobian of f[].
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec()  and jac() callbacks.
+First  one  is used to calculate f[] at given point, second one calculates
+f[] and Jacobian df[i]/dx[j].
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state)
+{
+
+    _minlmstate_clear(state);
+
+    ae_assert(n>=1, "MinLMCreateVJ: N<1!", _state);
+    ae_assert(m>=1, "MinLMCreateVJ: M<1!", _state);
+    ae_assert(x->cnt>=n, "MinLMCreateVJ: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLMCreateVJ: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize, check parameters
+     */
+    state->n = n;
+    state->m = m;
+    state->algomode = 1;
+    state->hasf = ae_false;
+    state->hasfi = ae_true;
+    state->hasg = ae_false;
+    
+    /*
+     * second stage of initialization
+     */
+    minlm_lmprepare(n, m, ae_false, state, _state);
+    minlmsetacctype(state, 0, _state);
+    minlmsetcond(state, 0, 0, 0, 0, _state);
+    minlmsetxrep(state, ae_false, _state);
+    minlmsetstpmax(state, 0, _state);
+    minlmrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] only. Finite differences  are  used  to
+calculate Jacobian.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+* function vector f[] at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec() callback.
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works with general form function and does not accept function vector), but
+it will  lead  to  exception being thrown after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateV() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+    DiffStep-   differentiation step, >0
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+See also MinLMIteration, MinLMResults.
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatev(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     double diffstep,
+     minlmstate* state,
+     ae_state *_state)
+{
+
+    _minlmstate_clear(state);
+
+    ae_assert(ae_isfinite(diffstep, _state), "MinLMCreateV: DiffStep is not finite!", _state);
+    ae_assert(ae_fp_greater(diffstep,0), "MinLMCreateV: DiffStep<=0!", _state);
+    ae_assert(n>=1, "MinLMCreateV: N<1!", _state);
+    ae_assert(m>=1, "MinLMCreateV: M<1!", _state);
+    ae_assert(x->cnt>=n, "MinLMCreateV: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLMCreateV: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize
+     */
+    state->n = n;
+    state->m = m;
+    state->algomode = 0;
+    state->hasf = ae_false;
+    state->hasfi = ae_true;
+    state->hasg = ae_false;
+    state->diffstep = diffstep;
+    
+    /*
+     * second stage of initialization
+     */
+    minlm_lmprepare(n, m, ae_false, state, _state);
+    minlmsetacctype(state, 1, _state);
+    minlmsetcond(state, 0, 0, 0, 0, _state);
+    minlmsetxrep(state, ae_false, _state);
+    minlmsetstpmax(state, 0, _state);
+    minlmrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+    LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This  function  is  used  to  find  minimum  of general form (not "sum-of-
+-squares") function
+    F = F(x[0], ..., x[n-1])
+using  its  gradient  and  Hessian.  Levenberg-Marquardt modification with
+L-BFGS pre-optimization and internal pre-conditioned  L-BFGS  optimization
+after each Levenberg-Marquardt step is used.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function value F at given point X
+* F and gradient G (simultaneously) at given point X
+* F, G and Hessian H (simultaneously) at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts func(),  grad()  and  hess()
+function pointers. First pointer is used to calculate F  at  given  point,
+second  one  calculates  F(x)  and  grad F(x),  third one calculates F(x),
+grad F(x), hess F(x).
+
+You can try to initialize MinLMState structure with FGH-function and  then
+use incorrect version of MinLMOptimize() (for example, version which  does
+not provide Hessian matrix), but it will lead to  exception  being  thrown
+after first attempt to calculate Hessian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateFGH() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   pointers (delegates, etc.) to callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgh(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state)
+{
+
+    _minlmstate_clear(state);
+
+    ae_assert(n>=1, "MinLMCreateFGH: N<1!", _state);
+    ae_assert(x->cnt>=n, "MinLMCreateFGH: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLMCreateFGH: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize
+     */
+    state->n = n;
+    state->m = 0;
+    state->algomode = 2;
+    state->hasf = ae_true;
+    state->hasfi = ae_false;
+    state->hasg = ae_true;
+    
+    /*
+     * init2
+     */
+    minlm_lmprepare(n, 0, ae_true, state, _state);
+    minlmsetacctype(state, 2, _state);
+    minlmsetcond(state, 0, 0, 0, 0, _state);
+    minlmsetxrep(state, ae_false, _state);
+    minlmsetstpmax(state, 0, _state);
+    minlmrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using:
+* value of function vector f[]
+* value of Jacobian of f[]
+* gradient of merit function F(x)
+
+This function creates optimizer which uses acceleration strategy 2.  Cheap
+gradient of merit function (which is twice the product of function  vector
+and Jacobian) is used for accelerated iterations (see User Guide for  more
+info on this subject).
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+* gradient of
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts  fvec(),  jac()  and  grad()
+callbacks. First one is used to calculate f[] at given point,  second  one
+calculates f[] and Jacobian df[i]/dx[j], last one calculates  gradient  of
+merit function F(x).
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVGJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevgj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state)
+{
+
+    _minlmstate_clear(state);
+
+    ae_assert(n>=1, "MinLMCreateVGJ: N<1!", _state);
+    ae_assert(m>=1, "MinLMCreateVGJ: M<1!", _state);
+    ae_assert(x->cnt>=n, "MinLMCreateVGJ: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLMCreateVGJ: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize, check parameters
+     */
+    state->n = n;
+    state->m = m;
+    state->algomode = 1;
+    state->hasf = ae_false;
+    state->hasfi = ae_true;
+    state->hasg = ae_false;
+    
+    /*
+     * second stage of initialization
+     */
+    minlm_lmprepare(n, m, ae_false, state, _state);
+    minlmsetacctype(state, 2, _state);
+    minlmsetcond(state, 0, 0, 0, 0, _state);
+    minlmsetxrep(state, ae_false, _state);
+    minlmsetstpmax(state, 0, _state);
+    minlmrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+                   LEVENBERG-MARQUARDT-LIKE METHOD FOR
+                  NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of F(), gradient of F(), function vector f[]  and  Jacobian of
+f[].
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend to use MinLMCreateVGJ, which
+provides similar, but more consistent interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state)
+{
+
+    _minlmstate_clear(state);
+
+    ae_assert(n>=1, "MinLMCreateFGJ: N<1!", _state);
+    ae_assert(m>=1, "MinLMCreateFGJ: M<1!", _state);
+    ae_assert(x->cnt>=n, "MinLMCreateFGJ: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLMCreateFGJ: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize
+     */
+    state->n = n;
+    state->m = m;
+    state->algomode = 1;
+    state->hasf = ae_true;
+    state->hasfi = ae_false;
+    state->hasg = ae_true;
+    
+    /*
+     * init2
+     */
+    minlm_lmprepare(n, m, ae_true, state, _state);
+    minlmsetacctype(state, 2, _state);
+    minlmsetcond(state, 0, 0, 0, 0, _state);
+    minlmsetxrep(state, ae_false, _state);
+    minlmsetstpmax(state, 0, _state);
+    minlmrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+    CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using  value  of  F(),  function  vector  f[] and Jacobian of f[]. Classic
+Levenberg-Marquardt method is used.
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend  to use MinLMCreateVJ, which
+provides similar, but more consistent and feature-rich interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state)
+{
+
+    _minlmstate_clear(state);
+
+    ae_assert(n>=1, "MinLMCreateFJ: N<1!", _state);
+    ae_assert(m>=1, "MinLMCreateFJ: M<1!", _state);
+    ae_assert(x->cnt>=n, "MinLMCreateFJ: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinLMCreateFJ: X contains infinite or NaN values!", _state);
+    
+    /*
+     * initialize
+     */
+    state->n = n;
+    state->m = m;
+    state->algomode = 1;
+    state->hasf = ae_true;
+    state->hasfi = ae_false;
+    state->hasg = ae_false;
+    
+    /*
+     * init 2
+     */
+    minlm_lmprepare(n, m, ae_true, state, _state);
+    minlmsetacctype(state, 0, _state);
+    minlmsetcond(state, 0, 0, 0, 0, _state);
+    minlmsetxrep(state, ae_false, _state);
+    minlmsetstpmax(state, 0, _state);
+    minlmrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+This function sets stopping conditions for Levenberg-Marquardt optimization
+algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations   is    unlimited.   Only   Levenberg-Marquardt
+                iterations  are  counted  (L-BFGS/CG  iterations  are  NOT
+                counted because their cost is very low compared to that of
+                LM).
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetcond(minlmstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(epsg, _state), "MinLMSetCond: EpsG is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsg,0), "MinLMSetCond: negative EpsG!", _state);
+    ae_assert(ae_isfinite(epsf, _state), "MinLMSetCond: EpsF is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsf,0), "MinLMSetCond: negative EpsF!", _state);
+    ae_assert(ae_isfinite(epsx, _state), "MinLMSetCond: EpsX is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsx,0), "MinLMSetCond: negative EpsX!", _state);
+    ae_assert(maxits>=0, "MinLMSetCond: negative MaxIts!", _state);
+    if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 )
+    {
+        epsx = 1.0E-6;
+    }
+    state->epsg = epsg;
+    state->epsf = epsf;
+    state->epsx = epsx;
+    state->maxits = maxits;
+}
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinLMOptimize(). Both Levenberg-Marquardt and internal  L-BFGS
+iterations are reported.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetxrep(minlmstate* state, ae_bool needxrep, ae_state *_state)
+{
+
+
+    state->xrep = needxrep;
+}
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+NOTE: non-zero StpMax leads to moderate  performance  degradation  because
+intermediate  step  of  preconditioned L-BFGS optimization is incompatible
+with limits on step size.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetstpmax(minlmstate* state, double stpmax, ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(stpmax, _state), "MinLMSetStpMax: StpMax is not finite!", _state);
+    ae_assert(ae_fp_greater_eq(stpmax,0), "MinLMSetStpMax: StpMax<0!", _state);
+    state->stpmax = stpmax;
+}
+
+
+/*************************************************************************
+This function is used to change acceleration settings
+
+You can choose between three acceleration strategies:
+* AccType=0, no acceleration.
+* AccType=1, secant updates are used to update quadratic model after  each
+  iteration. After fixed number of iterations (or after  model  breakdown)
+  we  recalculate  quadratic  model  using  analytic  Jacobian  or  finite
+  differences. Number of secant-based iterations depends  on  optimization
+  settings: about 3 iterations - when we have analytic Jacobian, up to 2*N
+  iterations - when we use finite differences to calculate Jacobian.
+* AccType=2, after quadratic model is built and LM step is made, we use it
+  as preconditioner for several (5-10) iterations of L-BFGS algorithm.
+
+AccType=1 is recommended when Jacobian  calculation  cost  is  prohibitive
+high (several Mx1 function vector calculations  followed  by  several  NxN
+Cholesky factorizations are faster than calculation of one M*N  Jacobian).
+It should also be used when we have no Jacobian, because finite difference
+approximation takes too much time to compute.
+
+AccType=2 is recommended when Jacobian is cheap - much more  cheaper  than
+one  Cholesky  factorization.   We   can   reduce   number   of   Cholesky
+factorizations at the cost of increased number of  Jacobian  calculations.
+Sometimes it helps.
+
+Table below list  optimization  protocols  (XYZ  protocol  corresponds  to
+MinLMCreateXYZ) and acceleration types they support (and use by  default).
+
+ACCELERATION TYPES SUPPORTED BY OPTIMIZATION PROTOCOLS:
+
+protocol    0   1   2   comment
+V           +   +
+VJ          +   +   +
+FGH         +       +
+VGJ         +   +   +   special protocol, not for widespread use
+FJ          +       +   obsolete protocol, not recommended
+FGJ         +       +   obsolete protocol, not recommended
+
+DAFAULT VALUES:
+
+protocol    0   1   2   comment
+V               x       without acceleration it is so slooooooooow
+VJ          x
+FGH         x
+VGJ                 x   we've implicitly turned (2) by passing gradient
+FJ          x           obsolete protocol, not recommended
+FGJ                 x   obsolete protocol, not recommended
+
+NOTE: this  function should be called before optimization. Attempt to call
+it during algorithm iterations may result in unexpected behavior.
+
+NOTE: attempt to call this function with unsupported protocol/acceleration
+combination will result in exception being thrown.
+
+  -- ALGLIB --
+     Copyright 14.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetacctype(minlmstate* state,
+     ae_int_t acctype,
+     ae_state *_state)
+{
+
+
+    ae_assert((acctype==0||acctype==1)||acctype==2, "MinLMSetAccType: incorrect AccType!", _state);
+    if( acctype==0 )
+    {
+        state->maxmodelage = 0;
+        state->makeadditers = ae_false;
+        return;
+    }
+    if( acctype==1 )
+    {
+        ae_assert(state->hasfi, "MinLMSetAccType: AccType=1 is incompatible with current protocol!", _state);
+        if( state->algomode==0 )
+        {
+            state->maxmodelage = 2*state->n;
+        }
+        else
+        {
+            state->maxmodelage = minlm_smallmodelage;
+        }
+        state->makeadditers = ae_false;
+        return;
+    }
+    if( acctype==2 )
+    {
+        ae_assert(state->algomode==1||state->algomode==2, "MinLMSetAccType: AccType=2 is incompatible with current protocol!", _state);
+        state->maxmodelage = 0;
+        state->makeadditers = ae_true;
+        return;
+    }
+}
+
+
+/*************************************************************************
+NOTES:
+
+1. Depending on function used to create state  structure,  this  algorithm
+   may accept Jacobian and/or Hessian and/or gradient.  According  to  the
+   said above, there ase several versions of this function,  which  accept
+   different sets of callbacks.
+
+   This flexibility opens way to subtle errors - you may create state with
+   MinLMCreateFGH() (optimization using Hessian), but call function  which
+   does not accept Hessian. So when algorithm will request Hessian,  there
+   will be no callback to call. In this case exception will be thrown.
+
+   Be careful to avoid such errors because there is no way to find them at
+   compile time - you can see them at runtime only.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool minlmiteration(minlmstate* state, ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t m;
+    ae_bool bflag;
+    ae_int_t iflag;
+    double v;
+    double s;
+    double t;
+    ae_int_t i;
+    ae_int_t k;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        m = state->rstate.ia.ptr.p_int[1];
+        iflag = state->rstate.ia.ptr.p_int[2];
+        i = state->rstate.ia.ptr.p_int[3];
+        k = state->rstate.ia.ptr.p_int[4];
+        bflag = state->rstate.ba.ptr.p_bool[0];
+        v = state->rstate.ra.ptr.p_double[0];
+        s = state->rstate.ra.ptr.p_double[1];
+        t = state->rstate.ra.ptr.p_double[2];
+    }
+    else
+    {
+        n = -983;
+        m = -989;
+        iflag = -834;
+        i = 900;
+        k = -287;
+        bflag = ae_false;
+        v = 214;
+        s = -338;
+        t = -686;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    if( state->rstate.stage==3 )
+    {
+        goto lbl_3;
+    }
+    if( state->rstate.stage==4 )
+    {
+        goto lbl_4;
+    }
+    if( state->rstate.stage==5 )
+    {
+        goto lbl_5;
+    }
+    if( state->rstate.stage==6 )
+    {
+        goto lbl_6;
+    }
+    if( state->rstate.stage==7 )
+    {
+        goto lbl_7;
+    }
+    if( state->rstate.stage==8 )
+    {
+        goto lbl_8;
+    }
+    if( state->rstate.stage==9 )
+    {
+        goto lbl_9;
+    }
+    if( state->rstate.stage==10 )
+    {
+        goto lbl_10;
+    }
+    if( state->rstate.stage==11 )
+    {
+        goto lbl_11;
+    }
+    if( state->rstate.stage==12 )
+    {
+        goto lbl_12;
+    }
+    if( state->rstate.stage==13 )
+    {
+        goto lbl_13;
+    }
+    if( state->rstate.stage==14 )
+    {
+        goto lbl_14;
+    }
+    if( state->rstate.stage==15 )
+    {
+        goto lbl_15;
+    }
+    if( state->rstate.stage==16 )
+    {
+        goto lbl_16;
+    }
+    if( state->rstate.stage==17 )
+    {
+        goto lbl_17;
+    }
+    
+    /*
+     * Routine body
+     */
+    
+    /*
+     * prepare
+     */
+    n = state->n;
+    m = state->m;
+    state->repiterationscount = 0;
+    state->repterminationtype = 0;
+    state->repnfunc = 0;
+    state->repnjac = 0;
+    state->repngrad = 0;
+    state->repnhess = 0;
+    state->repncholesky = 0;
+    
+    /*
+     * Initial report of current point
+     *
+     * Note 1: we rewrite State.X twice because
+     * user may accidentally change it after first call.
+     *
+     * Note 2: we set NeedF or NeedFI depending on what
+     * information about function we have.
+     */
+    if( !state->xrep )
+    {
+        goto lbl_18;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    if( !state->hasf )
+    {
+        goto lbl_20;
+    }
+    state->needf = ae_true;
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    state->needf = ae_false;
+    goto lbl_21;
+lbl_20:
+    ae_assert(state->hasfi, "MinLM: internal error 2!", _state);
+    state->needfi = ae_true;
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    state->needfi = ae_false;
+    v = ae_v_dotproduct(&state->fi.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    state->f = v;
+lbl_21:
+    state->repnfunc = state->repnfunc+1;
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    state->xupdated = ae_false;
+lbl_18:
+    
+    /*
+     * Prepare control variables
+     */
+    state->nu = 1;
+    state->lambdav = -ae_maxrealnumber;
+    state->modelage = state->maxmodelage+1;
+    state->deltaxready = ae_false;
+    state->deltafready = ae_false;
+    
+    /*
+     * Main cycle.
+     *
+     * We move through it until either:
+     * * one of the stopping conditions is met
+     * * we decide that stopping conditions are too stringent
+     *   and break from cycle
+     *
+     */
+lbl_22:
+    if( ae_false )
+    {
+        goto lbl_23;
+    }
+    
+    /*
+     * First, we have to prepare quadratic model for our function.
+     * We use BFlag to ensure that model is prepared;
+     * if it is false at the end of this block, something went wrong.
+     *
+     * We may either calculate brand new model or update old one.
+     *
+     * Before this block we have:
+     * * State.XBase            - current position.
+     * * State.DeltaX           - if DeltaXReady is True
+     * * State.DeltaF           - if DeltaFReady is True
+     *
+     * After this block is over, we will have:
+     * * State.XBase            - base point (unchanged)
+     * * State.FBase            - F(XBase)
+     * * State.GBase            - linear term
+     * * State.QuadraticModel   - quadratic term
+     * * State.LambdaV          - current estimate for lambda
+     *
+     * We also clear DeltaXReady/DeltaFReady flags
+     * after initialization is done.
+     */
+    bflag = ae_false;
+    if( !(state->algomode==0||state->algomode==1) )
+    {
+        goto lbl_24;
+    }
+    
+    /*
+     * Calculate f[] and Jacobian
+     */
+    if( !(state->modelage>state->maxmodelage||!(state->deltaxready&&state->deltafready)) )
+    {
+        goto lbl_26;
+    }
+    
+    /*
+     * Refresh model (using either finite differences or analytic Jacobian)
+     */
+    if( state->algomode!=0 )
+    {
+        goto lbl_28;
+    }
+    
+    /*
+     * Optimization using F values only.
+     * Use finite differences to estimate Jacobian.
+     */
+    ae_assert(state->hasfi, "MinLMIteration: internal error when estimating Jacobian (no f[])", _state);
+    k = 0;
+lbl_30:
+    if( k>n-1 )
+    {
+        goto lbl_32;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->x.ptr.p_double[k] = state->x.ptr.p_double[k]-state->diffstep;
+    minlm_clearrequestfields(state, _state);
+    state->needfi = ae_true;
+    state->rstate.stage = 3;
+    goto lbl_rcomm;
+lbl_3:
+    state->repnfunc = state->repnfunc+1;
+    ae_v_move(&state->fm1.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->x.ptr.p_double[k] = state->x.ptr.p_double[k]+state->diffstep;
+    minlm_clearrequestfields(state, _state);
+    state->needfi = ae_true;
+    state->rstate.stage = 4;
+    goto lbl_rcomm;
+lbl_4:
+    state->repnfunc = state->repnfunc+1;
+    ae_v_move(&state->fp1.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    v = 1/(2*state->diffstep);
+    ae_v_moved(&state->j.ptr.pp_double[0][k], state->j.stride, &state->fp1.ptr.p_double[0], 1, ae_v_len(0,m-1), v);
+    ae_v_subd(&state->j.ptr.pp_double[0][k], state->j.stride, &state->fm1.ptr.p_double[0], 1, ae_v_len(0,m-1), v);
+    k = k+1;
+    goto lbl_30;
+lbl_32:
+    
+    /*
+     * Calculate F(XBase)
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    state->needfi = ae_true;
+    state->rstate.stage = 5;
+    goto lbl_rcomm;
+lbl_5:
+    state->needfi = ae_false;
+    state->repnfunc = state->repnfunc+1;
+    state->repnjac = state->repnjac+1;
+    
+    /*
+     * New model
+     */
+    state->modelage = 0;
+    goto lbl_29;
+lbl_28:
+    
+    /*
+     * Obtain f[] and Jacobian
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    state->needfij = ae_true;
+    state->rstate.stage = 6;
+    goto lbl_rcomm;
+lbl_6:
+    state->needfij = ae_false;
+    state->repnfunc = state->repnfunc+1;
+    state->repnjac = state->repnjac+1;
+    
+    /*
+     * New model
+     */
+    state->modelage = 0;
+lbl_29:
+    goto lbl_27;
+lbl_26:
+    
+    /*
+     * State.J contains Jacobian or its current approximation;
+     * refresh it using secant updates:
+     *
+     * f(x0+dx) = f(x0) + J*dx,
+     * J_new = J_old + u*h'
+     * h = x_new-x_old
+     * u = (f_new - f_old - J_old*h)/(h'h)
+     *
+     * We can explicitly generate h and u, but it is
+     * preferential to do in-place calculations. Only
+     * I-th row of J_old is needed to calculate u[I],
+     * so we can update J row by row in one pass.
+     *
+     * NOTE: we expect that State.XBase contains new point,
+     * State.FBase contains old point, State.DeltaX and
+     * State.DeltaY contain updates from last step.
+     */
+    ae_assert(state->deltaxready&&state->deltafready, "MinLMIteration: uninitialized DeltaX/DeltaF", _state);
+    t = ae_v_dotproduct(&state->deltax.ptr.p_double[0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_assert(ae_fp_neq(t,0), "MinLM: internal error (T=0)", _state);
+    for(i=0; i<=m-1; i++)
+    {
+        v = ae_v_dotproduct(&state->j.ptr.pp_double[i][0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        v = (state->deltaf.ptr.p_double[i]-v)/t;
+        ae_v_addd(&state->j.ptr.pp_double[i][0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+    }
+    ae_v_move(&state->fi.ptr.p_double[0], 1, &state->fibase.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    ae_v_add(&state->fi.ptr.p_double[0], 1, &state->deltaf.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    
+    /*
+     * Increase model age
+     */
+    state->modelage = state->modelage+1;
+lbl_27:
+    
+    /*
+     * Generate quadratic model:
+     *     f(xbase+dx) =
+     *       = (f0 + J*dx)'(f0 + J*dx)
+     *       = f0^2 + dx'J'f0 + f0*J*dx + dx'J'J*dx
+     *       = f0^2 + 2*f0*J*dx + dx'J'J*dx
+     *
+     * Note that we calculate 2*(J'J) instead of J'J because
+     * our quadratic model is based on Tailor decomposition,
+     * i.e. it has 0.5 before quadratic term.
+     */
+    rmatrixgemm(n, n, m, 2.0, &state->j, 0, 0, 1, &state->j, 0, 0, 0, 0.0, &state->quadraticmodel, 0, 0, _state);
+    rmatrixmv(n, m, &state->j, 0, 0, 1, &state->fi, 0, &state->gbase, 0, _state);
+    ae_v_muld(&state->gbase.ptr.p_double[0], 1, ae_v_len(0,n-1), 2);
+    v = ae_v_dotproduct(&state->fi.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    state->fbase = v;
+    ae_v_move(&state->fibase.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    
+    /*
+     * set control variables
+     */
+    bflag = ae_true;
+lbl_24:
+    if( state->algomode!=2 )
+    {
+        goto lbl_33;
+    }
+    ae_assert(!state->hasfi, "MinLMIteration: internal error (HasFI is True in Hessian-based mode)", _state);
+    
+    /*
+     * Obtain F, G, H
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    state->needfgh = ae_true;
+    state->rstate.stage = 7;
+    goto lbl_rcomm;
+lbl_7:
+    state->needfgh = ae_false;
+    state->repnfunc = state->repnfunc+1;
+    state->repngrad = state->repngrad+1;
+    state->repnhess = state->repnhess+1;
+    rmatrixcopy(n, n, &state->h, 0, 0, &state->quadraticmodel, 0, 0, _state);
+    ae_v_move(&state->gbase.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->fbase = state->f;
+    
+    /*
+     * set control variables
+     */
+    bflag = ae_true;
+    state->modelage = 0;
+lbl_33:
+    ae_assert(bflag, "MinLM: internal integrity check failed!", _state);
+    state->deltaxready = ae_false;
+    state->deltafready = ae_false;
+    
+    /*
+     * If Lambda is not initialized, initialize it using quadratic model
+     */
+    if( ae_fp_less(state->lambdav,0) )
+    {
+        state->lambdav = 0;
+        for(i=0; i<=n-1; i++)
+        {
+            state->lambdav = ae_maxreal(state->lambdav, ae_fabs(state->quadraticmodel.ptr.pp_double[i][i], _state), _state);
+        }
+        state->lambdav = 0.001*state->lambdav;
+        if( ae_fp_eq(state->lambdav,0) )
+        {
+            state->lambdav = 1;
+        }
+    }
+    
+    /*
+     * Test stopping conditions for function gradient
+     */
+    v = ae_v_dotproduct(&state->gbase.ptr.p_double[0], 1, &state->gbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    v = ae_sqrt(v, _state);
+    if( ae_fp_greater(v,state->epsg) )
+    {
+        goto lbl_35;
+    }
+    if( state->modelage!=0 )
+    {
+        goto lbl_37;
+    }
+    
+    /*
+     * Model is fresh, we can rely on it and terminate algorithm
+     */
+    state->repterminationtype = 4;
+    if( !state->xrep )
+    {
+        goto lbl_39;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->f = state->fbase;
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 8;
+    goto lbl_rcomm;
+lbl_8:
+    state->xupdated = ae_false;
+lbl_39:
+    result = ae_false;
+    return result;
+    goto lbl_38;
+lbl_37:
+    
+    /*
+     * Model is not fresh, we should refresh it and test
+     * conditions once more
+     */
+    state->modelage = state->maxmodelage+1;
+    goto lbl_22;
+lbl_38:
+lbl_35:
+    
+    /*
+     * Find value of Levenberg-Marquardt damping parameter which:
+     * * leads to positive definite damped model
+     * * within bounds specified by StpMax
+     * * generates step which decreases function value
+     *
+     * After this block IFlag is set to:
+     * * -2, if model update is needed (either Lambda growth is too large
+     *       or step is too short, but we can't rely on model and stop iterations)
+     * * -1, if model is fresh, Lambda have grown too large, termination is needed
+     * *  0, if everything is OK, continue iterations
+     *
+     * State.Nu can have any value on enter, but after exit it is set to 1.0
+     */
+    iflag = -99;
+lbl_41:
+    if( ae_false )
+    {
+        goto lbl_42;
+    }
+    
+    /*
+     * Do we need model update?
+     */
+    if( state->modelage>0&&ae_fp_greater_eq(state->nu,minlm_suspiciousnu) )
+    {
+        iflag = -2;
+        goto lbl_42;
+    }
+    
+    /*
+     * DampedModel = QuadraticModel+lambda*I
+     */
+    rmatrixcopy(n, n, &state->quadraticmodel, 0, 0, &state->dampedmodel, 0, 0, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        state->dampedmodel.ptr.pp_double[i][i] = state->dampedmodel.ptr.pp_double[i][i]+state->lambdav;
+    }
+    
+    /*
+     * 1. try to solve (RawModel+Lambda*I)*dx = -g.
+     *    increase lambda if left part is not positive definite.
+     * 2. bound step by StpMax
+     *    increase lambda if step is larger than StpMax
+     *
+     * We use BFlag variable to indicate that we have to increase Lambda.
+     * If it is False, we will try to increase Lambda and move to new iteration.
+     */
+    bflag = ae_true;
+    state->repncholesky = state->repncholesky+1;
+    if( spdmatrixcholeskyrec(&state->dampedmodel, 0, n, ae_true, &state->choleskybuf, _state) )
+    {
+        ae_v_moveneg(&state->xdir.ptr.p_double[0], 1, &state->gbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        fblscholeskysolve(&state->dampedmodel, 1.0, n, ae_true, &state->xdir, &state->choleskybuf, _state);
+        v = ae_v_dotproduct(&state->xdir.ptr.p_double[0], 1, &state->xdir.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        if( ae_isfinite(v, _state) )
+        {
+            v = ae_sqrt(v, _state);
+            if( ae_fp_greater(state->stpmax,0)&&ae_fp_greater(v,state->stpmax) )
+            {
+                bflag = ae_false;
+            }
+        }
+        else
+        {
+            bflag = ae_false;
+        }
+    }
+    else
+    {
+        bflag = ae_false;
+    }
+    if( !bflag )
+    {
+        
+        /*
+         * Solution failed:
+         * try to increase lambda to make matrix positive definite and continue.
+         */
+        if( !minlm_increaselambda(&state->lambdav, &state->nu, _state) )
+        {
+            iflag = -1;
+            goto lbl_42;
+        }
+        goto lbl_41;
+    }
+    
+    /*
+     * Step in State.XDir and it is bounded by StpMax.
+     *
+     * We should check stopping conditions on step size here.
+     * DeltaX, which is used for secant updates, is initialized here.
+     *
+     * This code is a bit tricky because sometimes XDir<>0, but
+     * it is so small that XDir+XBase==XBase (in finite precision
+     * arithmetics). So we set DeltaX to XBase, then
+     * add XDir, and then subtract XBase to get exact value of
+     * DeltaX.
+     *
+     * Step length is estimated using DeltaX.
+     *
+     * NOTE: stopping conditions are tested
+     * for fresh models only (ModelAge=0)
+     */
+    ae_v_move(&state->deltax.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_add(&state->deltax.ptr.p_double[0], 1, &state->xdir.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_sub(&state->deltax.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->deltaxready = ae_true;
+    v = ae_v_dotproduct(&state->deltax.ptr.p_double[0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    v = ae_sqrt(v, _state);
+    if( ae_fp_greater(v,state->epsx) )
+    {
+        goto lbl_43;
+    }
+    if( state->modelage!=0 )
+    {
+        goto lbl_45;
+    }
+    
+    /*
+     * Step is too short, model is fresh and we can rely on it.
+     * Terminating.
+     */
+    state->repterminationtype = 2;
+    if( !state->xrep )
+    {
+        goto lbl_47;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->f = state->fbase;
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 9;
+    goto lbl_rcomm;
+lbl_9:
+    state->xupdated = ae_false;
+lbl_47:
+    result = ae_false;
+    return result;
+    goto lbl_46;
+lbl_45:
+    
+    /*
+     * Step is suspiciously short, but model is not fresh
+     * and we can't rely on it.
+     */
+    iflag = -2;
+    goto lbl_42;
+lbl_46:
+lbl_43:
+    
+    /*
+     * Let's evaluate new step:
+     * a) if we have Fi vector, we evaluate it using rcomm, and
+     *    then we manually calculate State.F as sum of squares of Fi[]
+     * b) if we have F value, we just evaluate it through rcomm interface
+     *
+     * We prefer (a) because we may need Fi vector for additional
+     * iterations
+     */
+    ae_assert(state->hasfi||state->hasf, "MinLM: internal error 2!", _state);
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_add(&state->x.ptr.p_double[0], 1, &state->xdir.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    if( !state->hasfi )
+    {
+        goto lbl_49;
+    }
+    state->needfi = ae_true;
+    state->rstate.stage = 10;
+    goto lbl_rcomm;
+lbl_10:
+    state->needfi = ae_false;
+    v = ae_v_dotproduct(&state->fi.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    state->f = v;
+    ae_v_move(&state->deltaf.ptr.p_double[0], 1, &state->fi.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    ae_v_sub(&state->deltaf.ptr.p_double[0], 1, &state->fibase.ptr.p_double[0], 1, ae_v_len(0,m-1));
+    state->deltafready = ae_true;
+    goto lbl_50;
+lbl_49:
+    state->needf = ae_true;
+    state->rstate.stage = 11;
+    goto lbl_rcomm;
+lbl_11:
+    state->needf = ae_false;
+lbl_50:
+    state->repnfunc = state->repnfunc+1;
+    if( ae_fp_greater_eq(state->f,state->fbase) )
+    {
+        
+        /*
+         * Increase lambda and continue
+         */
+        if( !minlm_increaselambda(&state->lambdav, &state->nu, _state) )
+        {
+            iflag = -1;
+            goto lbl_42;
+        }
+        goto lbl_41;
+    }
+    
+    /*
+     * We've found our step!
+     */
+    iflag = 0;
+    goto lbl_42;
+    goto lbl_41;
+lbl_42:
+    state->nu = 1;
+    ae_assert(iflag>=-2&&iflag<=0, "MinLM: internal integrity check failed!", _state);
+    if( iflag==-2 )
+    {
+        state->modelage = state->maxmodelage+1;
+        goto lbl_22;
+    }
+    if( iflag==-1 )
+    {
+        goto lbl_23;
+    }
+    
+    /*
+     * Levenberg-Marquardt step is ready.
+     * Compare predicted vs. actual decrease and decide what to do with lambda.
+     *
+     * NOTE: we expect that State.DeltaX contains direction of step,
+     * State.F contains function value at new point.
+     */
+    ae_assert(state->deltaxready, "MinLM: deltaX is not ready", _state);
+    t = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = ae_v_dotproduct(&state->quadraticmodel.ptr.pp_double[i][0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        t = t+state->deltax.ptr.p_double[i]*state->gbase.ptr.p_double[i]+0.5*state->deltax.ptr.p_double[i]*v;
+    }
+    state->predicteddecrease = -t;
+    state->actualdecrease = -(state->f-state->fbase);
+    if( ae_fp_less_eq(state->predicteddecrease,0) )
+    {
+        goto lbl_23;
+    }
+    v = state->actualdecrease/state->predicteddecrease;
+    if( ae_fp_greater_eq(v,0.1) )
+    {
+        goto lbl_51;
+    }
+    if( minlm_increaselambda(&state->lambdav, &state->nu, _state) )
+    {
+        goto lbl_53;
+    }
+    
+    /*
+     * Lambda is too large, we have to break iterations.
+     */
+    state->repterminationtype = 7;
+    if( !state->xrep )
+    {
+        goto lbl_55;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->f = state->fbase;
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 12;
+    goto lbl_rcomm;
+lbl_12:
+    state->xupdated = ae_false;
+lbl_55:
+    result = ae_false;
+    return result;
+lbl_53:
+lbl_51:
+    if( ae_fp_greater(v,0.5) )
+    {
+        minlm_decreaselambda(&state->lambdav, &state->nu, _state);
+    }
+    
+    /*
+     * Accept step, report it and
+     * test stopping conditions on iterations count and function decrease.
+     *
+     * NOTE: we expect that State.DeltaX contains direction of step,
+     * State.F contains function value at new point.
+     *
+     * NOTE2: we should update XBase ONLY. In the beginning of the next
+     * iteration we expect that State.FIBase is NOT updated and
+     * contains old value of a function vector.
+     */
+    ae_v_add(&state->xbase.ptr.p_double[0], 1, &state->deltax.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( !state->xrep )
+    {
+        goto lbl_57;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 13;
+    goto lbl_rcomm;
+lbl_13:
+    state->xupdated = ae_false;
+lbl_57:
+    state->repiterationscount = state->repiterationscount+1;
+    if( state->repiterationscount>=state->maxits&&state->maxits>0 )
+    {
+        state->repterminationtype = 5;
+    }
+    if( state->modelage==0 )
+    {
+        if( ae_fp_less_eq(ae_fabs(state->f-state->fbase, _state),state->epsf*ae_maxreal(1, ae_maxreal(ae_fabs(state->f, _state), ae_fabs(state->fbase, _state), _state), _state)) )
+        {
+            state->repterminationtype = 1;
+        }
+    }
+    if( state->repterminationtype<=0 )
+    {
+        goto lbl_59;
+    }
+    if( !state->xrep )
+    {
+        goto lbl_61;
+    }
+    
+    /*
+     * Report: XBase contains new point, F contains function value at new point
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 14;
+    goto lbl_rcomm;
+lbl_14:
+    state->xupdated = ae_false;
+lbl_61:
+    result = ae_false;
+    return result;
+lbl_59:
+    state->modelage = state->modelage+1;
+    
+    /*
+     * Additional iterations for unconstrained problems:
+     * preconditioned L-BFGS is used.
+     *
+     * NOTE: additional iterations are incompatible with secant updates
+     * because they invalidate
+     */
+    if( !(ae_fp_eq(state->stpmax,0)&&state->makeadditers) )
+    {
+        goto lbl_63;
+    }
+    ae_assert(state->hasg||state->m!=0, "MinLM: no grad or Jacobian for additional iterations", _state);
+    
+    /*
+     * Make preconditioned iterations
+     */
+    minlbfgssetcholeskypreconditioner(&state->internalstate, &state->dampedmodel, ae_true, _state);
+    minlbfgsrestartfrom(&state->internalstate, &state->xbase, _state);
+lbl_65:
+    if( !minlbfgsiteration(&state->internalstate, _state) )
+    {
+        goto lbl_66;
+    }
+    if( !state->internalstate.needfg )
+    {
+        goto lbl_67;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->internalstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minlm_clearrequestfields(state, _state);
+    if( !state->hasg )
+    {
+        goto lbl_69;
+    }
+    state->needfg = ae_true;
+    state->rstate.stage = 15;
+    goto lbl_rcomm;
+lbl_15:
+    state->needfg = ae_false;
+    state->repngrad = state->repngrad+1;
+    ae_v_move(&state->internalstate.g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->internalstate.f = state->f;
+    goto lbl_70;
+lbl_69:
+    state->needfij = ae_true;
+    state->rstate.stage = 16;
+    goto lbl_rcomm;
+lbl_16:
+    state->needfij = ae_false;
+    state->repnfunc = state->repnfunc+1;
+    state->repnjac = state->repnjac+1;
+    for(i=0; i<=n-1; i++)
+    {
+        state->internalstate.g.ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=m-1; i++)
+    {
+        v = 2*state->fi.ptr.p_double[i];
+        ae_v_addd(&state->internalstate.g.ptr.p_double[0], 1, &state->j.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        state->internalstate.f = state->internalstate.f+ae_sqr(state->fi.ptr.p_double[i], _state);
+    }
+lbl_70:
+lbl_67:
+    goto lbl_65;
+lbl_66:
+    minlbfgsresultsbuf(&state->internalstate, &state->xbase, &state->internalrep, _state);
+    
+    /*
+     * Invalidate DeltaX/DeltaF (control variables used for integrity checks)
+     */
+    state->deltaxready = ae_false;
+    state->deltafready = ae_false;
+lbl_63:
+    goto lbl_22;
+lbl_23:
+    
+    /*
+     * Lambda is too large, we have to break iterations.
+     */
+    state->repterminationtype = 7;
+    if( !state->xrep )
+    {
+        goto lbl_71;
+    }
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->f = state->fbase;
+    minlm_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 17;
+    goto lbl_rcomm;
+lbl_17:
+    state->xupdated = ae_false;
+lbl_71:
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ia.ptr.p_int[1] = m;
+    state->rstate.ia.ptr.p_int[2] = iflag;
+    state->rstate.ia.ptr.p_int[3] = i;
+    state->rstate.ia.ptr.p_int[4] = k;
+    state->rstate.ba.ptr.p_bool[0] = bflag;
+    state->rstate.ra.ptr.p_double[0] = v;
+    state->rstate.ra.ptr.p_double[1] = s;
+    state->rstate.ra.ptr.p_double[2] = t;
+    return result;
+}
+
+
+/*************************************************************************
+Levenberg-Marquardt algorithm results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report;
+                see comments for this structure for more info.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmresults(minlmstate* state,
+     /* Real    */ ae_vector* x,
+     minlmreport* rep,
+     ae_state *_state)
+{
+
+    ae_vector_clear(x);
+    _minlmreport_clear(rep);
+
+    minlmresultsbuf(state, x, rep, _state);
+}
+
+
+/*************************************************************************
+Levenberg-Marquardt algorithm results
+
+Buffered implementation of MinLMResults(), which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmresultsbuf(minlmstate* state,
+     /* Real    */ ae_vector* x,
+     minlmreport* rep,
+     ae_state *_state)
+{
+
+
+    if( x->cnt<state->n )
+    {
+        ae_vector_set_length(x, state->n, _state);
+    }
+    ae_v_move(&x->ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    rep->iterationscount = state->repiterationscount;
+    rep->terminationtype = state->repterminationtype;
+    rep->nfunc = state->repnfunc;
+    rep->njac = state->repnjac;
+    rep->ngrad = state->repngrad;
+    rep->nhess = state->repnhess;
+    rep->ncholesky = state->repncholesky;
+}
+
+
+/*************************************************************************
+This  subroutine  restarts  LM  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used for reverse communication previously
+                allocated with MinLMCreateXXX call.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmrestartfrom(minlmstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state)
+{
+
+
+    ae_assert(x->cnt>=state->n, "MinLMRestartFrom: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, state->n, _state), "MinLMRestartFrom: X contains infinite or NaN values!", _state);
+    ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    ae_vector_set_length(&state->rstate.ia, 4+1, _state);
+    ae_vector_set_length(&state->rstate.ba, 0+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 2+1, _state);
+    state->rstate.stage = -1;
+    minlm_clearrequestfields(state, _state);
+}
+
+
+/*************************************************************************
+Prepare internal structures (except for RComm).
+
+Note: M must be zero for FGH mode, non-zero for V/VJ/FJ/FGJ mode.
+*************************************************************************/
+static void minlm_lmprepare(ae_int_t n,
+     ae_int_t m,
+     ae_bool havegrad,
+     minlmstate* state,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+
+    if( n<=0||m<0 )
+    {
+        return;
+    }
+    if( havegrad )
+    {
+        ae_vector_set_length(&state->g, n, _state);
+    }
+    if( m!=0 )
+    {
+        ae_matrix_set_length(&state->j, m, n, _state);
+        ae_vector_set_length(&state->fi, m, _state);
+        ae_vector_set_length(&state->fibase, m, _state);
+        ae_vector_set_length(&state->deltaf, m, _state);
+        ae_vector_set_length(&state->fm2, m, _state);
+        ae_vector_set_length(&state->fm1, m, _state);
+        ae_vector_set_length(&state->fp2, m, _state);
+        ae_vector_set_length(&state->fp1, m, _state);
+    }
+    else
+    {
+        ae_matrix_set_length(&state->h, n, n, _state);
+    }
+    ae_vector_set_length(&state->x, n, _state);
+    ae_vector_set_length(&state->deltax, n, _state);
+    ae_matrix_set_length(&state->quadraticmodel, n, n, _state);
+    ae_matrix_set_length(&state->dampedmodel, n, n, _state);
+    ae_vector_set_length(&state->xbase, n, _state);
+    ae_vector_set_length(&state->gbase, n, _state);
+    ae_vector_set_length(&state->xdir, n, _state);
+    
+    /*
+     * prepare internal L-BFGS
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        state->x.ptr.p_double[i] = 0;
+    }
+    minlbfgscreate(n, ae_minint(minlm_additers, n, _state), &state->x, &state->internalstate, _state);
+    minlbfgssetcond(&state->internalstate, 0.0, 0.0, 0.0, ae_minint(minlm_additers, n, _state), _state);
+}
+
+
+/*************************************************************************
+Clears request fileds (to be sure that we don't forgot to clear something)
+*************************************************************************/
+static void minlm_clearrequestfields(minlmstate* state, ae_state *_state)
+{
+
+
+    state->needf = ae_false;
+    state->needfg = ae_false;
+    state->needfgh = ae_false;
+    state->needfij = ae_false;
+    state->needfi = ae_false;
+    state->xupdated = ae_false;
+}
+
+
+/*************************************************************************
+Increases lambda, returns False when there is a danger of overflow
+*************************************************************************/
+static ae_bool minlm_increaselambda(double* lambdav,
+     double* nu,
+     ae_state *_state)
+{
+    double lnlambda;
+    double lnnu;
+    double lnlambdaup;
+    double lnmax;
+    ae_bool result;
+
+
+    result = ae_false;
+    lnlambda = ae_log(*lambdav, _state);
+    lnlambdaup = ae_log(minlm_lambdaup, _state);
+    lnnu = ae_log(*nu, _state);
+    lnmax = ae_log(ae_maxrealnumber, _state);
+    if( ae_fp_greater(lnlambda+lnlambdaup+lnnu,0.25*lnmax) )
+    {
+        return result;
+    }
+    if( ae_fp_greater(lnnu+ae_log(2, _state),lnmax) )
+    {
+        return result;
+    }
+    *lambdav = *lambdav*minlm_lambdaup*(*nu);
+    *nu = *nu*2;
+    result = ae_true;
+    return result;
+}
+
+
+/*************************************************************************
+Decreases lambda, but leaves it unchanged when there is danger of underflow.
+*************************************************************************/
+static void minlm_decreaselambda(double* lambdav,
+     double* nu,
+     ae_state *_state)
+{
+
+
+    *nu = 1;
+    if( ae_fp_less(ae_log(*lambdav, _state)+ae_log(minlm_lambdadown, _state),ae_log(ae_minrealnumber, _state)) )
+    {
+        *lambdav = ae_minrealnumber;
+    }
+    else
+    {
+        *lambdav = *lambdav*minlm_lambdadown;
+    }
+}
+
+
+ae_bool _minlmstate_init(minlmstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->fi, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->j, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->h, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->fibase, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->gbase, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->quadraticmodel, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->dampedmodel, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xdir, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->deltax, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->deltaf, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->choleskybuf, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->fm2, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->fm1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->fp2, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->fp1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_minlbfgsstate_init(&p->internalstate, _state, make_automatic) )
+        return ae_false;
+    if( !_minlbfgsreport_init(&p->internalrep, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _minlmstate_init_copy(minlmstate* dst, minlmstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->n = src->n;
+    dst->m = src->m;
+    dst->diffstep = src->diffstep;
+    dst->epsg = src->epsg;
+    dst->epsf = src->epsf;
+    dst->epsx = src->epsx;
+    dst->maxits = src->maxits;
+    dst->xrep = src->xrep;
+    dst->stpmax = src->stpmax;
+    dst->maxmodelage = src->maxmodelage;
+    dst->makeadditers = src->makeadditers;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->f = src->f;
+    if( !ae_vector_init_copy(&dst->fi, &src->fi, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->j, &src->j, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->h, &src->h, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) )
+        return ae_false;
+    dst->needf = src->needf;
+    dst->needfg = src->needfg;
+    dst->needfgh = src->needfgh;
+    dst->needfij = src->needfij;
+    dst->needfi = src->needfi;
+    dst->xupdated = src->xupdated;
+    dst->algomode = src->algomode;
+    dst->hasf = src->hasf;
+    dst->hasfi = src->hasfi;
+    dst->hasg = src->hasg;
+    if( !ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic) )
+        return ae_false;
+    dst->fbase = src->fbase;
+    if( !ae_vector_init_copy(&dst->fibase, &src->fibase, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->gbase, &src->gbase, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->quadraticmodel, &src->quadraticmodel, _state, make_automatic) )
+        return ae_false;
+    dst->lambdav = src->lambdav;
+    dst->nu = src->nu;
+    if( !ae_matrix_init_copy(&dst->dampedmodel, &src->dampedmodel, _state, make_automatic) )
+        return ae_false;
+    dst->modelage = src->modelage;
+    if( !ae_vector_init_copy(&dst->xdir, &src->xdir, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->deltax, &src->deltax, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->deltaf, &src->deltaf, _state, make_automatic) )
+        return ae_false;
+    dst->deltaxready = src->deltaxready;
+    dst->deltafready = src->deltafready;
+    dst->repiterationscount = src->repiterationscount;
+    dst->repterminationtype = src->repterminationtype;
+    dst->repnfunc = src->repnfunc;
+    dst->repnjac = src->repnjac;
+    dst->repngrad = src->repngrad;
+    dst->repnhess = src->repnhess;
+    dst->repncholesky = src->repncholesky;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->choleskybuf, &src->choleskybuf, _state, make_automatic) )
+        return ae_false;
+    dst->actualdecrease = src->actualdecrease;
+    dst->predicteddecrease = src->predicteddecrease;
+    if( !ae_vector_init_copy(&dst->fm2, &src->fm2, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->fm1, &src->fm1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->fp2, &src->fp2, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->fp1, &src->fp1, _state, make_automatic) )
+        return ae_false;
+    if( !_minlbfgsstate_init_copy(&dst->internalstate, &src->internalstate, _state, make_automatic) )
+        return ae_false;
+    if( !_minlbfgsreport_init_copy(&dst->internalrep, &src->internalrep, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _minlmstate_clear(minlmstate* p)
+{
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->fi);
+    ae_matrix_clear(&p->j);
+    ae_matrix_clear(&p->h);
+    ae_vector_clear(&p->g);
+    ae_vector_clear(&p->xbase);
+    ae_vector_clear(&p->fibase);
+    ae_vector_clear(&p->gbase);
+    ae_matrix_clear(&p->quadraticmodel);
+    ae_matrix_clear(&p->dampedmodel);
+    ae_vector_clear(&p->xdir);
+    ae_vector_clear(&p->deltax);
+    ae_vector_clear(&p->deltaf);
+    _rcommstate_clear(&p->rstate);
+    ae_vector_clear(&p->choleskybuf);
+    ae_vector_clear(&p->fm2);
+    ae_vector_clear(&p->fm1);
+    ae_vector_clear(&p->fp2);
+    ae_vector_clear(&p->fp1);
+    _minlbfgsstate_clear(&p->internalstate);
+    _minlbfgsreport_clear(&p->internalrep);
+}
+
+
+ae_bool _minlmreport_init(minlmreport* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _minlmreport_init_copy(minlmreport* dst, minlmreport* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->iterationscount = src->iterationscount;
+    dst->terminationtype = src->terminationtype;
+    dst->nfunc = src->nfunc;
+    dst->njac = src->njac;
+    dst->ngrad = src->ngrad;
+    dst->nhess = src->nhess;
+    dst->ncholesky = src->ncholesky;
+    return ae_true;
+}
+
+
+void _minlmreport_clear(minlmreport* p)
+{
+}
+
+
+
+
+/*************************************************************************
+              NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING
+                      MODIFIED ACTIVE SET ALGORITHM
+                   WILLIAM W. HAGER AND HONGCHAO ZHANG
+
+DESCRIPTION:
+The  subroutine  minimizes  function  F(x)  of  N  arguments  with   bound
+constraints: BndL[i] <= x[i] <= BndU[i]
+
+This method is  globally  convergent  as  long  as  grad(f)  is  Lipschitz
+continuous on a level set: L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinASACreate() call
+2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and
+   other functions
+3. User calls MinASAOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinASAResults() to get solution
+5. Optionally, user may call MinASARestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinASARestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from sizes of
+                  X/BndL/BndU.
+    X       -   starting point, array[0..N-1].
+    BndL    -   lower bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very small number as bound: -1000,  -1.0E6
+                or -1.0E300, or something like that.
+    BndU    -   upper bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very large number as bound: +1000,  +1.0E6
+                or +1.0E300, or something like that.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+NOTES:
+
+1. you may tune stopping conditions with MinASASetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinASASetStpMax() function to bound algorithm's steps.
+3. this function does NOT support infinite/NaN values in X, BndL, BndU.
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasacreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* bndl,
+     /* Real    */ ae_vector* bndu,
+     minasastate* state,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+    _minasastate_clear(state);
+
+    ae_assert(n>=1, "MinASA: N too small!", _state);
+    ae_assert(x->cnt>=n, "MinCGCreate: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinCGCreate: X contains infinite or NaN values!", _state);
+    ae_assert(bndl->cnt>=n, "MinCGCreate: Length(BndL)<N!", _state);
+    ae_assert(isfinitevector(bndl, n, _state), "MinCGCreate: BndL contains infinite or NaN values!", _state);
+    ae_assert(bndu->cnt>=n, "MinCGCreate: Length(BndU)<N!", _state);
+    ae_assert(isfinitevector(bndu, n, _state), "MinCGCreate: BndU contains infinite or NaN values!", _state);
+    for(i=0; i<=n-1; i++)
+    {
+        ae_assert(ae_fp_less_eq(bndl->ptr.p_double[i],bndu->ptr.p_double[i]), "MinASA: inconsistent bounds!", _state);
+        ae_assert(ae_fp_less_eq(bndl->ptr.p_double[i],x->ptr.p_double[i]), "MinASA: infeasible X!", _state);
+        ae_assert(ae_fp_less_eq(x->ptr.p_double[i],bndu->ptr.p_double[i]), "MinASA: infeasible X!", _state);
+    }
+    
+    /*
+     * Initialize
+     */
+    state->n = n;
+    minasasetcond(state, 0, 0, 0, 0, _state);
+    minasasetxrep(state, ae_false, _state);
+    minasasetstpmax(state, 0, _state);
+    minasasetalgorithm(state, -1, _state);
+    ae_vector_set_length(&state->bndl, n, _state);
+    ae_vector_set_length(&state->bndu, n, _state);
+    ae_vector_set_length(&state->ak, n, _state);
+    ae_vector_set_length(&state->xk, n, _state);
+    ae_vector_set_length(&state->dk, n, _state);
+    ae_vector_set_length(&state->an, n, _state);
+    ae_vector_set_length(&state->xn, n, _state);
+    ae_vector_set_length(&state->dn, n, _state);
+    ae_vector_set_length(&state->x, n, _state);
+    ae_vector_set_length(&state->d, n, _state);
+    ae_vector_set_length(&state->g, n, _state);
+    ae_vector_set_length(&state->gc, n, _state);
+    ae_vector_set_length(&state->work, n, _state);
+    ae_vector_set_length(&state->yk, n, _state);
+    minasarestartfrom(state, x, bndl, bndu, _state);
+}
+
+
+/*************************************************************************
+This function sets stopping conditions for the ASA optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetcond(minasastate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(epsg, _state), "MinASASetCond: EpsG is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsg,0), "MinASASetCond: negative EpsG!", _state);
+    ae_assert(ae_isfinite(epsf, _state), "MinASASetCond: EpsF is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsf,0), "MinASASetCond: negative EpsF!", _state);
+    ae_assert(ae_isfinite(epsx, _state), "MinASASetCond: EpsX is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsx,0), "MinASASetCond: negative EpsX!", _state);
+    ae_assert(maxits>=0, "MinASASetCond: negative MaxIts!", _state);
+    if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 )
+    {
+        epsx = 1.0E-6;
+    }
+    state->epsg = epsg;
+    state->epsf = epsf;
+    state->epsx = epsx;
+    state->maxits = maxits;
+}
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinASAOptimize().
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetxrep(minasastate* state, ae_bool needxrep, ae_state *_state)
+{
+
+
+    state->xrep = needxrep;
+}
+
+
+/*************************************************************************
+This function sets optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm stat
+    UAType  -   algorithm type:
+                * -1    automatic selection of the best algorithm
+                * 0     DY (Dai and Yuan) algorithm
+                * 1     Hybrid DY-HS algorithm
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetalgorithm(minasastate* state,
+     ae_int_t algotype,
+     ae_state *_state)
+{
+
+
+    ae_assert(algotype>=-1&&algotype<=1, "MinASASetAlgorithm: incorrect AlgoType!", _state);
+    if( algotype==-1 )
+    {
+        algotype = 1;
+    }
+    state->cgtype = algotype;
+}
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length (zero by default).
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetstpmax(minasastate* state, double stpmax, ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(stpmax, _state), "MinASASetStpMax: StpMax is not finite!", _state);
+    ae_assert(ae_fp_greater_eq(stpmax,0), "MinASASetStpMax: StpMax<0!", _state);
+    state->stpmax = stpmax;
+}
+
+
+/*************************************************************************
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool minasaiteration(minasastate* state, ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t i;
+    double betak;
+    double v;
+    double vv;
+    ae_int_t mcinfo;
+    ae_bool b;
+    ae_bool stepfound;
+    ae_int_t diffcnt;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        i = state->rstate.ia.ptr.p_int[1];
+        mcinfo = state->rstate.ia.ptr.p_int[2];
+        diffcnt = state->rstate.ia.ptr.p_int[3];
+        b = state->rstate.ba.ptr.p_bool[0];
+        stepfound = state->rstate.ba.ptr.p_bool[1];
+        betak = state->rstate.ra.ptr.p_double[0];
+        v = state->rstate.ra.ptr.p_double[1];
+        vv = state->rstate.ra.ptr.p_double[2];
+    }
+    else
+    {
+        n = -983;
+        i = -989;
+        mcinfo = -834;
+        diffcnt = 900;
+        b = ae_true;
+        stepfound = ae_false;
+        betak = 214;
+        v = -338;
+        vv = -686;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    if( state->rstate.stage==3 )
+    {
+        goto lbl_3;
+    }
+    if( state->rstate.stage==4 )
+    {
+        goto lbl_4;
+    }
+    if( state->rstate.stage==5 )
+    {
+        goto lbl_5;
+    }
+    if( state->rstate.stage==6 )
+    {
+        goto lbl_6;
+    }
+    if( state->rstate.stage==7 )
+    {
+        goto lbl_7;
+    }
+    if( state->rstate.stage==8 )
+    {
+        goto lbl_8;
+    }
+    if( state->rstate.stage==9 )
+    {
+        goto lbl_9;
+    }
+    if( state->rstate.stage==10 )
+    {
+        goto lbl_10;
+    }
+    if( state->rstate.stage==11 )
+    {
+        goto lbl_11;
+    }
+    if( state->rstate.stage==12 )
+    {
+        goto lbl_12;
+    }
+    if( state->rstate.stage==13 )
+    {
+        goto lbl_13;
+    }
+    if( state->rstate.stage==14 )
+    {
+        goto lbl_14;
+    }
+    
+    /*
+     * Routine body
+     */
+    
+    /*
+     * Prepare
+     */
+    n = state->n;
+    state->repterminationtype = 0;
+    state->repiterationscount = 0;
+    state->repnfev = 0;
+    state->debugrestartscount = 0;
+    state->cgtype = 1;
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    for(i=0; i<=n-1; i++)
+    {
+        if( ae_fp_eq(state->xk.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->xk.ptr.p_double[i],state->bndu.ptr.p_double[i]) )
+        {
+            state->ak.ptr.p_double[i] = 0;
+        }
+        else
+        {
+            state->ak.ptr.p_double[i] = 1;
+        }
+    }
+    state->mu = 0.1;
+    state->curalgo = 0;
+    
+    /*
+     * Calculate F/G, initialize algorithm
+     */
+    minasa_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    state->needfg = ae_false;
+    if( !state->xrep )
+    {
+        goto lbl_15;
+    }
+    
+    /*
+     * progress report
+     */
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    state->xupdated = ae_false;
+lbl_15:
+    if( ae_fp_less_eq(minasa_asaboundedantigradnorm(state, _state),state->epsg) )
+    {
+        state->repterminationtype = 4;
+        result = ae_false;
+        return result;
+    }
+    state->repnfev = state->repnfev+1;
+    
+    /*
+     * Main cycle
+     *
+     * At the beginning of new iteration:
+     * * CurAlgo stores current algorithm selector
+     * * State.XK, State.F and State.G store current X/F/G
+     * * State.AK stores current set of active constraints
+     */
+lbl_17:
+    if( ae_false )
+    {
+        goto lbl_18;
+    }
+    
+    /*
+     * GPA algorithm
+     */
+    if( state->curalgo!=0 )
+    {
+        goto lbl_19;
+    }
+    state->k = 0;
+    state->acount = 0;
+lbl_21:
+    if( ae_false )
+    {
+        goto lbl_22;
+    }
+    
+    /*
+     * Determine Dk = proj(xk - gk)-xk
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        state->d.ptr.p_double[i] = boundval(state->xk.ptr.p_double[i]-state->g.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state)-state->xk.ptr.p_double[i];
+    }
+    
+    /*
+     * Armijo line search.
+     * * exact search with alpha=1 is tried first,
+     *   'exact' means that we evaluate f() EXACTLY at
+     *   bound(x-g,bndl,bndu), without intermediate floating
+     *   point operations.
+     * * alpha<1 are tried if explicit search wasn't successful
+     * Result is placed into XN.
+     *
+     * Two types of search are needed because we can't
+     * just use second type with alpha=1 because in finite
+     * precision arithmetics (x1-x0)+x0 may differ from x1.
+     * So while x1 is correctly bounded (it lie EXACTLY on
+     * boundary, if it is active), (x1-x0)+x0 may be
+     * not bounded.
+     */
+    v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->dginit = v;
+    state->finit = state->f;
+    if( !(ae_fp_less_eq(minasa_asad1norm(state, _state),state->stpmax)||ae_fp_eq(state->stpmax,0)) )
+    {
+        goto lbl_23;
+    }
+    
+    /*
+     * Try alpha=1 step first
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        state->x.ptr.p_double[i] = boundval(state->xk.ptr.p_double[i]-state->g.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state);
+    }
+    minasa_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    state->needfg = ae_false;
+    state->repnfev = state->repnfev+1;
+    stepfound = ae_fp_less_eq(state->f,state->finit+minasa_gpaftol*state->dginit);
+    goto lbl_24;
+lbl_23:
+    stepfound = ae_false;
+lbl_24:
+    if( !stepfound )
+    {
+        goto lbl_25;
+    }
+    
+    /*
+     * we are at the boundary(ies)
+     */
+    ae_v_move(&state->xn.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->stp = 1;
+    goto lbl_26;
+lbl_25:
+    
+    /*
+     * alpha=1 is too large, try smaller values
+     */
+    state->stp = 1;
+    linminnormalized(&state->d, &state->stp, n, _state);
+    state->dginit = state->dginit/state->stp;
+    state->stp = minasa_gpadecay*state->stp;
+    if( ae_fp_greater(state->stpmax,0) )
+    {
+        state->stp = ae_minreal(state->stp, state->stpmax, _state);
+    }
+lbl_27:
+    if( ae_false )
+    {
+        goto lbl_28;
+    }
+    v = state->stp;
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->x.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
+    minasa_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 3;
+    goto lbl_rcomm;
+lbl_3:
+    state->needfg = ae_false;
+    state->repnfev = state->repnfev+1;
+    if( ae_fp_less_eq(state->stp,minasa_stpmin) )
+    {
+        goto lbl_28;
+    }
+    if( ae_fp_less_eq(state->f,state->finit+state->stp*minasa_gpaftol*state->dginit) )
+    {
+        goto lbl_28;
+    }
+    state->stp = state->stp*minasa_gpadecay;
+    goto lbl_27;
+lbl_28:
+    ae_v_move(&state->xn.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+lbl_26:
+    state->repiterationscount = state->repiterationscount+1;
+    if( !state->xrep )
+    {
+        goto lbl_29;
+    }
+    
+    /*
+     * progress report
+     */
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 4;
+    goto lbl_rcomm;
+lbl_4:
+    state->xupdated = ae_false;
+lbl_29:
+    
+    /*
+     * Calculate new set of active constraints.
+     * Reset counter if active set was changed.
+     * Prepare for the new iteration
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        if( ae_fp_eq(state->xn.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->xn.ptr.p_double[i],state->bndu.ptr.p_double[i]) )
+        {
+            state->an.ptr.p_double[i] = 0;
+        }
+        else
+        {
+            state->an.ptr.p_double[i] = 1;
+        }
+    }
+    for(i=0; i<=n-1; i++)
+    {
+        if( ae_fp_neq(state->ak.ptr.p_double[i],state->an.ptr.p_double[i]) )
+        {
+            state->acount = -1;
+            break;
+        }
+    }
+    state->acount = state->acount+1;
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->ak.ptr.p_double[0], 1, &state->an.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * Stopping conditions
+     */
+    if( !(state->repiterationscount>=state->maxits&&state->maxits>0) )
+    {
+        goto lbl_31;
+    }
+    
+    /*
+     * Too many iterations
+     */
+    state->repterminationtype = 5;
+    if( !state->xrep )
+    {
+        goto lbl_33;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 5;
+    goto lbl_rcomm;
+lbl_5:
+    state->xupdated = ae_false;
+lbl_33:
+    result = ae_false;
+    return result;
+lbl_31:
+    if( ae_fp_greater(minasa_asaboundedantigradnorm(state, _state),state->epsg) )
+    {
+        goto lbl_35;
+    }
+    
+    /*
+     * Gradient is small enough
+     */
+    state->repterminationtype = 4;
+    if( !state->xrep )
+    {
+        goto lbl_37;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 6;
+    goto lbl_rcomm;
+lbl_6:
+    state->xupdated = ae_false;
+lbl_37:
+    result = ae_false;
+    return result;
+lbl_35:
+    v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( ae_fp_greater(ae_sqrt(v, _state)*state->stp,state->epsx) )
+    {
+        goto lbl_39;
+    }
+    
+    /*
+     * Step size is too small, no further improvement is
+     * possible
+     */
+    state->repterminationtype = 2;
+    if( !state->xrep )
+    {
+        goto lbl_41;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 7;
+    goto lbl_rcomm;
+lbl_7:
+    state->xupdated = ae_false;
+lbl_41:
+    result = ae_false;
+    return result;
+lbl_39:
+    if( ae_fp_greater(state->finit-state->f,state->epsf*ae_maxreal(ae_fabs(state->finit, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) )
+    {
+        goto lbl_43;
+    }
+    
+    /*
+     * F(k+1)-F(k) is small enough
+     */
+    state->repterminationtype = 1;
+    if( !state->xrep )
+    {
+        goto lbl_45;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 8;
+    goto lbl_rcomm;
+lbl_8:
+    state->xupdated = ae_false;
+lbl_45:
+    result = ae_false;
+    return result;
+lbl_43:
+    
+    /*
+     * Decide - should we switch algorithm or not
+     */
+    if( minasa_asauisempty(state, _state) )
+    {
+        if( ae_fp_greater_eq(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state)) )
+        {
+            state->curalgo = 1;
+            goto lbl_22;
+        }
+        else
+        {
+            state->mu = state->mu*minasa_asarho;
+        }
+    }
+    else
+    {
+        if( state->acount==minasa_n1 )
+        {
+            if( ae_fp_greater_eq(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state)) )
+            {
+                state->curalgo = 1;
+                goto lbl_22;
+            }
+        }
+    }
+    
+    /*
+     * Next iteration
+     */
+    state->k = state->k+1;
+    goto lbl_21;
+lbl_22:
+lbl_19:
+    
+    /*
+     * CG algorithm
+     */
+    if( state->curalgo!=1 )
+    {
+        goto lbl_47;
+    }
+    
+    /*
+     * first, check that there are non-active constraints.
+     * move to GPA algorithm, if all constraints are active
+     */
+    b = ae_true;
+    for(i=0; i<=n-1; i++)
+    {
+        if( ae_fp_neq(state->ak.ptr.p_double[i],0) )
+        {
+            b = ae_false;
+            break;
+        }
+    }
+    if( b )
+    {
+        state->curalgo = 0;
+        goto lbl_17;
+    }
+    
+    /*
+     * CG iterations
+     */
+    state->fold = state->f;
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    for(i=0; i<=n-1; i++)
+    {
+        state->dk.ptr.p_double[i] = -state->g.ptr.p_double[i]*state->ak.ptr.p_double[i];
+        state->gc.ptr.p_double[i] = state->g.ptr.p_double[i]*state->ak.ptr.p_double[i];
+    }
+lbl_49:
+    if( ae_false )
+    {
+        goto lbl_50;
+    }
+    
+    /*
+     * Store G[k] for later calculation of Y[k]
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        state->yk.ptr.p_double[i] = -state->gc.ptr.p_double[i];
+    }
+    
+    /*
+     * Make a CG step in direction given by DK[]:
+     * * calculate step. Step projection into feasible set
+     *   is used. It has several benefits: a) step may be
+     *   found with usual line search, b) multiple constraints
+     *   may be activated with one step, c) activated constraints
+     *   are detected in a natural way - just compare x[i] with
+     *   bounds
+     * * update active set, set B to True, if there
+     *   were changes in the set.
+     */
+    ae_v_move(&state->d.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->xn.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->mcstage = 0;
+    state->stp = 1;
+    linminnormalized(&state->d, &state->stp, n, _state);
+    if( ae_fp_neq(state->laststep,0) )
+    {
+        state->stp = state->laststep;
+    }
+    mcsrch(n, &state->xn, &state->f, &state->gc, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state);
+lbl_51:
+    if( state->mcstage==0 )
+    {
+        goto lbl_52;
+    }
+    
+    /*
+     * preprocess data: bound State.XN so it belongs to the
+     * feasible set and store it in the State.X
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        state->x.ptr.p_double[i] = boundval(state->xn.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state);
+    }
+    
+    /*
+     * RComm
+     */
+    minasa_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 9;
+    goto lbl_rcomm;
+lbl_9:
+    state->needfg = ae_false;
+    
+    /*
+     * postprocess data: zero components of G corresponding to
+     * the active constraints
+     */
+    for(i=0; i<=n-1; i++)
+    {
+        if( ae_fp_eq(state->x.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->x.ptr.p_double[i],state->bndu.ptr.p_double[i]) )
+        {
+            state->gc.ptr.p_double[i] = 0;
+        }
+        else
+        {
+            state->gc.ptr.p_double[i] = state->g.ptr.p_double[i];
+        }
+    }
+    mcsrch(n, &state->xn, &state->f, &state->gc, &state->d, &state->stp, state->stpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state);
+    goto lbl_51;
+lbl_52:
+    diffcnt = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        
+        /*
+         * XN contains unprojected result, project it,
+         * save copy to X (will be used for progress reporting)
+         */
+        state->xn.ptr.p_double[i] = boundval(state->xn.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state);
+        
+        /*
+         * update active set
+         */
+        if( ae_fp_eq(state->xn.ptr.p_double[i],state->bndl.ptr.p_double[i])||ae_fp_eq(state->xn.ptr.p_double[i],state->bndu.ptr.p_double[i]) )
+        {
+            state->an.ptr.p_double[i] = 0;
+        }
+        else
+        {
+            state->an.ptr.p_double[i] = 1;
+        }
+        if( ae_fp_neq(state->an.ptr.p_double[i],state->ak.ptr.p_double[i]) )
+        {
+            diffcnt = diffcnt+1;
+        }
+        state->ak.ptr.p_double[i] = state->an.ptr.p_double[i];
+    }
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->repnfev = state->repnfev+state->nfev;
+    state->repiterationscount = state->repiterationscount+1;
+    if( !state->xrep )
+    {
+        goto lbl_53;
+    }
+    
+    /*
+     * progress report
+     */
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 10;
+    goto lbl_rcomm;
+lbl_10:
+    state->xupdated = ae_false;
+lbl_53:
+    
+    /*
+     * Update info about step length
+     */
+    v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->laststep = ae_sqrt(v, _state)*state->stp;
+    
+    /*
+     * Check stopping conditions.
+     */
+    if( ae_fp_greater(minasa_asaboundedantigradnorm(state, _state),state->epsg) )
+    {
+        goto lbl_55;
+    }
+    
+    /*
+     * Gradient is small enough
+     */
+    state->repterminationtype = 4;
+    if( !state->xrep )
+    {
+        goto lbl_57;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 11;
+    goto lbl_rcomm;
+lbl_11:
+    state->xupdated = ae_false;
+lbl_57:
+    result = ae_false;
+    return result;
+lbl_55:
+    if( !(state->repiterationscount>=state->maxits&&state->maxits>0) )
+    {
+        goto lbl_59;
+    }
+    
+    /*
+     * Too many iterations
+     */
+    state->repterminationtype = 5;
+    if( !state->xrep )
+    {
+        goto lbl_61;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 12;
+    goto lbl_rcomm;
+lbl_12:
+    state->xupdated = ae_false;
+lbl_61:
+    result = ae_false;
+    return result;
+lbl_59:
+    if( !(ae_fp_greater_eq(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state))&&diffcnt==0) )
+    {
+        goto lbl_63;
+    }
+    
+    /*
+     * These conditions (EpsF/EpsX) are explicitly or implicitly
+     * related to the current step size and influenced
+     * by changes in the active constraints.
+     *
+     * For these reasons they are checked only when we don't
+     * want to 'unstick' at the end of the iteration and there
+     * were no changes in the active set.
+     *
+     * NOTE: consition |G|>=Mu*|D1| must be exactly opposite
+     * to the condition used to switch back to GPA. At least
+     * one inequality must be strict, otherwise infinite cycle
+     * may occur when |G|=Mu*|D1| (we DON'T test stopping
+     * conditions and we DON'T switch to GPA, so we cycle
+     * indefinitely).
+     */
+    if( ae_fp_greater(state->fold-state->f,state->epsf*ae_maxreal(ae_fabs(state->fold, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) )
+    {
+        goto lbl_65;
+    }
+    
+    /*
+     * F(k+1)-F(k) is small enough
+     */
+    state->repterminationtype = 1;
+    if( !state->xrep )
+    {
+        goto lbl_67;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 13;
+    goto lbl_rcomm;
+lbl_13:
+    state->xupdated = ae_false;
+lbl_67:
+    result = ae_false;
+    return result;
+lbl_65:
+    if( ae_fp_greater(state->laststep,state->epsx) )
+    {
+        goto lbl_69;
+    }
+    
+    /*
+     * X(k+1)-X(k) is small enough
+     */
+    state->repterminationtype = 2;
+    if( !state->xrep )
+    {
+        goto lbl_71;
+    }
+    minasa_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 14;
+    goto lbl_rcomm;
+lbl_14:
+    state->xupdated = ae_false;
+lbl_71:
+    result = ae_false;
+    return result;
+lbl_69:
+lbl_63:
+    
+    /*
+     * Check conditions for switching
+     */
+    if( ae_fp_less(minasa_asaginorm(state, _state),state->mu*minasa_asad1norm(state, _state)) )
+    {
+        state->curalgo = 0;
+        goto lbl_50;
+    }
+    if( diffcnt>0 )
+    {
+        if( minasa_asauisempty(state, _state)||diffcnt>=minasa_n2 )
+        {
+            state->curalgo = 1;
+        }
+        else
+        {
+            state->curalgo = 0;
+        }
+        goto lbl_50;
+    }
+    
+    /*
+     * Calculate D(k+1)
+     *
+     * Line search may result in:
+     * * maximum feasible step being taken (already processed)
+     * * point satisfying Wolfe conditions
+     * * some kind of error (CG is restarted by assigning 0.0 to Beta)
+     */
+    if( mcinfo==1 )
+    {
+        
+        /*
+         * Standard Wolfe conditions are satisfied:
+         * * calculate Y[K] and BetaK
+         */
+        ae_v_add(&state->yk.ptr.p_double[0], 1, &state->gc.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        vv = ae_v_dotproduct(&state->yk.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        v = ae_v_dotproduct(&state->gc.ptr.p_double[0], 1, &state->gc.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        state->betady = v/vv;
+        v = ae_v_dotproduct(&state->gc.ptr.p_double[0], 1, &state->yk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        state->betahs = v/vv;
+        if( state->cgtype==0 )
+        {
+            betak = state->betady;
+        }
+        if( state->cgtype==1 )
+        {
+            betak = ae_maxreal(0, ae_minreal(state->betady, state->betahs, _state), _state);
+        }
+    }
+    else
+    {
+        
+        /*
+         * Something is wrong (may be function is too wild or too flat).
+         *
+         * We'll set BetaK=0, which will restart CG algorithm.
+         * We can stop later (during normal checks) if stopping conditions are met.
+         */
+        betak = 0;
+        state->debugrestartscount = state->debugrestartscount+1;
+    }
+    ae_v_moveneg(&state->dn.ptr.p_double[0], 1, &state->gc.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->dn.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak);
+    ae_v_move(&state->dk.ptr.p_double[0], 1, &state->dn.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * update other information
+     */
+    state->fold = state->f;
+    state->k = state->k+1;
+    goto lbl_49;
+lbl_50:
+lbl_47:
+    goto lbl_17;
+lbl_18:
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ia.ptr.p_int[1] = i;
+    state->rstate.ia.ptr.p_int[2] = mcinfo;
+    state->rstate.ia.ptr.p_int[3] = diffcnt;
+    state->rstate.ba.ptr.p_bool[0] = b;
+    state->rstate.ba.ptr.p_bool[1] = stepfound;
+    state->rstate.ra.ptr.p_double[0] = betak;
+    state->rstate.ra.ptr.p_double[1] = v;
+    state->rstate.ra.ptr.p_double[2] = vv;
+    return result;
+}
+
+
+/*************************************************************************
+ASA results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    * -1    incorrect parameters were specified
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+                * ActiveConstraints contains number of active constraints
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minasaresults(minasastate* state,
+     /* Real    */ ae_vector* x,
+     minasareport* rep,
+     ae_state *_state)
+{
+
+    ae_vector_clear(x);
+    _minasareport_clear(rep);
+
+    minasaresultsbuf(state, x, rep, _state);
+}
+
+
+/*************************************************************************
+ASA results
+
+Buffered implementation of MinASAResults() which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minasaresultsbuf(minasastate* state,
+     /* Real    */ ae_vector* x,
+     minasareport* rep,
+     ae_state *_state)
+{
+    ae_int_t i;
+
+
+    if( x->cnt<state->n )
+    {
+        ae_vector_set_length(x, state->n, _state);
+    }
+    ae_v_move(&x->ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    rep->iterationscount = state->repiterationscount;
+    rep->nfev = state->repnfev;
+    rep->terminationtype = state->repterminationtype;
+    rep->activeconstraints = 0;
+    for(i=0; i<=state->n-1; i++)
+    {
+        if( ae_fp_eq(state->ak.ptr.p_double[i],0) )
+        {
+            rep->activeconstraints = rep->activeconstraints+1;
+        }
+    }
+}
+
+
+/*************************************************************************
+This  subroutine  restarts  CG  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinCGCreate call.
+    X       -   new starting point.
+    BndL    -   new lower bounds
+    BndU    -   new upper bounds
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasarestartfrom(minasastate* state,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* bndl,
+     /* Real    */ ae_vector* bndu,
+     ae_state *_state)
+{
+
+
+    ae_assert(x->cnt>=state->n, "MinASARestartFrom: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, state->n, _state), "MinASARestartFrom: X contains infinite or NaN values!", _state);
+    ae_assert(bndl->cnt>=state->n, "MinASARestartFrom: Length(BndL)<N!", _state);
+    ae_assert(isfinitevector(bndl, state->n, _state), "MinASARestartFrom: BndL contains infinite or NaN values!", _state);
+    ae_assert(bndu->cnt>=state->n, "MinASARestartFrom: Length(BndU)<N!", _state);
+    ae_assert(isfinitevector(bndu, state->n, _state), "MinASARestartFrom: BndU contains infinite or NaN values!", _state);
+    ae_v_move(&state->x.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    ae_v_move(&state->bndl.ptr.p_double[0], 1, &bndl->ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    ae_v_move(&state->bndu.ptr.p_double[0], 1, &bndu->ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    state->laststep = 0;
+    ae_vector_set_length(&state->rstate.ia, 3+1, _state);
+    ae_vector_set_length(&state->rstate.ba, 1+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 2+1, _state);
+    state->rstate.stage = -1;
+    minasa_clearrequestfields(state, _state);
+}
+
+
+/*************************************************************************
+Returns norm of bounded anti-gradient.
+
+Bounded antigradient is a vector obtained from  anti-gradient  by  zeroing
+components which point outwards:
+    result = norm(v)
+    v[i]=0     if ((-g[i]<0)and(x[i]=bndl[i])) or
+                  ((-g[i]>0)and(x[i]=bndu[i]))
+    v[i]=-g[i] otherwise
+
+This function may be used to check a stopping criterion.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+static double minasa_asaboundedantigradnorm(minasastate* state,
+     ae_state *_state)
+{
+    ae_int_t i;
+    double v;
+    double result;
+
+
+    result = 0;
+    for(i=0; i<=state->n-1; i++)
+    {
+        v = -state->g.ptr.p_double[i];
+        if( ae_fp_eq(state->x.ptr.p_double[i],state->bndl.ptr.p_double[i])&&ae_fp_less(-state->g.ptr.p_double[i],0) )
+        {
+            v = 0;
+        }
+        if( ae_fp_eq(state->x.ptr.p_double[i],state->bndu.ptr.p_double[i])&&ae_fp_greater(-state->g.ptr.p_double[i],0) )
+        {
+            v = 0;
+        }
+        result = result+ae_sqr(v, _state);
+    }
+    result = ae_sqrt(result, _state);
+    return result;
+}
+
+
+/*************************************************************************
+Returns norm of GI(x).
+
+GI(x) is  a  gradient  vector  whose  components  associated  with  active
+constraints are zeroed. It  differs  from  bounded  anti-gradient  because
+components  of   GI(x)   are   zeroed  independently  of  sign(g[i]),  and
+anti-gradient's components are zeroed with respect to both constraint  and
+sign.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+static double minasa_asaginorm(minasastate* state, ae_state *_state)
+{
+    ae_int_t i;
+    double result;
+
+
+    result = 0;
+    for(i=0; i<=state->n-1; i++)
+    {
+        if( ae_fp_neq(state->x.ptr.p_double[i],state->bndl.ptr.p_double[i])&&ae_fp_neq(state->x.ptr.p_double[i],state->bndu.ptr.p_double[i]) )
+        {
+            result = result+ae_sqr(state->g.ptr.p_double[i], _state);
+        }
+    }
+    result = ae_sqrt(result, _state);
+    return result;
+}
+
+
+/*************************************************************************
+Returns norm(D1(State.X))
+
+For a meaning of D1 see 'NEW ACTIVE SET ALGORITHM FOR BOX CONSTRAINED
+OPTIMIZATION' by WILLIAM W. HAGER AND HONGCHAO ZHANG.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+static double minasa_asad1norm(minasastate* state, ae_state *_state)
+{
+    ae_int_t i;
+    double result;
+
+
+    result = 0;
+    for(i=0; i<=state->n-1; i++)
+    {
+        result = result+ae_sqr(boundval(state->x.ptr.p_double[i]-state->g.ptr.p_double[i], state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i], _state)-state->x.ptr.p_double[i], _state);
+    }
+    result = ae_sqrt(result, _state);
+    return result;
+}
+
+
+/*************************************************************************
+Returns True, if U set is empty.
+
+* State.X is used as point,
+* State.G - as gradient,
+* D is calculated within function (because State.D may have different
+  meaning depending on current optimization algorithm)
+
+For a meaning of U see 'NEW ACTIVE SET ALGORITHM FOR BOX CONSTRAINED
+OPTIMIZATION' by WILLIAM W. HAGER AND HONGCHAO ZHANG.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+static ae_bool minasa_asauisempty(minasastate* state, ae_state *_state)
+{
+    ae_int_t i;
+    double d;
+    double d2;
+    double d32;
+    ae_bool result;
+
+
+    d = minasa_asad1norm(state, _state);
+    d2 = ae_sqrt(d, _state);
+    d32 = d*d2;
+    result = ae_true;
+    for(i=0; i<=state->n-1; i++)
+    {
+        if( ae_fp_greater_eq(ae_fabs(state->g.ptr.p_double[i], _state),d2)&&ae_fp_greater_eq(ae_minreal(state->x.ptr.p_double[i]-state->bndl.ptr.p_double[i], state->bndu.ptr.p_double[i]-state->x.ptr.p_double[i], _state),d32) )
+        {
+            result = ae_false;
+            return result;
+        }
+    }
+    return result;
+}
+
+
+/*************************************************************************
+Clears request fileds (to be sure that we don't forgot to clear something)
+*************************************************************************/
+static void minasa_clearrequestfields(minasastate* state,
+     ae_state *_state)
+{
+
+
+    state->needfg = ae_false;
+    state->xupdated = ae_false;
+}
+
+
+ae_bool _minasastate_init(minasastate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->bndl, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->bndu, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->ak, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->dk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->an, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xn, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->dn, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->d, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->work, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->yk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->gc, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !_linminstate_init(&p->lstate, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _minasastate_init_copy(minasastate* dst, minasastate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->n = src->n;
+    dst->epsg = src->epsg;
+    dst->epsf = src->epsf;
+    dst->epsx = src->epsx;
+    dst->maxits = src->maxits;
+    dst->xrep = src->xrep;
+    dst->stpmax = src->stpmax;
+    dst->cgtype = src->cgtype;
+    dst->k = src->k;
+    dst->nfev = src->nfev;
+    dst->mcstage = src->mcstage;
+    if( !ae_vector_init_copy(&dst->bndl, &src->bndl, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->bndu, &src->bndu, _state, make_automatic) )
+        return ae_false;
+    dst->curalgo = src->curalgo;
+    dst->acount = src->acount;
+    dst->mu = src->mu;
+    dst->finit = src->finit;
+    dst->dginit = src->dginit;
+    if( !ae_vector_init_copy(&dst->ak, &src->ak, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->dk, &src->dk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->an, &src->an, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xn, &src->xn, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->dn, &src->dn, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic) )
+        return ae_false;
+    dst->fold = src->fold;
+    dst->stp = src->stp;
+    if( !ae_vector_init_copy(&dst->work, &src->work, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->yk, &src->yk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->gc, &src->gc, _state, make_automatic) )
+        return ae_false;
+    dst->laststep = src->laststep;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->f = src->f;
+    if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) )
+        return ae_false;
+    dst->needfg = src->needfg;
+    dst->xupdated = src->xupdated;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    dst->repiterationscount = src->repiterationscount;
+    dst->repnfev = src->repnfev;
+    dst->repterminationtype = src->repterminationtype;
+    dst->debugrestartscount = src->debugrestartscount;
+    if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) )
+        return ae_false;
+    dst->betahs = src->betahs;
+    dst->betady = src->betady;
+    return ae_true;
+}
+
+
+void _minasastate_clear(minasastate* p)
+{
+    ae_vector_clear(&p->bndl);
+    ae_vector_clear(&p->bndu);
+    ae_vector_clear(&p->ak);
+    ae_vector_clear(&p->xk);
+    ae_vector_clear(&p->dk);
+    ae_vector_clear(&p->an);
+    ae_vector_clear(&p->xn);
+    ae_vector_clear(&p->dn);
+    ae_vector_clear(&p->d);
+    ae_vector_clear(&p->work);
+    ae_vector_clear(&p->yk);
+    ae_vector_clear(&p->gc);
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->g);
+    _rcommstate_clear(&p->rstate);
+    _linminstate_clear(&p->lstate);
+}
+
+
+ae_bool _minasareport_init(minasareport* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _minasareport_init_copy(minasareport* dst, minasareport* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->iterationscount = src->iterationscount;
+    dst->nfev = src->nfev;
+    dst->terminationtype = src->terminationtype;
+    dst->activeconstraints = src->activeconstraints;
+    return ae_true;
+}
+
+
+void _minasareport_clear(minasareport* p)
+{
+}
+
+
+
+
+/*************************************************************************
+        NONLINEAR CONJUGATE GRADIENT METHOD
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by using one of  the
+nonlinear conjugate gradient methods.
+
+These CG methods are globally convergent (even on non-convex functions) as
+long as grad(f) is Lipschitz continuous in  a  some  neighborhood  of  the
+L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinCGCreate() call
+2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and
+   other functions
+3. User calls MinCGOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinCGResults() to get solution
+5. Optionally, user may call MinCGRestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinCGRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   starting point, array[0..N-1].
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgcreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     mincgstate* state,
+     ae_state *_state)
+{
+
+    _mincgstate_clear(state);
+
+    ae_assert(n>=1, "MinCGCreate: N too small!", _state);
+    ae_assert(x->cnt>=n, "MinCGCreate: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinCGCreate: X contains infinite or NaN values!", _state);
+    
+    /*
+     * Initialize
+     */
+    state->n = n;
+    mincgsetcond(state, 0, 0, 0, 0, _state);
+    mincgsetxrep(state, ae_false, _state);
+    mincgsetdrep(state, ae_false, _state);
+    mincgsetstpmax(state, 0, _state);
+    mincgsetcgtype(state, -1, _state);
+    ae_vector_set_length(&state->xk, n, _state);
+    ae_vector_set_length(&state->dk, n, _state);
+    ae_vector_set_length(&state->xn, n, _state);
+    ae_vector_set_length(&state->dn, n, _state);
+    ae_vector_set_length(&state->x, n, _state);
+    ae_vector_set_length(&state->d, n, _state);
+    ae_vector_set_length(&state->g, n, _state);
+    ae_vector_set_length(&state->work, n, _state);
+    ae_vector_set_length(&state->yk, n, _state);
+    mincgrestartfrom(state, x, _state);
+}
+
+
+/*************************************************************************
+This function sets stopping conditions for CG optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetcond(mincgstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(epsg, _state), "MinCGSetCond: EpsG is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsg,0), "MinCGSetCond: negative EpsG!", _state);
+    ae_assert(ae_isfinite(epsf, _state), "MinCGSetCond: EpsF is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsf,0), "MinCGSetCond: negative EpsF!", _state);
+    ae_assert(ae_isfinite(epsx, _state), "MinCGSetCond: EpsX is not finite number!", _state);
+    ae_assert(ae_fp_greater_eq(epsx,0), "MinCGSetCond: negative EpsX!", _state);
+    ae_assert(maxits>=0, "MinCGSetCond: negative MaxIts!", _state);
+    if( ((ae_fp_eq(epsg,0)&&ae_fp_eq(epsf,0))&&ae_fp_eq(epsx,0))&&maxits==0 )
+    {
+        epsx = 1.0E-6;
+    }
+    state->epsg = epsg;
+    state->epsf = epsf;
+    state->epsx = epsx;
+    state->maxits = maxits;
+}
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinCGOptimize().
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetxrep(mincgstate* state, ae_bool needxrep, ae_state *_state)
+{
+
+
+    state->xrep = needxrep;
+}
+
+
+/*************************************************************************
+This function turns on/off line search reports.
+These reports are described in more details in developer-only  comments on
+MinCGState object.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedDRep-   whether line search reports are needed or not
+
+This function is intended for private use only. Turning it on artificially
+may cause program failure.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetdrep(mincgstate* state, ae_bool needdrep, ae_state *_state)
+{
+
+
+    state->drep = needdrep;
+}
+
+
+/*************************************************************************
+This function sets CG algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    CGType  -   algorithm type:
+                * -1    automatic selection of the best algorithm
+                * 0     DY (Dai and Yuan) algorithm
+                * 1     Hybrid DY-HS algorithm
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetcgtype(mincgstate* state, ae_int_t cgtype, ae_state *_state)
+{
+
+
+    ae_assert(cgtype>=-1&&cgtype<=1, "MinCGSetCGType: incorrect CGType!", _state);
+    if( cgtype==-1 )
+    {
+        cgtype = 1;
+    }
+    state->cgtype = cgtype;
+}
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetstpmax(mincgstate* state, double stpmax, ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(stpmax, _state), "MinCGSetStpMax: StpMax is not finite!", _state);
+    ae_assert(ae_fp_greater_eq(stpmax,0), "MinCGSetStpMax: StpMax<0!", _state);
+    state->stpmax = stpmax;
+}
+
+
+/*************************************************************************
+This function allows to suggest initial step length to the CG algorithm.
+
+Suggested  step  length  is used as starting point for the line search. It
+can be useful when you have  badly  scaled  problem,  i.e.  when  ||grad||
+(which is used as initial estimate for the first step) is many  orders  of
+magnitude different from the desired step.
+
+Line search  may  fail  on  such problems without good estimate of initial
+step length. Imagine, for example, problem with ||grad||=10^50 and desired
+step equal to 0.1 Line  search function will use 10^50  as  initial  step,
+then  it  will  decrease step length by 2 (up to 20 attempts) and will get
+10^44, which is still too large.
+
+This function allows us to tell than line search should  be  started  from
+some moderate step length, like 1.0, so algorithm will be able  to  detect
+desired step length in a several searches.
+
+This function influences only first iteration of algorithm. It  should  be
+called between MinCGCreate/MinCGRestartFrom() call and MinCGOptimize call.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state.
+    Stp     -   initial estimate of the step length.
+                Can be zero (no estimate).
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsuggeststep(mincgstate* state, double stp, ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(stp, _state), "MinCGSuggestStep: Stp is infinite or NAN", _state);
+    ae_assert(ae_fp_greater_eq(stp,0), "MinCGSuggestStep: Stp<0", _state);
+    state->suggestedstep = stp;
+}
+
+
+/*************************************************************************
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+ae_bool mincgiteration(mincgstate* state, ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t i;
+    double betak;
+    double v;
+    double vv;
+    ae_int_t mcinfo;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        i = state->rstate.ia.ptr.p_int[1];
+        mcinfo = state->rstate.ia.ptr.p_int[2];
+        betak = state->rstate.ra.ptr.p_double[0];
+        v = state->rstate.ra.ptr.p_double[1];
+        vv = state->rstate.ra.ptr.p_double[2];
+    }
+    else
+    {
+        n = -983;
+        i = -989;
+        mcinfo = -834;
+        betak = 900;
+        v = -287;
+        vv = 364;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    if( state->rstate.stage==3 )
+    {
+        goto lbl_3;
+    }
+    if( state->rstate.stage==4 )
+    {
+        goto lbl_4;
+    }
+    if( state->rstate.stage==5 )
+    {
+        goto lbl_5;
+    }
+    
+    /*
+     * Routine body
+     */
+    
+    /*
+     * Prepare
+     */
+    n = state->n;
+    state->repterminationtype = 0;
+    state->repiterationscount = 0;
+    state->repnfev = 0;
+    state->debugrestartscount = 0;
+    
+    /*
+     * Calculate F/G, XK and DK, initialize algorithm
+     */
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    mincg_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    state->needfg = ae_false;
+    ae_v_moveneg(&state->dk.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( !state->xrep )
+    {
+        goto lbl_6;
+    }
+    mincg_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    state->xupdated = ae_false;
+lbl_6:
+    v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    v = ae_sqrt(v, _state);
+    if( ae_fp_eq(v,0) )
+    {
+        ae_v_move(&state->xn.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        state->repterminationtype = 4;
+        result = ae_false;
+        return result;
+    }
+    state->repnfev = 1;
+    state->k = 0;
+    state->fold = state->f;
+    
+    /*
+     * Main cycle
+     */
+    state->laststep = state->suggestedstep;
+    state->rstimer = mincg_rscountdownlen;
+lbl_8:
+    if( ae_false )
+    {
+        goto lbl_9;
+    }
+    
+    /*
+     * Store G[k] for later calculation of Y[k]
+     */
+    ae_v_moveneg(&state->yk.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * Calculate X(k+1): minimize F(x+alpha*d)
+     */
+    ae_v_move(&state->d.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->mcstage = 0;
+    state->stp = 1.0;
+    linminnormalized(&state->d, &state->stp, n, _state);
+    if( ae_fp_neq(state->laststep,0) )
+    {
+        state->stp = state->laststep;
+    }
+    state->curstpmax = state->stpmax;
+    if( !state->drep )
+    {
+        goto lbl_10;
+    }
+    
+    /*
+     * Report beginning of line search (if needed)
+     */
+    mincg_clearrequestfields(state, _state);
+    state->lsstart = ae_true;
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    state->lsstart = ae_false;
+lbl_10:
+    mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->curstpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state);
+lbl_12:
+    if( state->mcstage==0 )
+    {
+        goto lbl_13;
+    }
+    mincg_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 3;
+    goto lbl_rcomm;
+lbl_3:
+    state->needfg = ae_false;
+    mcsrch(n, &state->x, &state->f, &state->g, &state->d, &state->stp, state->curstpmax, &mcinfo, &state->nfev, &state->work, &state->lstate, &state->mcstage, _state);
+    goto lbl_12;
+lbl_13:
+    if( !state->drep )
+    {
+        goto lbl_14;
+    }
+    
+    /*
+     * Report end of line search (if needed)
+     */
+    mincg_clearrequestfields(state, _state);
+    state->lsend = ae_true;
+    state->rstate.stage = 4;
+    goto lbl_rcomm;
+lbl_4:
+    state->lsend = ae_false;
+lbl_14:
+    if( !state->xrep )
+    {
+        goto lbl_16;
+    }
+    mincg_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 5;
+    goto lbl_rcomm;
+lbl_5:
+    state->xupdated = ae_false;
+lbl_16:
+    ae_v_move(&state->xn.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( mcinfo==1 )
+    {
+        
+        /*
+         * Standard Wolfe conditions hold
+         * Calculate Y[K] and BetaK
+         */
+        ae_v_add(&state->yk.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        vv = ae_v_dotproduct(&state->yk.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        state->betady = v/vv;
+        v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->yk.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        state->betahs = v/vv;
+        if( state->cgtype==0 )
+        {
+            betak = state->betady;
+        }
+        if( state->cgtype==1 )
+        {
+            betak = ae_maxreal(0, ae_minreal(state->betady, state->betahs, _state), _state);
+        }
+    }
+    else
+    {
+        
+        /*
+         * Something is wrong (may be function is too wild or too flat).
+         *
+         * We'll set BetaK=0, which will restart CG algorithm.
+         * We can stop later (during normal checks) if stopping conditions are met.
+         */
+        betak = 0;
+        state->debugrestartscount = state->debugrestartscount+1;
+    }
+    if( mcinfo==1||mcinfo==5 )
+    {
+        state->rstimer = mincg_rscountdownlen;
+    }
+    else
+    {
+        state->rstimer = state->rstimer-1;
+    }
+    
+    /*
+     * Calculate D(k+1)
+     */
+    ae_v_moveneg(&state->dn.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_addd(&state->dn.ptr.p_double[0], 1, &state->dk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak);
+    
+    /*
+     * Update info about step length
+     */
+    v = ae_v_dotproduct(&state->d.ptr.p_double[0], 1, &state->d.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->laststep = ae_sqrt(v, _state)*state->stp;
+    
+    /*
+     * Update information and Hessian.
+     * Check stopping conditions.
+     */
+    state->repnfev = state->repnfev+state->nfev;
+    state->repiterationscount = state->repiterationscount+1;
+    if( state->repiterationscount>=state->maxits&&state->maxits>0 )
+    {
+        
+        /*
+         * Too many iterations
+         */
+        state->repterminationtype = 5;
+        result = ae_false;
+        return result;
+    }
+    v = ae_v_dotproduct(&state->g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    if( ae_fp_less_eq(ae_sqrt(v, _state),state->epsg) )
+    {
+        
+        /*
+         * Gradient is small enough
+         */
+        state->repterminationtype = 4;
+        result = ae_false;
+        return result;
+    }
+    if( ae_fp_less_eq(state->fold-state->f,state->epsf*ae_maxreal(ae_fabs(state->fold, _state), ae_maxreal(ae_fabs(state->f, _state), 1.0, _state), _state)) )
+    {
+        
+        /*
+         * F(k+1)-F(k) is small enough
+         */
+        state->repterminationtype = 1;
+        result = ae_false;
+        return result;
+    }
+    if( ae_fp_less_eq(state->laststep,state->epsx) )
+    {
+        
+        /*
+         * X(k+1)-X(k) is small enough
+         */
+        state->repterminationtype = 2;
+        result = ae_false;
+        return result;
+    }
+    if( state->rstimer<=0 )
+    {
+        
+        /*
+         * Too many subsequent restarts
+         */
+        state->repterminationtype = 7;
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Shift Xk/Dk, update other information
+     */
+    ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->dk.ptr.p_double[0], 1, &state->dn.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->fold = state->f;
+    state->k = state->k+1;
+    goto lbl_8;
+lbl_9:
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ia.ptr.p_int[1] = i;
+    state->rstate.ia.ptr.p_int[2] = mcinfo;
+    state->rstate.ra.ptr.p_double[0] = betak;
+    state->rstate.ra.ptr.p_double[1] = v;
+    state->rstate.ra.ptr.p_double[2] = vv;
+    return result;
+}
+
+
+/*************************************************************************
+Conjugate gradient results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible,
+                            we return best X found so far
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+void mincgresults(mincgstate* state,
+     /* Real    */ ae_vector* x,
+     mincgreport* rep,
+     ae_state *_state)
+{
+
+    ae_vector_clear(x);
+    _mincgreport_clear(rep);
+
+    mincgresultsbuf(state, x, rep, _state);
+}
+
+
+/*************************************************************************
+Conjugate gradient results
+
+Buffered implementation of MinCGResults(), which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+void mincgresultsbuf(mincgstate* state,
+     /* Real    */ ae_vector* x,
+     mincgreport* rep,
+     ae_state *_state)
+{
+
+
+    if( x->cnt<state->n )
+    {
+        ae_vector_set_length(x, state->n, _state);
+    }
+    ae_v_move(&x->ptr.p_double[0], 1, &state->xn.ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    rep->iterationscount = state->repiterationscount;
+    rep->nfev = state->repnfev;
+    rep->terminationtype = state->repterminationtype;
+}
+
+
+/*************************************************************************
+This  subroutine  restarts  CG  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgrestartfrom(mincgstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state)
+{
+
+
+    ae_assert(x->cnt>=state->n, "MinCGRestartFrom: Length(X)<N!", _state);
+    ae_assert(isfinitevector(x, state->n, _state), "MinCGCreate: X contains infinite or NaN values!", _state);
+    ae_v_move(&state->x.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    mincgsuggeststep(state, 0.0, _state);
+    ae_vector_set_length(&state->rstate.ia, 2+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 2+1, _state);
+    state->rstate.stage = -1;
+    mincg_clearrequestfields(state, _state);
+}
+
+
+/*************************************************************************
+Clears request fileds (to be sure that we don't forgot to clear something)
+*************************************************************************/
+static void mincg_clearrequestfields(mincgstate* state, ae_state *_state)
+{
+
+
+    state->needfg = ae_false;
+    state->xupdated = ae_false;
+    state->lsstart = ae_false;
+    state->lsend = ae_false;
+}
+
+
+ae_bool _mincgstate_init(mincgstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->dk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xn, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->dn, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->d, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->work, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->yk, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !_linminstate_init(&p->lstate, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _mincgstate_init_copy(mincgstate* dst, mincgstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->n = src->n;
+    dst->epsg = src->epsg;
+    dst->epsf = src->epsf;
+    dst->epsx = src->epsx;
+    dst->maxits = src->maxits;
+    dst->stpmax = src->stpmax;
+    dst->suggestedstep = src->suggestedstep;
+    dst->xrep = src->xrep;
+    dst->drep = src->drep;
+    dst->cgtype = src->cgtype;
+    dst->nfev = src->nfev;
+    dst->mcstage = src->mcstage;
+    dst->k = src->k;
+    if( !ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->dk, &src->dk, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xn, &src->xn, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->dn, &src->dn, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic) )
+        return ae_false;
+    dst->fold = src->fold;
+    dst->stp = src->stp;
+    dst->curstpmax = src->curstpmax;
+    if( !ae_vector_init_copy(&dst->work, &src->work, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->yk, &src->yk, _state, make_automatic) )
+        return ae_false;
+    dst->laststep = src->laststep;
+    dst->rstimer = src->rstimer;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->f = src->f;
+    if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) )
+        return ae_false;
+    dst->needfg = src->needfg;
+    dst->xupdated = src->xupdated;
+    dst->lsstart = src->lsstart;
+    dst->lsend = src->lsend;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    dst->repiterationscount = src->repiterationscount;
+    dst->repnfev = src->repnfev;
+    dst->repterminationtype = src->repterminationtype;
+    dst->debugrestartscount = src->debugrestartscount;
+    if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) )
+        return ae_false;
+    dst->betahs = src->betahs;
+    dst->betady = src->betady;
+    return ae_true;
+}
+
+
+void _mincgstate_clear(mincgstate* p)
+{
+    ae_vector_clear(&p->xk);
+    ae_vector_clear(&p->dk);
+    ae_vector_clear(&p->xn);
+    ae_vector_clear(&p->dn);
+    ae_vector_clear(&p->d);
+    ae_vector_clear(&p->work);
+    ae_vector_clear(&p->yk);
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->g);
+    _rcommstate_clear(&p->rstate);
+    _linminstate_clear(&p->lstate);
+}
+
+
+ae_bool _mincgreport_init(mincgreport* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _mincgreport_init_copy(mincgreport* dst, mincgreport* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->iterationscount = src->iterationscount;
+    dst->nfev = src->nfev;
+    dst->terminationtype = src->terminationtype;
+    return ae_true;
+}
+
+
+void _mincgreport_clear(mincgreport* p)
+{
+}
+
+
+
+
+/*************************************************************************
+                     BOUND CONSTRAINED OPTIMIZATION
+       WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS
+
+DESCRIPTION:
+The  subroutine  minimizes  function   F(x)  of N arguments subject to any
+combination of:
+* bound constraints
+* linear inequality constraints
+* linear equality constraints
+
+REQUIREMENTS:
+* function value and gradient
+* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) }
+* function must be defined even in the infeasible points (algorithm make take
+  steps in the infeasible area before converging to the feasible point)
+* starting point X0 must be feasible or not too far away from the feasible set
+* problem must satisfy strict complementary conditions
+
+USAGE:
+
+Constrained optimization if far more complex than the unconstrained one.
+Here we give very brief outline of the BLEIC optimizer. We strongly recommend
+you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide
+on optimization, which is available at http://www.alglib.net/optimization/
+
+1. User initializes algorithm state with MinBLEICCreate() call
+
+2. USer adds boundary and/or linear constraints by calling
+   MinBLEICSetBC() and MinBLEICSetLC() functions.
+
+3. User sets stopping conditions for underlying unconstrained solver
+   with MinBLEICSetInnerCond() call.
+   This function controls accuracy of underlying optimization algorithm.
+
+4. User sets stopping conditions for outer iteration by calling
+   MinBLEICSetOuterCond() function.
+   This function controls handling of boundary and inequality constraints.
+
+5. User tunes barrier parameters:
+   * barrier width with MinBLEICSetBarrierWidth() call
+   * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call
+   These functions control handling of boundary and inequality constraints.
+
+6. Additionally, user may set limit on number of internal iterations
+   by MinBLEICSetMaxIts() call.
+   This function allows to prevent algorithm from looping forever.
+
+7. User calls MinBLEICOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+
+8. User calls MinBLEICResults() to get solution
+
+9. Optionally user may call MinBLEICRestartFrom() to solve another problem
+   with same N but another starting point.
+   MinBLEICRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size ofX
+    X       -   starting point, array[N]:
+                * it is better to set X to a feasible point
+                * but X can be infeasible, in which case algorithm will try
+                  to find feasible point first, using X as initial
+                  approximation.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleiccreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     minbleicstate* state,
+     ae_state *_state)
+{
+    ae_frame _frame_block;
+    ae_int_t i;
+    ae_matrix c;
+    ae_vector ct;
+
+    ae_frame_make(_state, &_frame_block);
+    _minbleicstate_clear(state);
+    ae_matrix_init(&c, 0, 0, DT_REAL, _state, ae_true);
+    ae_vector_init(&ct, 0, DT_INT, _state, ae_true);
+
+    ae_assert(n>=1, "MinBLEICCreate: N<1", _state);
+    ae_assert(x->cnt>=n, "MinBLEICCreate: Length(X)<N", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinBLEICCreate: X contains infinite or NaN values!", _state);
+    
+    /*
+     * Initialize.
+     */
+    state->n = n;
+    ae_vector_set_length(&state->x, n, _state);
+    ae_vector_set_length(&state->g, n, _state);
+    ae_vector_set_length(&state->bndl, n, _state);
+    ae_vector_set_length(&state->hasbndl, n, _state);
+    ae_vector_set_length(&state->bndu, n, _state);
+    ae_vector_set_length(&state->hasbndu, n, _state);
+    ae_vector_set_length(&state->xcur, n, _state);
+    ae_vector_set_length(&state->xprev, n, _state);
+    ae_vector_set_length(&state->xstart, n, _state);
+    ae_vector_set_length(&state->xe, n, _state);
+    for(i=0; i<=n-1; i++)
+    {
+        state->bndl.ptr.p_double[i] = _state->v_neginf;
+        state->hasbndl.ptr.p_bool[i] = ae_false;
+        state->bndu.ptr.p_double[i] = _state->v_posinf;
+        state->hasbndu.ptr.p_bool[i] = ae_false;
+    }
+    minbleicsetlc(state, &c, &ct, 0, _state);
+    minbleicsetinnercond(state, 0.0, 0.0, 0.0, _state);
+    minbleicsetoutercond(state, 1.0E-6, 1.0E-6, _state);
+    minbleicsetbarrierwidth(state, 1.0E-3, _state);
+    minbleicsetbarrierdecay(state, 1.0, _state);
+    minbleicsetmaxits(state, 0, _state);
+    minbleicsetxrep(state, ae_false, _state);
+    minbleicsetstpmax(state, 0.0, _state);
+    minbleicrestartfrom(state, x, _state);
+    mincgcreate(n, x, &state->cgstate, _state);
+    ae_frame_leave(_state);
+}
+
+
+/*************************************************************************
+This function sets boundary constraints for BLEIC optimizer.
+
+Boundary constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure stores algorithm state
+    BndL    -   lower bounds, array[N].
+                If some (all) variables are unbounded, you may specify
+                very small number or -INF.
+    BndU    -   upper bounds, array[N].
+                If some (all) variables are unbounded, you may specify
+                very large number or +INF.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbc(minbleicstate* state,
+     /* Real    */ ae_vector* bndl,
+     /* Real    */ ae_vector* bndu,
+     ae_state *_state)
+{
+    ae_int_t i;
+    ae_int_t n;
+
+
+    n = state->n;
+    ae_assert(bndl->cnt>=n, "MinBLEICSetBC: Length(BndL)<N", _state);
+    ae_assert(bndu->cnt>=n, "MinBLEICSetBC: Length(BndU)<N", _state);
+    for(i=0; i<=n-1; i++)
+    {
+        ae_assert(ae_isfinite(bndl->ptr.p_double[i], _state)||ae_isneginf(bndl->ptr.p_double[i], _state), "MinBLEICSetBC: BndL contains NAN or +INF", _state);
+        ae_assert(ae_isfinite(bndu->ptr.p_double[i], _state)||ae_isposinf(bndu->ptr.p_double[i], _state), "MinBLEICSetBC: BndL contains NAN or -INF", _state);
+        state->bndl.ptr.p_double[i] = bndl->ptr.p_double[i];
+        state->hasbndl.ptr.p_bool[i] = ae_isfinite(bndl->ptr.p_double[i], _state);
+        state->bndu.ptr.p_double[i] = bndu->ptr.p_double[i];
+        state->hasbndu.ptr.p_bool[i] = ae_isfinite(bndu->ptr.p_double[i], _state);
+    }
+}
+
+
+/*************************************************************************
+This function sets linear constraints for BLEIC optimizer.
+
+Linear constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    C       -   linear constraints, array[K,N+1].
+                Each row of C represents one constraint, either equality
+                or inequality (see below):
+                * first N elements correspond to coefficients,
+                * last element corresponds to the right part.
+                All elements of C (including right part) must be finite.
+    CT      -   type of constraints, array[K]:
+                * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1]
+                * if CT[i]=0, then I-th constraint is C[i,*]*x  = C[i,n+1]
+                * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1]
+    K       -   number of equality/inequality constraints, K>=0:
+                * if given, only leading K elements of C/CT are used
+                * if not given, automatically determined from sizes of C/CT
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetlc(minbleicstate* state,
+     /* Real    */ ae_matrix* c,
+     /* Integer */ ae_vector* ct,
+     ae_int_t k,
+     ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t m;
+    ae_int_t i;
+    ae_int_t idx;
+    ae_bool b;
+    double v;
+
+
+    n = state->n;
+    
+    /*
+     * First, check for errors in the inputs
+     */
+    ae_assert(k>=0, "MinBLEICSetLC: K<0", _state);
+    ae_assert(c->cols>=n+1||k==0, "MinBLEICSetLC: Cols(C)<N+1", _state);
+    ae_assert(c->rows>=k, "MinBLEICSetLC: Rows(C)<K", _state);
+    ae_assert(ct->cnt>=k, "MinBLEICSetLC: Length(CT)<K", _state);
+    ae_assert(apservisfinitematrix(c, k, n+1, _state), "MinBLEICSetLC: C contains infinite or NaN values!", _state);
+    
+    /*
+     * Determine number of constraints,
+     * allocate space and copy
+     */
+    state->cecnt = 0;
+    state->cicnt = 0;
+    for(i=0; i<=k-1; i++)
+    {
+        if( ct->ptr.p_int[i]!=0 )
+        {
+            state->cicnt = state->cicnt+1;
+        }
+        else
+        {
+            state->cecnt = state->cecnt+1;
+        }
+    }
+    rmatrixsetlengthatleast(&state->ci, state->cicnt, n+1, _state);
+    rmatrixsetlengthatleast(&state->ce, state->cecnt, n+1, _state);
+    idx = 0;
+    for(i=0; i<=k-1; i++)
+    {
+        if( ct->ptr.p_int[i]!=0 )
+        {
+            ae_v_move(&state->ci.ptr.pp_double[idx][0], 1, &c->ptr.pp_double[i][0], 1, ae_v_len(0,n));
+            if( ct->ptr.p_int[i]<0 )
+            {
+                ae_v_muld(&state->ci.ptr.pp_double[idx][0], 1, ae_v_len(0,n), -1);
+            }
+            idx = idx+1;
+        }
+    }
+    idx = 0;
+    for(i=0; i<=k-1; i++)
+    {
+        if( ct->ptr.p_int[i]==0 )
+        {
+            ae_v_move(&state->ce.ptr.pp_double[idx][0], 1, &c->ptr.pp_double[i][0], 1, ae_v_len(0,n));
+            idx = idx+1;
+        }
+    }
+    
+    /*
+     * Calculate ortohognal basis of row space of CE.
+     * Determine actual basis size, drop vectors corresponding to
+     * small singular values.
+     *
+     * NOTE: it is important to use "W[I]>W[0]*Tol" form (strict
+     * inequality instead of non-strict) because it allows us to
+     * handle situations with zero CE in a natural and elegant way:
+     * all singular values are zero, and even W[0] itself is not
+     * greater than W[0]*tol.
+     */
+    if( state->cecnt>0 )
+    {
+        b = rmatrixsvd(&state->ce, state->cecnt, n, 1, 1, 2, &state->w, &state->cesvl, &state->cebasis, _state);
+        ae_assert(b, "MinBLEIC: inconvergence of internal SVD", _state);
+        state->cedim = 0;
+        m = ae_minint(state->cecnt, n, _state);
+        for(i=0; i<=m-1; i++)
+        {
+            if( ae_fp_greater(state->w.ptr.p_double[i],state->w.ptr.p_double[0]*minbleic_svdtol*ae_machineepsilon) )
+            {
+                state->cedim = state->cedim+1;
+            }
+        }
+    }
+    else
+    {
+        state->cedim = 0;
+    }
+    
+    /*
+     * Calculate XE: solution of CE*x = b.
+     * Fill it with zeros if CEDim=0
+     */
+    if( state->cedim>0 )
+    {
+        rvectorsetlengthatleast(&state->tmp0, state->cedim, _state);
+        for(i=0; i<=state->cedim-1; i++)
+        {
+            state->tmp0.ptr.p_double[i] = 0;
+        }
+        for(i=0; i<=state->cecnt-1; i++)
+        {
+            v = state->ce.ptr.pp_double[i][n];
+            ae_v_addd(&state->tmp0.ptr.p_double[0], 1, &state->cesvl.ptr.pp_double[i][0], 1, ae_v_len(0,state->cedim-1), v);
+        }
+        for(i=0; i<=state->cedim-1; i++)
+        {
+            state->tmp0.ptr.p_double[i] = state->tmp0.ptr.p_double[i]/state->w.ptr.p_double[i];
+        }
+        for(i=0; i<=n-1; i++)
+        {
+            state->xe.ptr.p_double[i] = 0;
+        }
+        for(i=0; i<=state->cedim-1; i++)
+        {
+            v = state->tmp0.ptr.p_double[i];
+            ae_v_addd(&state->xe.ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        }
+    }
+    else
+    {
+        
+        /*
+         * no constraints, fill with zeros
+         */
+        for(i=0; i<=n-1; i++)
+        {
+            state->xe.ptr.p_double[i] = 0;
+        }
+    }
+}
+
+
+/*************************************************************************
+This function sets stopping conditions for the underlying nonlinear CG
+optimizer. It controls overall accuracy of solution. These conditions
+should be strict enough in order for algorithm to converge.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                Algorithm finishes its work if 2-norm of the Lagrangian
+                gradient is less than or equal to EpsG.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+
+Passing EpsG=0, EpsF=0 and EpsX=0 (simultaneously) will lead to
+automatic stopping criterion selection.
+
+These conditions are used to terminate inner iterations. However, you
+need to tune termination conditions for outer iterations too.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetinnercond(minbleicstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(epsg, _state), "MinBLEICSetInnerCond: EpsG is not finite number", _state);
+    ae_assert(ae_fp_greater_eq(epsg,0), "MinBLEICSetInnerCond: negative EpsG", _state);
+    ae_assert(ae_isfinite(epsf, _state), "MinBLEICSetInnerCond: EpsF is not finite number", _state);
+    ae_assert(ae_fp_greater_eq(epsf,0), "MinBLEICSetInnerCond: negative EpsF", _state);
+    ae_assert(ae_isfinite(epsx, _state), "MinBLEICSetInnerCond: EpsX is not finite number", _state);
+    ae_assert(ae_fp_greater_eq(epsx,0), "MinBLEICSetInnerCond: negative EpsX", _state);
+    state->innerepsg = epsg;
+    state->innerepsf = epsf;
+    state->innerepsx = epsx;
+}
+
+
+/*************************************************************************
+This function sets stopping conditions for outer iteration of BLEIC algo.
+
+These conditions control accuracy of constraint handling and amount of
+infeasibility allowed in the solution.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsX    -   >0, stopping condition on outer iteration step length
+    EpsI    -   >0, stopping condition on infeasibility
+    
+Both EpsX and EpsI must be non-zero.
+
+MEANING OF EpsX
+
+EpsX  is  a  stopping  condition for outer iterations. Algorithm will stop
+when  solution  of  the  current  modified  subproblem will be within EpsX
+(using 2-norm) of the previous solution.
+
+MEANING OF EpsI
+
+EpsI controls feasibility properties -  algorithm  won't  stop  until  all
+inequality constraints will be satisfied with error (distance from current
+point to the feasible area) at most EpsI.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetoutercond(minbleicstate* state,
+     double epsx,
+     double epsi,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(epsx, _state), "MinBLEICSetOuterCond: EpsX is not finite number", _state);
+    ae_assert(ae_fp_greater(epsx,0), "MinBLEICSetOuterCond: non-positive EpsX", _state);
+    ae_assert(ae_isfinite(epsi, _state), "MinBLEICSetOuterCond: EpsI is not finite number", _state);
+    ae_assert(ae_fp_greater(epsi,0), "MinBLEICSetOuterCond: non-positive EpsI", _state);
+    state->outerepsx = epsx;
+    state->outerepsi = epsi;
+}
+
+
+/*************************************************************************
+This function sets initial barrier width.
+
+BLEIC optimizer uses  modified  barrier  functions  to  handle  inequality
+constraints. These functions are almost constant in the inner parts of the
+feasible  area,  but  grow rapidly to the infinity OUTSIDE of the feasible
+area. Barrier width is a distance from feasible area to  the  point  where
+modified barrier function becomes infinite.
+
+Barrier width must be:
+* small enough (below some problem-dependent value) in order for algorithm
+  to  converge.  Necessary  condition  is that the target function must be
+  well described by linear model in the areas as small as barrier width.
+* not VERY small (in order to avoid  difficulties  associated  with  rapid
+  changes in the modified function, ill-conditioning, round-off issues).
+
+Choosing  appropriate  barrier  width  is  very  important  for  efficient
+optimization, and it often requires error  and  trial.  You  can  use  two
+strategies when choosing barrier width:
+* set barrier width with MinBLEICSetBarrierWidth() call. In this case  you
+  should try different barrier widths and examine results.
+* set decreasing barrier width by combining  MinBLEICSetBarrierWidth() and
+  MinBLEICSetBarrierDecay()  calls.  In  this case algorithm will decrease
+  barrier  width  after  each  outer iteration until it encounters optimal
+  barrier width.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    Mu      -   >0, initial barrier width
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbarrierwidth(minbleicstate* state,
+     double mu,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(mu, _state), "MinBLEICSetBarrierWidth: Mu is not finite number", _state);
+    ae_assert(ae_fp_greater(mu,0), "MinBLEICSetBarrierWidth: non-positive Mu", _state);
+    state->mustart = mu;
+}
+
+
+/*************************************************************************
+This function sets decay coefficient for barrier width.
+
+By default, no barrier decay is used (Decay=1.0).
+
+BLEIC optimizer uses  modified  barrier  functions  to  handle  inequality
+constraints. These functions are almost constant in the inner parts of the
+feasible  area,  but  grow rapidly to the infinity OUTSIDE of the feasible
+area. Barrier width is a distance from feasible area to  the  point  where
+modified barrier function becomes infinite. Decay coefficient allows us to
+decrease  barrier  width  from  the  initial  (suboptimial)  value   until
+optimal value will be met.
+
+We recommend you either to set MuDecay=1.0 (no decay) or use some moderate
+value like 0.5-0.7
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    MuDecay -   0<MuDecay<=1, decay coefficient
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbarrierdecay(minbleicstate* state,
+     double mudecay,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(mudecay, _state), "MinBLEICSetBarrierDecay: MuDecay is not finite number", _state);
+    ae_assert(ae_fp_greater(mudecay,0), "MinBLEICSetBarrierDecay: non-positive MuDecay", _state);
+    ae_assert(ae_fp_less_eq(mudecay,1), "MinBLEICSetBarrierDecay: MuDecay>1", _state);
+    state->mudecay = mudecay;
+}
+
+
+/*************************************************************************
+This function allows to stop algorithm after specified number of inner
+iterations.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    MaxIts  -   maximum number of inner iterations.
+                If MaxIts=0, the number of iterations is unlimited.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetmaxits(minbleicstate* state,
+     ae_int_t maxits,
+     ae_state *_state)
+{
+
+
+    ae_assert(maxits>=0, "MinBLEICSetCond: negative MaxIts!", _state);
+    state->maxits = maxits;
+}
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinBLEICOptimize().
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetxrep(minbleicstate* state,
+     ae_bool needxrep,
+     ae_state *_state)
+{
+
+
+    state->xrep = needxrep;
+}
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  lead   to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetstpmax(minbleicstate* state,
+     double stpmax,
+     ae_state *_state)
+{
+
+
+    ae_assert(ae_isfinite(stpmax, _state), "MinBLEICSetStpMax: StpMax is not finite!", _state);
+    ae_assert(ae_fp_greater_eq(stpmax,0), "MinBLEICSetStpMax: StpMax<0!", _state);
+    state->stpmax = stpmax;
+}
+
+
+/*************************************************************************
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+ae_bool minbleiciteration(minbleicstate* state, ae_state *_state)
+{
+    ae_int_t n;
+    ae_int_t m;
+    ae_int_t i;
+    double v;
+    double vv;
+    ae_bool b;
+    ae_bool result;
+
+
+    
+    /*
+     * Reverse communication preparations
+     * I know it looks ugly, but it works the same way
+     * anywhere from C++ to Python.
+     *
+     * This code initializes locals by:
+     * * random values determined during code
+     *   generation - on first subroutine call
+     * * values from previous call - on subsequent calls
+     */
+    if( state->rstate.stage>=0 )
+    {
+        n = state->rstate.ia.ptr.p_int[0];
+        m = state->rstate.ia.ptr.p_int[1];
+        i = state->rstate.ia.ptr.p_int[2];
+        b = state->rstate.ba.ptr.p_bool[0];
+        v = state->rstate.ra.ptr.p_double[0];
+        vv = state->rstate.ra.ptr.p_double[1];
+    }
+    else
+    {
+        n = -983;
+        m = -989;
+        i = -834;
+        b = ae_false;
+        v = -287;
+        vv = 364;
+    }
+    if( state->rstate.stage==0 )
+    {
+        goto lbl_0;
+    }
+    if( state->rstate.stage==1 )
+    {
+        goto lbl_1;
+    }
+    if( state->rstate.stage==2 )
+    {
+        goto lbl_2;
+    }
+    if( state->rstate.stage==3 )
+    {
+        goto lbl_3;
+    }
+    if( state->rstate.stage==4 )
+    {
+        goto lbl_4;
+    }
+    
+    /*
+     * Routine body
+     */
+    
+    /*
+     * Prepare.
+     */
+    n = state->n;
+    state->repterminationtype = 0;
+    state->repinneriterationscount = 0;
+    state->repouteriterationscount = 0;
+    state->repnfev = 0;
+    state->repdebugeqerr = 0.0;
+    state->repdebugfs = _state->v_nan;
+    state->repdebugff = _state->v_nan;
+    state->repdebugdx = _state->v_nan;
+    rvectorsetlengthatleast(&state->r, n, _state);
+    rvectorsetlengthatleast(&state->tmp1, n, _state);
+    
+    /*
+     * Check that equality constraints are consistent within EpsC.
+     * If not - premature termination.
+     */
+    if( state->cedim>0 )
+    {
+        state->repdebugeqerr = 0.0;
+        for(i=0; i<=state->cecnt-1; i++)
+        {
+            v = ae_v_dotproduct(&state->ce.ptr.pp_double[i][0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1));
+            state->repdebugeqerr = state->repdebugeqerr+ae_sqr(v-state->ce.ptr.pp_double[i][n], _state);
+        }
+        state->repdebugeqerr = ae_sqrt(state->repdebugeqerr, _state);
+        if( ae_fp_greater(state->repdebugeqerr,state->outerepsi) )
+        {
+            state->repterminationtype = -3;
+            result = ae_false;
+            return result;
+        }
+    }
+    
+    /*
+     * Find feasible point.
+     *
+     * We make up to 16*N iterations of nonlinear CG trying to
+     * minimize penalty for violation of inequality constraints.
+     *
+     * if P is magnitude of violation, then penalty function has form:
+     * * P*P+P for non-zero violation (P>=0)
+     * * 0.0 for absence of violation (P<0)
+     * Such function is non-smooth at P=0.0, but its nonsmoothness
+     * allows us to rapidly converge to the feasible point.
+     */
+    minbleic_makeprojection(state, &state->xcur, &state->r, &vv, _state);
+    mincgrestartfrom(&state->cgstate, &state->xcur, _state);
+    mincgsetcond(&state->cgstate, 0.0, 0.0, 0.0, 16*n, _state);
+    mincgsetxrep(&state->cgstate, ae_false, _state);
+    while(mincgiteration(&state->cgstate, _state))
+    {
+        if( state->cgstate.needfg )
+        {
+            ae_v_move(&state->x.ptr.p_double[0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+            minbleic_penaltyfunction(state, &state->x, &state->cgstate.f, &state->cgstate.g, &state->r, &state->mba, &state->errfeas, _state);
+            continue;
+        }
+    }
+    mincgresults(&state->cgstate, &state->xcur, &state->cgrep, _state);
+    ae_v_move(&state->tmp1.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_penaltyfunction(state, &state->tmp1, &state->f, &state->g, &state->r, &state->mba, &state->errfeas, _state);
+    if( ae_fp_greater(state->errfeas,state->outerepsi) )
+    {
+        state->repterminationtype = -3;
+        result = ae_false;
+        return result;
+    }
+    
+    /*
+     * Initialize RepDebugFS with function value at initial point
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 0;
+    goto lbl_rcomm;
+lbl_0:
+    state->needfg = ae_false;
+    state->repnfev = state->repnfev+1;
+    state->repdebugfs = state->f;
+    
+    /*
+     * Calculate number of inequality constraints and allocate
+     * array for Lagrange multipliers.
+     *
+     * Initialize Lagrange multipliers and penalty term
+     */
+    state->lmcnt = state->cicnt;
+    for(i=0; i<=n-1; i++)
+    {
+        if( state->hasbndl.ptr.p_bool[i] )
+        {
+            state->lmcnt = state->lmcnt+1;
+        }
+        if( state->hasbndu.ptr.p_bool[i] )
+        {
+            state->lmcnt = state->lmcnt+1;
+        }
+    }
+    if( state->lmcnt>0 )
+    {
+        rvectorsetlengthatleast(&state->lm, state->lmcnt, _state);
+    }
+    for(i=0; i<=state->lmcnt-1; i++)
+    {
+        state->lm.ptr.p_double[i] = 1.0;
+    }
+    state->mu = ae_maxreal(state->mustart, state->outerepsi, _state);
+    
+    /*
+     * BndMax is a maximum over right parts of inequality constraints.
+     * It is used later to bound Mu from below.
+     */
+    state->bndmax = 0.0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( state->hasbndl.ptr.p_bool[i] )
+        {
+            state->bndmax = ae_maxreal(state->bndmax, ae_fabs(state->bndl.ptr.p_double[i], _state), _state);
+        }
+        if( state->hasbndu.ptr.p_bool[i] )
+        {
+            state->bndmax = ae_maxreal(state->bndmax, ae_fabs(state->bndu.ptr.p_double[i], _state), _state);
+        }
+    }
+    for(i=0; i<=state->cicnt-1; i++)
+    {
+        state->bndmax = ae_maxreal(state->bndmax, ae_fabs(state->ci.ptr.pp_double[i][n], _state), _state);
+    }
+    
+    /*
+     * External cycle: optimization subject to current
+     * estimate of Lagrange multipliers and penalty term
+     */
+    state->itsleft = state->maxits;
+    state->mucounter = minbleic_mucountdownlen;
+    ae_v_move(&state->xprev.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    ae_v_move(&state->xstart.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+lbl_5:
+    if( ae_false )
+    {
+        goto lbl_6;
+    }
+    
+    /*
+     * Inner cycle: CG with projections and penalty functions
+     */
+    mincgrestartfrom(&state->cgstate, &state->xcur, _state);
+    mincgsetcond(&state->cgstate, state->innerepsg, state->innerepsf, state->innerepsx, state->itsleft, _state);
+    mincgsetxrep(&state->cgstate, state->xrep, _state);
+    mincgsetdrep(&state->cgstate, ae_true, _state);
+    mincgsetstpmax(&state->cgstate, state->stpmax, _state);
+lbl_7:
+    if( !mincgiteration(&state->cgstate, _state) )
+    {
+        goto lbl_8;
+    }
+    if( state->cgstate.lsstart )
+    {
+        
+        /*
+         * Beginning of the line search: set upper limit on step size
+         * to prevent algo from leaving area where barrier function
+         * is defined.
+         *
+         * We calculate State.CGState.CurStpMax in two steps:
+         * * first, we calculate it as the distance from the current
+         *   point to the boundary where modified barrier function
+         *   overflows; distance is taken along State.CGState.D
+         * * then we multiply it by 0.999 to make sure that we won't
+         *   make step into the boundary due to the rounding noise
+         */
+        if( ae_fp_eq(state->cgstate.curstpmax,0) )
+        {
+            state->cgstate.curstpmax = 1.0E50;
+        }
+        state->boundary = -0.9*state->mu;
+        state->closetobarrier = ae_false;
+        for(i=0; i<=n-1; i++)
+        {
+            if( state->hasbndl.ptr.p_bool[i] )
+            {
+                v = state->cgstate.x.ptr.p_double[i]-state->bndl.ptr.p_double[i];
+                if( ae_fp_less(state->cgstate.d.ptr.p_double[i],0) )
+                {
+                    state->cgstate.curstpmax = safeminposrv(v+state->mu, -state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state);
+                }
+                if( ae_fp_greater(state->cgstate.d.ptr.p_double[i],0)&&ae_fp_less_eq(v,state->boundary) )
+                {
+                    state->cgstate.curstpmax = safeminposrv(-v, state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state);
+                    state->closetobarrier = ae_true;
+                }
+            }
+            if( state->hasbndu.ptr.p_bool[i] )
+            {
+                v = state->bndu.ptr.p_double[i]-state->cgstate.x.ptr.p_double[i];
+                if( ae_fp_greater(state->cgstate.d.ptr.p_double[i],0) )
+                {
+                    state->cgstate.curstpmax = safeminposrv(v+state->mu, state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state);
+                }
+                if( ae_fp_less(state->cgstate.d.ptr.p_double[i],0)&&ae_fp_less_eq(v,state->boundary) )
+                {
+                    state->cgstate.curstpmax = safeminposrv(-v, -state->cgstate.d.ptr.p_double[i], state->cgstate.curstpmax, _state);
+                    state->closetobarrier = ae_true;
+                }
+            }
+        }
+        for(i=0; i<=state->cicnt-1; i++)
+        {
+            v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &state->cgstate.d.ptr.p_double[0], 1, ae_v_len(0,n-1));
+            vv = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+            vv = vv-state->ci.ptr.pp_double[i][n];
+            if( ae_fp_less(v,0) )
+            {
+                state->cgstate.curstpmax = safeminposrv(vv+state->mu, -v, state->cgstate.curstpmax, _state);
+            }
+            if( ae_fp_greater(v,0)&&ae_fp_less_eq(vv,state->boundary) )
+            {
+                state->cgstate.curstpmax = safeminposrv(-vv, v, state->cgstate.curstpmax, _state);
+                state->closetobarrier = ae_true;
+            }
+        }
+        state->cgstate.curstpmax = 0.999*state->cgstate.curstpmax;
+        if( state->closetobarrier )
+        {
+            state->cgstate.stp = 0.5*state->cgstate.curstpmax;
+        }
+        goto lbl_7;
+    }
+    if( !state->cgstate.needfg )
+    {
+        goto lbl_9;
+    }
+    
+    /*
+     * * get X from CG
+     * * project X into equality constrained subspace
+     * * RComm (note: X is stored in Tmp1 to prevent accidental corruption by user)
+     * * modify target function with barriers and penalties
+     * * pass F/G back to nonlinear CG
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_makeprojection(state, &state->x, &state->r, &vv, _state);
+    ae_v_move(&state->tmp1.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 1;
+    goto lbl_rcomm;
+lbl_1:
+    state->needfg = ae_false;
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->tmp1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_modifytargetfunction(state, &state->x, &state->r, vv, &state->f, &state->g, &state->gnorm, &state->mpgnorm, &state->mba, &state->errfeas, &state->errslack, _state);
+    state->cgstate.f = state->f;
+    ae_v_move(&state->cgstate.g.ptr.p_double[0], 1, &state->g.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    goto lbl_7;
+lbl_9:
+    if( !state->cgstate.xupdated )
+    {
+        goto lbl_11;
+    }
+    
+    /*
+     * Report
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->cgstate.x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    state->f = state->cgstate.f;
+    minbleic_clearrequestfields(state, _state);
+    state->xupdated = ae_true;
+    state->rstate.stage = 2;
+    goto lbl_rcomm;
+lbl_2:
+    state->xupdated = ae_false;
+    goto lbl_7;
+lbl_11:
+    goto lbl_7;
+lbl_8:
+    mincgresults(&state->cgstate, &state->xcur, &state->cgrep, _state);
+    state->repinneriterationscount = state->repinneriterationscount+state->cgrep.iterationscount;
+    state->repouteriterationscount = state->repouteriterationscount+1;
+    state->repnfev = state->repnfev+state->cgrep.nfev;
+    
+    /*
+     * Update RepDebugFF with function value at current point
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 3;
+    goto lbl_rcomm;
+lbl_3:
+    state->needfg = ae_false;
+    state->repnfev = state->repnfev+1;
+    state->repdebugff = state->f;
+    
+    /*
+     * Update Lagrange multipliers and Mu
+     *
+     * We calculate three values during update:
+     * * LMDif      -   absolute differense between new and old multipliers
+     * * LMNorm     -   inf-norm of Lagrange vector
+     * * LMGrowth   -   maximum componentwise relative growth of Lagrange vector
+     *
+     * We limit growth of Lagrange multipliers by MaxLMGrowth,
+     * it allows us to stabilize algorithm when it moves deep into
+     * the infeasible area.
+     *
+     * We calculate modified target function at XCur in order to get
+     * information about overall problem properties. Some values
+     * calculated here will be used later:
+     * * ErrFeas  - feasibility error
+     * * ErrSlack - complementary slackness error
+     */
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_makeprojection(state, &state->x, &state->r, &vv, _state);
+    ae_v_move(&state->tmp1.ptr.p_double[0], 1, &state->x.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_clearrequestfields(state, _state);
+    state->needfg = ae_true;
+    state->rstate.stage = 4;
+    goto lbl_rcomm;
+lbl_4:
+    state->needfg = ae_false;
+    ae_v_move(&state->x.ptr.p_double[0], 1, &state->tmp1.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    minbleic_modifytargetfunction(state, &state->x, &state->r, vv, &state->f, &state->g, &state->gnorm, &state->mpgnorm, &state->mba, &state->errfeas, &state->errslack, _state);
+    m = 0;
+    state->lmdif = 0;
+    state->lmnorm = 0;
+    state->lmgrowth = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( state->hasbndl.ptr.p_bool[i] )
+        {
+            barrierfunc(state->xcur.ptr.p_double[i]-state->bndl.ptr.p_double[i], state->mu, &state->v0, &state->v1, &state->v2, _state);
+            v = state->lm.ptr.p_double[m];
+            vv = ae_minreal(minbleic_maxlmgrowth, -state->mu*state->v1, _state);
+            state->lm.ptr.p_double[m] = vv*state->lm.ptr.p_double[m];
+            state->lmnorm = ae_maxreal(state->lmnorm, ae_fabs(v, _state), _state);
+            state->lmdif = ae_maxreal(state->lmdif, ae_fabs(state->lm.ptr.p_double[m]-v, _state), _state);
+            state->lmgrowth = ae_maxreal(state->lmgrowth, vv, _state);
+            m = m+1;
+        }
+        if( state->hasbndu.ptr.p_bool[i] )
+        {
+            barrierfunc(state->bndu.ptr.p_double[i]-state->xcur.ptr.p_double[i], state->mu, &state->v0, &state->v1, &state->v2, _state);
+            v = state->lm.ptr.p_double[m];
+            vv = ae_minreal(minbleic_maxlmgrowth, -state->mu*state->v1, _state);
+            state->lm.ptr.p_double[m] = vv*state->lm.ptr.p_double[m];
+            state->lmnorm = ae_maxreal(state->lmnorm, ae_fabs(v, _state), _state);
+            state->lmdif = ae_maxreal(state->lmdif, ae_fabs(state->lm.ptr.p_double[m]-v, _state), _state);
+            state->lmgrowth = ae_maxreal(state->lmgrowth, vv, _state);
+            m = m+1;
+        }
+    }
+    for(i=0; i<=state->cicnt-1; i++)
+    {
+        v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        v = v-state->ci.ptr.pp_double[i][n];
+        barrierfunc(v, state->mu, &state->v0, &state->v1, &state->v2, _state);
+        v = state->lm.ptr.p_double[m];
+        vv = ae_minreal(minbleic_maxlmgrowth, -state->mu*state->v1, _state);
+        state->lm.ptr.p_double[m] = vv*state->lm.ptr.p_double[m];
+        state->lmnorm = ae_maxreal(state->lmnorm, ae_fabs(v, _state), _state);
+        state->lmdif = ae_maxreal(state->lmdif, ae_fabs(state->lm.ptr.p_double[m]-v, _state), _state);
+        state->lmgrowth = ae_maxreal(state->lmgrowth, vv, _state);
+        m = m+1;
+    }
+    if( ae_fp_greater(state->mba,-0.2*state->mudecay*state->mu) )
+    {
+        state->mu = ae_maxreal(state->mudecay*state->mu, 1.0E6*ae_machineepsilon*state->bndmax, _state);
+    }
+    if( ae_fp_less(state->mu,state->outerepsi) )
+    {
+        state->mucounter = state->mucounter-1;
+        state->mu = state->outerepsi;
+    }
+    
+    /*
+     * Check for stopping:
+     * * "normal", outer step size is small enough, infeasibility is within bounds
+     * * "inconsistent",  if Lagrange multipliers increased beyond threshold given by MaxLagrangeMul
+     * * "too stringent", in other cases
+     */
+    v = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        v = v+ae_sqr(state->xcur.ptr.p_double[i]-state->xprev.ptr.p_double[i], _state);
+    }
+    v = ae_sqrt(v, _state);
+    if( ae_fp_less_eq(state->errfeas,state->outerepsi)&&ae_fp_less_eq(v,state->outerepsx) )
+    {
+        state->repterminationtype = 4;
+        goto lbl_6;
+    }
+    if( state->maxits>0 )
+    {
+        state->itsleft = state->itsleft-state->cgrep.iterationscount;
+        if( state->itsleft<=0 )
+        {
+            state->repterminationtype = 5;
+            goto lbl_6;
+        }
+    }
+    if( ae_fp_greater_eq(state->repouteriterationscount,minbleic_maxouterits) )
+    {
+        state->repterminationtype = 5;
+        goto lbl_6;
+    }
+    if( ae_fp_less(state->lmdif,minbleic_lmtol*ae_machineepsilon*state->lmnorm)||ae_fp_less(state->lmnorm,minbleic_minlagrangemul) )
+    {
+        state->repterminationtype = 7;
+        goto lbl_6;
+    }
+    if( state->mucounter<=0 )
+    {
+        state->repterminationtype = 7;
+        goto lbl_6;
+    }
+    if( ae_fp_greater(state->lmnorm,minbleic_maxlagrangemul) )
+    {
+        state->repterminationtype = -3;
+        goto lbl_6;
+    }
+    
+    /*
+     * Next iteration
+     */
+    ae_v_move(&state->xprev.ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    goto lbl_5;
+lbl_6:
+    
+    /*
+     * We've stopped, fill debug information
+     */
+    state->repdebugeqerr = 0.0;
+    for(i=0; i<=state->cecnt-1; i++)
+    {
+        v = ae_v_dotproduct(&state->ce.ptr.pp_double[i][0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1));
+        state->repdebugeqerr = state->repdebugeqerr+ae_sqr(v-state->ce.ptr.pp_double[i][n], _state);
+    }
+    state->repdebugeqerr = ae_sqrt(state->repdebugeqerr, _state);
+    state->repdebugdx = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        state->repdebugdx = state->repdebugdx+ae_sqr(state->xcur.ptr.p_double[i]-state->xstart.ptr.p_double[i], _state);
+    }
+    state->repdebugdx = ae_sqrt(state->repdebugdx, _state);
+    result = ae_false;
+    return result;
+    
+    /*
+     * Saving state
+     */
+lbl_rcomm:
+    result = ae_true;
+    state->rstate.ia.ptr.p_int[0] = n;
+    state->rstate.ia.ptr.p_int[1] = m;
+    state->rstate.ia.ptr.p_int[2] = i;
+    state->rstate.ba.ptr.p_bool[0] = b;
+    state->rstate.ra.ptr.p_double[0] = v;
+    state->rstate.ra.ptr.p_double[1] = vv;
+    return result;
+}
+
+
+/*************************************************************************
+BLEIC results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -3    inconsistent constraints. Feasible point is
+                            either nonexistent or too hard to find. Try to
+                            restart optimizer with better initial
+                            approximation
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    *  4    conditions on constraints are fulfilled
+                            with error less than or equal to EpsC
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible,
+                            X contains best point found so far.
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicresults(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     minbleicreport* rep,
+     ae_state *_state)
+{
+
+    ae_vector_clear(x);
+    _minbleicreport_clear(rep);
+
+    minbleicresultsbuf(state, x, rep, _state);
+}
+
+
+/*************************************************************************
+BLEIC results
+
+Buffered implementation of MinBLEICResults() which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicresultsbuf(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     minbleicreport* rep,
+     ae_state *_state)
+{
+
+
+    if( x->cnt<state->n )
+    {
+        ae_vector_set_length(x, state->n, _state);
+    }
+    ae_v_move(&x->ptr.p_double[0], 1, &state->xcur.ptr.p_double[0], 1, ae_v_len(0,state->n-1));
+    rep->inneriterationscount = state->repinneriterationscount;
+    rep->outeriterationscount = state->repouteriterationscount;
+    rep->nfev = state->repnfev;
+    rep->terminationtype = state->repterminationtype;
+    rep->debugeqerr = state->repdebugeqerr;
+    rep->debugfs = state->repdebugfs;
+    rep->debugff = state->repdebugff;
+    rep->debugdx = state->repdebugdx;
+}
+
+
+/*************************************************************************
+This subroutine restarts algorithm from new point.
+All optimization parameters (including constraints) are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have  same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicrestartfrom(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state)
+{
+    ae_int_t n;
+
+
+    n = state->n;
+    
+    /*
+     * First, check for errors in the inputs
+     */
+    ae_assert(x->cnt>=n, "MinBLEICRestartFrom: Length(X)<N", _state);
+    ae_assert(isfinitevector(x, n, _state), "MinBLEICRestartFrom: X contains infinite or NaN values!", _state);
+    
+    /*
+     * Set XC
+     */
+    ae_v_move(&state->xcur.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * prepare RComm facilities
+     */
+    ae_vector_set_length(&state->rstate.ia, 2+1, _state);
+    ae_vector_set_length(&state->rstate.ba, 0+1, _state);
+    ae_vector_set_length(&state->rstate.ra, 1+1, _state);
+    state->rstate.stage = -1;
+    minbleic_clearrequestfields(state, _state);
+}
+
+
+/*************************************************************************
+Modified barrier function calculated at point X with respect to the
+barrier parameter Mu.
+
+Functions, its first and second derivatives are calculated.
+*************************************************************************/
+void barrierfunc(double x,
+     double mu,
+     double* f,
+     double* df,
+     double* d2f,
+     ae_state *_state)
+{
+    double c0;
+    double c1;
+    double c2;
+    double xpmu;
+
+    *f = 0;
+    *df = 0;
+    *d2f = 0;
+
+    xpmu = x+mu;
+    if( ae_fp_greater(xpmu,0.5*mu) )
+    {
+        *f = -ae_log(x/mu+1, _state);
+        *df = -1/(x+mu);
+        *d2f = 1/(xpmu*xpmu);
+        return;
+    }
+    if( ae_fp_greater(xpmu,0) )
+    {
+        c0 = -ae_log(0.5, _state)-0.5;
+        c1 = -1/mu;
+        c2 = mu/4;
+        *f = c0+c1*(x+0.5*mu)+c2/xpmu;
+        *df = c1-c2/((x+mu)*(x+mu));
+        *d2f = 2*c2/((x+mu)*(x+mu)*(x+mu));
+        return;
+    }
+    *f = ae_maxrealnumber;
+    *df = 0;
+    *d2f = 0;
+}
+
+
+/*************************************************************************
+Clears request fileds (to be sure that we don't forget to clear something)
+*************************************************************************/
+static void minbleic_clearrequestfields(minbleicstate* state,
+     ae_state *_state)
+{
+
+
+    state->needfg = ae_false;
+    state->xupdated = ae_false;
+}
+
+
+/*************************************************************************
+This function makes projection of X into equality constrained subspace.
+It calculates set of additional values which are used later for
+modification of the target function F.
+
+INPUT PARAMETERS:
+    State   -   optimizer state (we use its fields to get information
+                about constraints)
+    X       -   vector being projected
+    R       -   preallocated buffer, used to store residual from projection
+
+OUTPUT PARAMETERS:
+    X       -   projection of input X
+    R       -   residual
+    RNorm   -   residual norm squared, used later to modify target function
+*************************************************************************/
+static void minbleic_makeprojection(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* r,
+     double* rnorm2,
+     ae_state *_state)
+{
+    double v;
+    ae_int_t i;
+    ae_int_t n;
+
+    *rnorm2 = 0;
+
+    n = state->n;
+    
+    /*
+     * * subtract XE from X
+     * * project X
+     * * calculate norm of deviation from null space, store it in VV
+     * * calculate residual from projection, store it in R
+     * * add XE to X
+     */
+    ae_v_sub(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    *rnorm2 = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        r->ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=state->cedim-1; i++)
+    {
+        v = ae_v_dotproduct(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
+        ae_v_subd(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        ae_v_addd(&r->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        *rnorm2 = *rnorm2+ae_sqr(v, _state);
+    }
+    ae_v_add(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1));
+}
+
+
+/*************************************************************************
+This subroutine applies modifications to the target function given by
+its value F and gradient G at the projected point X which lies in the
+equality constrained subspace.
+
+Following modifications are applied:
+* modified barrier functions to handle inequality constraints
+  (both F and G are modified)
+* projection of gradient into equality constrained subspace
+  (only G is modified)
+* quadratic penalty for deviations from equality constrained subspace
+  (both F and G are modified)
+
+It also calculates gradient norm (three different norms for three
+different types of gradient), feasibility and complementary slackness
+errors.
+
+INPUT PARAMETERS:
+    State   -   optimizer state (we use its fields to get information
+                about constraints)
+    X       -   point (projected into equality constrained subspace)
+    R       -   residual from projection
+    RNorm2  -   residual norm squared
+    F       -   function value at X
+    G       -   function gradient at X
+
+OUTPUT PARAMETERS:
+    F       -   modified function value at X
+    G       -   modified function gradient at X
+    GNorm   -   2-norm of unmodified G
+    MPGNorm -   2-norm of modified G
+    MBA     -   minimum argument of barrier functions.
+                If X is strictly feasible, it is greater than zero.
+                If X lies on a boundary, it is zero.
+                It is negative for infeasible X.
+    FIErr   -   2-norm of feasibility error with respect to
+                inequality/bound constraints
+    CSErr   -   2-norm of complementarity slackness error
+*************************************************************************/
+static void minbleic_modifytargetfunction(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* r,
+     double rnorm2,
+     double* f,
+     /* Real    */ ae_vector* g,
+     double* gnorm,
+     double* mpgnorm,
+     double* mba,
+     double* fierr,
+     double* cserr,
+     ae_state *_state)
+{
+    double v;
+    double vv;
+    double t;
+    ae_int_t i;
+    ae_int_t n;
+    ae_int_t m;
+    double v0;
+    double v1;
+    double v2;
+    ae_bool hasconstraints;
+
+    *gnorm = 0;
+    *mpgnorm = 0;
+    *mba = 0;
+    *fierr = 0;
+    *cserr = 0;
+
+    n = state->n;
+    *mba = ae_maxrealnumber;
+    hasconstraints = ae_false;
+    
+    /*
+     * GNorm
+     */
+    v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &g->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    *gnorm = ae_sqrt(v, _state);
+    
+    /*
+     * Process bound and inequality constraints.
+     * Bound constraints with +-INF are ignored.
+     * Here M is used to store number of constraints processed.
+     */
+    m = 0;
+    *fierr = 0;
+    *cserr = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( state->hasbndl.ptr.p_bool[i] )
+        {
+            v = x->ptr.p_double[i]-state->bndl.ptr.p_double[i];
+            *mba = ae_minreal(v, *mba, _state);
+            barrierfunc(v, state->mu, &v0, &v1, &v2, _state);
+            *f = *f+state->mu*state->lm.ptr.p_double[m]*v0;
+            g->ptr.p_double[i] = g->ptr.p_double[i]+state->mu*state->lm.ptr.p_double[m]*v1;
+            if( ae_fp_less(v,0) )
+            {
+                *fierr = *fierr+v*v;
+            }
+            t = -state->lm.ptr.p_double[m]*v;
+            *cserr = *cserr+t*t;
+            m = m+1;
+            hasconstraints = ae_true;
+        }
+        if( state->hasbndu.ptr.p_bool[i] )
+        {
+            v = state->bndu.ptr.p_double[i]-x->ptr.p_double[i];
+            *mba = ae_minreal(v, *mba, _state);
+            barrierfunc(v, state->mu, &v0, &v1, &v2, _state);
+            *f = *f+state->mu*state->lm.ptr.p_double[m]*v0;
+            g->ptr.p_double[i] = g->ptr.p_double[i]-state->mu*state->lm.ptr.p_double[m]*v1;
+            if( ae_fp_less(v,0) )
+            {
+                *fierr = *fierr+v*v;
+            }
+            t = -state->lm.ptr.p_double[m]*v;
+            *cserr = *cserr+t*t;
+            m = m+1;
+            hasconstraints = ae_true;
+        }
+    }
+    for(i=0; i<=state->cicnt-1; i++)
+    {
+        v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
+        v = v-state->ci.ptr.pp_double[i][n];
+        *mba = ae_minreal(v, *mba, _state);
+        barrierfunc(v, state->mu, &v0, &v1, &v2, _state);
+        *f = *f+state->mu*state->lm.ptr.p_double[m]*v0;
+        vv = state->mu*state->lm.ptr.p_double[m]*v1;
+        ae_v_addd(&g->ptr.p_double[0], 1, &state->ci.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), vv);
+        if( ae_fp_less(v,0) )
+        {
+            *fierr = *fierr+v*v;
+        }
+        t = -state->lm.ptr.p_double[m]*v;
+        *cserr = *cserr+t*t;
+        m = m+1;
+        hasconstraints = ae_true;
+    }
+    *fierr = ae_sqrt(*fierr, _state);
+    *cserr = ae_sqrt(*cserr, _state);
+    if( !hasconstraints )
+    {
+        *mba = 0.0;
+    }
+    
+    /*
+     * Process equality constraints:
+     * * modify F to handle penalty term for equality constraints
+     * * project gradient on null space of equality constraints
+     * * add penalty term for equality constraints to gradient
+     */
+    *f = *f+rnorm2;
+    for(i=0; i<=state->cedim-1; i++)
+    {
+        v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
+        ae_v_subd(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+    }
+    ae_v_addd(&g->ptr.p_double[0], 1, &r->ptr.p_double[0], 1, ae_v_len(0,n-1), 2);
+    
+    /*
+     * MPGNorm
+     */
+    v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &g->ptr.p_double[0], 1, ae_v_len(0,n-1));
+    *mpgnorm = ae_sqrt(v, _state);
+}
+
+
+/*************************************************************************
+This subroutine calculates penalty for violation of equality/inequality
+constraints. It is used to find feasible point.
+
+Following modifications are applied:
+* quadratic penalty for deviations from inequality constrained subspace
+  (both F and G are modified)
+* projection of gradient into equality constrained subspace
+  (only G is modified)
+* quadratic penalty for deviations from equality constrained subspace
+  (both F and G are modified)
+
+INPUT PARAMETERS:
+    State   -   optimizer state (we use its fields to get information
+                about constraints)
+    X       -   point (modified by function)
+    G       -   preallocated array[N]
+    R       -   preallocated array[N]
+
+OUTPUT PARAMETERS:
+    X       -   projection of X into equality constrained subspace
+    F       -   modified function value at X
+    G       -   modified function gradient at X
+    MBA     -   minimum argument of barrier functions.
+                If X is strictly feasible, it is greater than zero.
+                If X lies on a boundary, it is zero.
+                It is negative for infeasible X.
+    FIErr   -   2-norm of feasibility error with respect to
+                inequality/bound constraints
+*************************************************************************/
+static void minbleic_penaltyfunction(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     double* f,
+     /* Real    */ ae_vector* g,
+     /* Real    */ ae_vector* r,
+     double* mba,
+     double* fierr,
+     ae_state *_state)
+{
+    double v;
+    double vv;
+    ae_int_t i;
+    ae_int_t m;
+    ae_int_t n;
+    double rnorm2;
+    ae_bool hasconstraints;
+
+    *mba = 0;
+    *fierr = 0;
+
+    n = state->n;
+    *mba = ae_maxrealnumber;
+    hasconstraints = ae_false;
+    
+    /*
+     * Initialize F/G
+     */
+    *f = 0.0;
+    for(i=0; i<=n-1; i++)
+    {
+        g->ptr.p_double[i] = 0.0;
+    }
+    
+    /*
+     * Calculate projection of X:
+     * * subtract XE from X
+     * * project X
+     * * calculate norm of deviation from null space, store it in VV
+     * * calculate residual from projection, store it in R
+     * * add XE to X
+     */
+    ae_v_sub(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    rnorm2 = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        r->ptr.p_double[i] = 0;
+    }
+    for(i=0; i<=state->cedim-1; i++)
+    {
+        v = ae_v_dotproduct(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
+        ae_v_subd(&x->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        ae_v_addd(&r->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+        rnorm2 = rnorm2+ae_sqr(v, _state);
+    }
+    ae_v_add(&x->ptr.p_double[0], 1, &state->xe.ptr.p_double[0], 1, ae_v_len(0,n-1));
+    
+    /*
+     * Process bound and inequality constraints.
+     * Bound constraints with +-INF are ignored.
+     * Here M is used to store number of constraints processed.
+     */
+    m = 0;
+    *fierr = 0;
+    for(i=0; i<=n-1; i++)
+    {
+        if( state->hasbndl.ptr.p_bool[i] )
+        {
+            v = x->ptr.p_double[i]-state->bndl.ptr.p_double[i];
+            *mba = ae_minreal(v, *mba, _state);
+            if( ae_fp_less(v,0) )
+            {
+                *f = *f+v*v;
+                g->ptr.p_double[i] = g->ptr.p_double[i]+2*v;
+                *fierr = *fierr+v*v;
+            }
+            m = m+1;
+            hasconstraints = ae_true;
+        }
+        if( state->hasbndu.ptr.p_bool[i] )
+        {
+            v = state->bndu.ptr.p_double[i]-x->ptr.p_double[i];
+            *mba = ae_minreal(v, *mba, _state);
+            if( ae_fp_less(v,0) )
+            {
+                *f = *f+v*v;
+                g->ptr.p_double[i] = g->ptr.p_double[i]-2*v;
+                *fierr = *fierr+v*v;
+            }
+            m = m+1;
+            hasconstraints = ae_true;
+        }
+    }
+    for(i=0; i<=state->cicnt-1; i++)
+    {
+        v = ae_v_dotproduct(&state->ci.ptr.pp_double[i][0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
+        v = v-state->ci.ptr.pp_double[i][n];
+        *mba = ae_minreal(v, *mba, _state);
+        if( ae_fp_less(v,0) )
+        {
+            *f = *f+v*v;
+            vv = 2*v;
+            ae_v_addd(&g->ptr.p_double[0], 1, &state->ci.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), vv);
+            *fierr = *fierr+v*v;
+        }
+        m = m+1;
+        hasconstraints = ae_true;
+    }
+    *fierr = ae_sqrt(*fierr, _state);
+    if( !hasconstraints )
+    {
+        *mba = 0.0;
+    }
+    
+    /*
+     * Process equality constraints:
+     * * modify F to handle penalty term for equality constraints
+     * * project gradient on null space of equality constraints
+     * * add penalty term for equality constraints to gradient
+     */
+    *f = *f+rnorm2;
+    for(i=0; i<=state->cedim-1; i++)
+    {
+        v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
+        ae_v_subd(&g->ptr.p_double[0], 1, &state->cebasis.ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
+    }
+    ae_v_addd(&g->ptr.p_double[0], 1, &r->ptr.p_double[0], 1, ae_v_len(0,n-1), 2);
+}
+
+
+ae_bool _minbleicstate_init(minbleicstate* p, ae_state *_state, ae_bool make_automatic)
+{
+    if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xcur, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xprev, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xstart, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->ce, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->ci, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->cebasis, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init(&p->cesvl, 0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->xe, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->bndl, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->hasbndl, 0, DT_BOOL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->bndu, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->hasbndu, 0, DT_BOOL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->lm, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->w, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->tmp1, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init(&p->r, 0, DT_REAL, _state, make_automatic) )
+        return ae_false;
+    if( !_linminstate_init(&p->lstate, _state, make_automatic) )
+        return ae_false;
+    if( !_mincgstate_init(&p->cgstate, _state, make_automatic) )
+        return ae_false;
+    if( !_mincgreport_init(&p->cgrep, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+ae_bool _minbleicstate_init_copy(minbleicstate* dst, minbleicstate* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->n = src->n;
+    dst->innerepsg = src->innerepsg;
+    dst->innerepsf = src->innerepsf;
+    dst->innerepsx = src->innerepsx;
+    dst->outerepsx = src->outerepsx;
+    dst->outerepsi = src->outerepsi;
+    dst->maxits = src->maxits;
+    dst->xrep = src->xrep;
+    dst->stpmax = src->stpmax;
+    dst->cgtype = src->cgtype;
+    dst->mustart = src->mustart;
+    dst->mudecay = src->mudecay;
+    if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
+        return ae_false;
+    dst->f = src->f;
+    if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) )
+        return ae_false;
+    dst->needfg = src->needfg;
+    dst->xupdated = src->xupdated;
+    if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
+        return ae_false;
+    dst->repinneriterationscount = src->repinneriterationscount;
+    dst->repouteriterationscount = src->repouteriterationscount;
+    dst->repnfev = src->repnfev;
+    dst->repterminationtype = src->repterminationtype;
+    dst->repdebugeqerr = src->repdebugeqerr;
+    dst->repdebugfs = src->repdebugfs;
+    dst->repdebugff = src->repdebugff;
+    dst->repdebugdx = src->repdebugdx;
+    if( !ae_vector_init_copy(&dst->xcur, &src->xcur, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xprev, &src->xprev, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xstart, &src->xstart, _state, make_automatic) )
+        return ae_false;
+    dst->itsleft = src->itsleft;
+    dst->mucounter = src->mucounter;
+    if( !ae_matrix_init_copy(&dst->ce, &src->ce, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->ci, &src->ci, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->cebasis, &src->cebasis, _state, make_automatic) )
+        return ae_false;
+    if( !ae_matrix_init_copy(&dst->cesvl, &src->cesvl, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->xe, &src->xe, _state, make_automatic) )
+        return ae_false;
+    dst->cecnt = src->cecnt;
+    dst->cicnt = src->cicnt;
+    dst->cedim = src->cedim;
+    if( !ae_vector_init_copy(&dst->bndl, &src->bndl, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->hasbndl, &src->hasbndl, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->bndu, &src->bndu, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->hasbndu, &src->hasbndu, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->lm, &src->lm, _state, make_automatic) )
+        return ae_false;
+    dst->lmcnt = src->lmcnt;
+    dst->mu = src->mu;
+    if( !ae_vector_init_copy(&dst->w, &src->w, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic) )
+        return ae_false;
+    if( !ae_vector_init_copy(&dst->r, &src->r, _state, make_automatic) )
+        return ae_false;
+    dst->v0 = src->v0;
+    dst->v1 = src->v1;
+    dst->v2 = src->v2;
+    dst->t = src->t;
+    dst->errfeas = src->errfeas;
+    dst->errslack = src->errslack;
+    dst->gnorm = src->gnorm;
+    dst->mpgnorm = src->mpgnorm;
+    dst->lmdif = src->lmdif;
+    dst->lmnorm = src->lmnorm;
+    dst->lmgrowth = src->lmgrowth;
+    dst->mba = src->mba;
+    dst->boundary = src->boundary;
+    dst->closetobarrier = src->closetobarrier;
+    dst->bndmax = src->bndmax;
+    if( !_linminstate_init_copy(&dst->lstate, &src->lstate, _state, make_automatic) )
+        return ae_false;
+    if( !_mincgstate_init_copy(&dst->cgstate, &src->cgstate, _state, make_automatic) )
+        return ae_false;
+    if( !_mincgreport_init_copy(&dst->cgrep, &src->cgrep, _state, make_automatic) )
+        return ae_false;
+    return ae_true;
+}
+
+
+void _minbleicstate_clear(minbleicstate* p)
+{
+    ae_vector_clear(&p->x);
+    ae_vector_clear(&p->g);
+    _rcommstate_clear(&p->rstate);
+    ae_vector_clear(&p->xcur);
+    ae_vector_clear(&p->xprev);
+    ae_vector_clear(&p->xstart);
+    ae_matrix_clear(&p->ce);
+    ae_matrix_clear(&p->ci);
+    ae_matrix_clear(&p->cebasis);
+    ae_matrix_clear(&p->cesvl);
+    ae_vector_clear(&p->xe);
+    ae_vector_clear(&p->bndl);
+    ae_vector_clear(&p->hasbndl);
+    ae_vector_clear(&p->bndu);
+    ae_vector_clear(&p->hasbndu);
+    ae_vector_clear(&p->lm);
+    ae_vector_clear(&p->w);
+    ae_vector_clear(&p->tmp0);
+    ae_vector_clear(&p->tmp1);
+    ae_vector_clear(&p->r);
+    _linminstate_clear(&p->lstate);
+    _mincgstate_clear(&p->cgstate);
+    _mincgreport_clear(&p->cgrep);
+}
+
+
+ae_bool _minbleicreport_init(minbleicreport* p, ae_state *_state, ae_bool make_automatic)
+{
+    return ae_true;
+}
+
+
+ae_bool _minbleicreport_init_copy(minbleicreport* dst, minbleicreport* src, ae_state *_state, ae_bool make_automatic)
+{
+    dst->inneriterationscount = src->inneriterationscount;
+    dst->outeriterationscount = src->outeriterationscount;
+    dst->nfev = src->nfev;
+    dst->terminationtype = src->terminationtype;
+    dst->debugeqerr = src->debugeqerr;
+    dst->debugfs = src->debugfs;
+    dst->debugff = src->debugff;
+    dst->debugdx = src->debugdx;
+    return ae_true;
+}
+
+
+void _minbleicreport_clear(minbleicreport* p)
+{
+}
+
+
+
+}
+
diff --git a/contrib/lbfgs/optimization.h b/contrib/lbfgs/optimization.h
new file mode 100755
index 0000000000..86a6c0a0ad
--- /dev/null
+++ b/contrib/lbfgs/optimization.h
@@ -0,0 +1,2649 @@
+/*************************************************************************
+Copyright (c) Sergey Bochkanov (ALGLIB project).
+
+>>> SOURCE LICENSE >>>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation (www.fsf.org); either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+A copy of the GNU General Public License is available at
+http://www.fsf.org/licensing/licenses
+>>> END OF LICENSE >>>
+*************************************************************************/
+#ifndef _optimization_pkg_h
+#define _optimization_pkg_h
+#include "ap.h"
+#include "alglibinternal.h"
+#include "linalg.h"
+#include "alglibmisc.h"
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+typedef struct
+{
+    ae_int_t n;
+    ae_int_t m;
+    double epsg;
+    double epsf;
+    double epsx;
+    ae_int_t maxits;
+    ae_int_t flags;
+    ae_bool xrep;
+    double stpmax;
+    ae_int_t nfev;
+    ae_int_t mcstage;
+    ae_int_t k;
+    ae_int_t q;
+    ae_int_t p;
+    ae_vector rho;
+    ae_matrix y;
+    ae_matrix s;
+    ae_vector theta;
+    ae_vector d;
+    double stp;
+    ae_vector work;
+    double fold;
+    ae_int_t prectype;
+    double gammak;
+    ae_matrix denseh;
+    ae_vector autobuf;
+    ae_vector x;
+    double f;
+    ae_vector g;
+    ae_bool needfg;
+    ae_bool xupdated;
+    rcommstate rstate;
+    ae_int_t repiterationscount;
+    ae_int_t repnfev;
+    ae_int_t repterminationtype;
+    linminstate lstate;
+} minlbfgsstate;
+typedef struct
+{
+    ae_int_t iterationscount;
+    ae_int_t nfev;
+    ae_int_t terminationtype;
+} minlbfgsreport;
+typedef struct
+{
+    ae_int_t n;
+    ae_int_t m;
+    double diffstep;
+    double epsg;
+    double epsf;
+    double epsx;
+    ae_int_t maxits;
+    ae_bool xrep;
+    double stpmax;
+    ae_int_t maxmodelage;
+    ae_bool makeadditers;
+    ae_vector x;
+    double f;
+    ae_vector fi;
+    ae_matrix j;
+    ae_matrix h;
+    ae_vector g;
+    ae_bool needf;
+    ae_bool needfg;
+    ae_bool needfgh;
+    ae_bool needfij;
+    ae_bool needfi;
+    ae_bool xupdated;
+    ae_int_t algomode;
+    ae_bool hasf;
+    ae_bool hasfi;
+    ae_bool hasg;
+    ae_vector xbase;
+    double fbase;
+    ae_vector fibase;
+    ae_vector gbase;
+    ae_matrix quadraticmodel;
+    double lambdav;
+    double nu;
+    ae_matrix dampedmodel;
+    ae_int_t modelage;
+    ae_vector xdir;
+    ae_vector deltax;
+    ae_vector deltaf;
+    ae_bool deltaxready;
+    ae_bool deltafready;
+    ae_int_t repiterationscount;
+    ae_int_t repterminationtype;
+    ae_int_t repnfunc;
+    ae_int_t repnjac;
+    ae_int_t repngrad;
+    ae_int_t repnhess;
+    ae_int_t repncholesky;
+    rcommstate rstate;
+    ae_vector choleskybuf;
+    double actualdecrease;
+    double predicteddecrease;
+    ae_vector fm2;
+    ae_vector fm1;
+    ae_vector fp2;
+    ae_vector fp1;
+    minlbfgsstate internalstate;
+    minlbfgsreport internalrep;
+} minlmstate;
+typedef struct
+{
+    ae_int_t iterationscount;
+    ae_int_t terminationtype;
+    ae_int_t nfunc;
+    ae_int_t njac;
+    ae_int_t ngrad;
+    ae_int_t nhess;
+    ae_int_t ncholesky;
+} minlmreport;
+typedef struct
+{
+    ae_int_t n;
+    double epsg;
+    double epsf;
+    double epsx;
+    ae_int_t maxits;
+    ae_bool xrep;
+    double stpmax;
+    ae_int_t cgtype;
+    ae_int_t k;
+    ae_int_t nfev;
+    ae_int_t mcstage;
+    ae_vector bndl;
+    ae_vector bndu;
+    ae_int_t curalgo;
+    ae_int_t acount;
+    double mu;
+    double finit;
+    double dginit;
+    ae_vector ak;
+    ae_vector xk;
+    ae_vector dk;
+    ae_vector an;
+    ae_vector xn;
+    ae_vector dn;
+    ae_vector d;
+    double fold;
+    double stp;
+    ae_vector work;
+    ae_vector yk;
+    ae_vector gc;
+    double laststep;
+    ae_vector x;
+    double f;
+    ae_vector g;
+    ae_bool needfg;
+    ae_bool xupdated;
+    rcommstate rstate;
+    ae_int_t repiterationscount;
+    ae_int_t repnfev;
+    ae_int_t repterminationtype;
+    ae_int_t debugrestartscount;
+    linminstate lstate;
+    double betahs;
+    double betady;
+} minasastate;
+typedef struct
+{
+    ae_int_t iterationscount;
+    ae_int_t nfev;
+    ae_int_t terminationtype;
+    ae_int_t activeconstraints;
+} minasareport;
+typedef struct
+{
+    ae_int_t n;
+    double epsg;
+    double epsf;
+    double epsx;
+    ae_int_t maxits;
+    double stpmax;
+    double suggestedstep;
+    ae_bool xrep;
+    ae_bool drep;
+    ae_int_t cgtype;
+    ae_int_t nfev;
+    ae_int_t mcstage;
+    ae_int_t k;
+    ae_vector xk;
+    ae_vector dk;
+    ae_vector xn;
+    ae_vector dn;
+    ae_vector d;
+    double fold;
+    double stp;
+    double curstpmax;
+    ae_vector work;
+    ae_vector yk;
+    double laststep;
+    ae_int_t rstimer;
+    ae_vector x;
+    double f;
+    ae_vector g;
+    ae_bool needfg;
+    ae_bool xupdated;
+    ae_bool lsstart;
+    ae_bool lsend;
+    rcommstate rstate;
+    ae_int_t repiterationscount;
+    ae_int_t repnfev;
+    ae_int_t repterminationtype;
+    ae_int_t debugrestartscount;
+    linminstate lstate;
+    double betahs;
+    double betady;
+} mincgstate;
+typedef struct
+{
+    ae_int_t iterationscount;
+    ae_int_t nfev;
+    ae_int_t terminationtype;
+} mincgreport;
+typedef struct
+{
+    ae_int_t n;
+    double innerepsg;
+    double innerepsf;
+    double innerepsx;
+    double outerepsx;
+    double outerepsi;
+    ae_int_t maxits;
+    ae_bool xrep;
+    double stpmax;
+    ae_int_t cgtype;
+    double mustart;
+    double mudecay;
+    ae_vector x;
+    double f;
+    ae_vector g;
+    ae_bool needfg;
+    ae_bool xupdated;
+    rcommstate rstate;
+    ae_int_t repinneriterationscount;
+    ae_int_t repouteriterationscount;
+    ae_int_t repnfev;
+    ae_int_t repterminationtype;
+    double repdebugeqerr;
+    double repdebugfs;
+    double repdebugff;
+    double repdebugdx;
+    ae_vector xcur;
+    ae_vector xprev;
+    ae_vector xstart;
+    ae_int_t itsleft;
+    ae_int_t mucounter;
+    ae_matrix ce;
+    ae_matrix ci;
+    ae_matrix cebasis;
+    ae_matrix cesvl;
+    ae_vector xe;
+    ae_int_t cecnt;
+    ae_int_t cicnt;
+    ae_int_t cedim;
+    ae_vector bndl;
+    ae_vector hasbndl;
+    ae_vector bndu;
+    ae_vector hasbndu;
+    ae_vector lm;
+    ae_int_t lmcnt;
+    double mu;
+    ae_vector w;
+    ae_vector tmp0;
+    ae_vector tmp1;
+    ae_vector r;
+    double v0;
+    double v1;
+    double v2;
+    double t;
+    double errfeas;
+    double errslack;
+    double gnorm;
+    double mpgnorm;
+    double lmdif;
+    double lmnorm;
+    double lmgrowth;
+    double mba;
+    double boundary;
+    ae_bool closetobarrier;
+    double bndmax;
+    linminstate lstate;
+    mincgstate cgstate;
+    mincgreport cgrep;
+} minbleicstate;
+typedef struct
+{
+    ae_int_t inneriterationscount;
+    ae_int_t outeriterationscount;
+    ae_int_t nfev;
+    ae_int_t terminationtype;
+    double debugeqerr;
+    double debugfs;
+    double debugff;
+    double debugdx;
+} minbleicreport;
+
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS C++ INTERFACE
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib
+{
+
+/*************************************************************************
+
+*************************************************************************/
+class _minlbfgsstate_owner
+{
+public:
+    _minlbfgsstate_owner();
+    _minlbfgsstate_owner(const _minlbfgsstate_owner &rhs);
+    _minlbfgsstate_owner& operator=(const _minlbfgsstate_owner &rhs);
+    virtual ~_minlbfgsstate_owner();
+    alglib_impl::minlbfgsstate* c_ptr();
+    alglib_impl::minlbfgsstate* c_ptr() const;
+protected:
+    alglib_impl::minlbfgsstate *p_struct;
+};
+class minlbfgsstate : public _minlbfgsstate_owner
+{
+public:
+    minlbfgsstate();
+    minlbfgsstate(const minlbfgsstate &rhs);
+    minlbfgsstate& operator=(const minlbfgsstate &rhs);
+    virtual ~minlbfgsstate();
+    ae_bool &needfg;
+    ae_bool &xupdated;
+    double &f;
+    real_1d_array g;
+    real_1d_array x;
+
+};
+
+
+/*************************************************************************
+
+*************************************************************************/
+class _minlbfgsreport_owner
+{
+public:
+    _minlbfgsreport_owner();
+    _minlbfgsreport_owner(const _minlbfgsreport_owner &rhs);
+    _minlbfgsreport_owner& operator=(const _minlbfgsreport_owner &rhs);
+    virtual ~_minlbfgsreport_owner();
+    alglib_impl::minlbfgsreport* c_ptr();
+    alglib_impl::minlbfgsreport* c_ptr() const;
+protected:
+    alglib_impl::minlbfgsreport *p_struct;
+};
+class minlbfgsreport : public _minlbfgsreport_owner
+{
+public:
+    minlbfgsreport();
+    minlbfgsreport(const minlbfgsreport &rhs);
+    minlbfgsreport& operator=(const minlbfgsreport &rhs);
+    virtual ~minlbfgsreport();
+    ae_int_t &iterationscount;
+    ae_int_t &nfev;
+    ae_int_t &terminationtype;
+
+};
+
+/*************************************************************************
+Levenberg-Marquardt optimizer.
+
+This structure should be created using one of the MinLMCreate???()
+functions. You should not access its fields directly; use ALGLIB functions
+to work with it.
+*************************************************************************/
+class _minlmstate_owner
+{
+public:
+    _minlmstate_owner();
+    _minlmstate_owner(const _minlmstate_owner &rhs);
+    _minlmstate_owner& operator=(const _minlmstate_owner &rhs);
+    virtual ~_minlmstate_owner();
+    alglib_impl::minlmstate* c_ptr();
+    alglib_impl::minlmstate* c_ptr() const;
+protected:
+    alglib_impl::minlmstate *p_struct;
+};
+class minlmstate : public _minlmstate_owner
+{
+public:
+    minlmstate();
+    minlmstate(const minlmstate &rhs);
+    minlmstate& operator=(const minlmstate &rhs);
+    virtual ~minlmstate();
+    ae_bool &needf;
+    ae_bool &needfg;
+    ae_bool &needfgh;
+    ae_bool &needfi;
+    ae_bool &needfij;
+    ae_bool &xupdated;
+    double &f;
+    real_1d_array fi;
+    real_1d_array g;
+    real_2d_array h;
+    real_2d_array j;
+    real_1d_array x;
+
+};
+
+
+/*************************************************************************
+Optimization report, filled by MinLMResults() function
+
+FIELDS:
+* TerminationType, completetion code:
+    * -9    derivative correctness check failed;
+            see Rep.WrongNum, Rep.WrongI, Rep.WrongJ for
+            more information.
+    *  1    relative function improvement is no more than
+            EpsF.
+    *  2    relative step is no more than EpsX.
+    *  4    gradient is no more than EpsG.
+    *  5    MaxIts steps was taken
+    *  7    stopping conditions are too stringent,
+            further improvement is impossible
+* IterationsCount, contains iterations count
+* NFunc, number of function calculations
+* NJac, number of Jacobi matrix calculations
+* NGrad, number of gradient calculations
+* NHess, number of Hessian calculations
+* NCholesky, number of Cholesky decomposition calculations
+*************************************************************************/
+class _minlmreport_owner
+{
+public:
+    _minlmreport_owner();
+    _minlmreport_owner(const _minlmreport_owner &rhs);
+    _minlmreport_owner& operator=(const _minlmreport_owner &rhs);
+    virtual ~_minlmreport_owner();
+    alglib_impl::minlmreport* c_ptr();
+    alglib_impl::minlmreport* c_ptr() const;
+protected:
+    alglib_impl::minlmreport *p_struct;
+};
+class minlmreport : public _minlmreport_owner
+{
+public:
+    minlmreport();
+    minlmreport(const minlmreport &rhs);
+    minlmreport& operator=(const minlmreport &rhs);
+    virtual ~minlmreport();
+    ae_int_t &iterationscount;
+    ae_int_t &terminationtype;
+    ae_int_t &nfunc;
+    ae_int_t &njac;
+    ae_int_t &ngrad;
+    ae_int_t &nhess;
+    ae_int_t &ncholesky;
+
+};
+
+/*************************************************************************
+
+*************************************************************************/
+class _minasastate_owner
+{
+public:
+    _minasastate_owner();
+    _minasastate_owner(const _minasastate_owner &rhs);
+    _minasastate_owner& operator=(const _minasastate_owner &rhs);
+    virtual ~_minasastate_owner();
+    alglib_impl::minasastate* c_ptr();
+    alglib_impl::minasastate* c_ptr() const;
+protected:
+    alglib_impl::minasastate *p_struct;
+};
+class minasastate : public _minasastate_owner
+{
+public:
+    minasastate();
+    minasastate(const minasastate &rhs);
+    minasastate& operator=(const minasastate &rhs);
+    virtual ~minasastate();
+    ae_bool &needfg;
+    ae_bool &xupdated;
+    double &f;
+    real_1d_array g;
+    real_1d_array x;
+
+};
+
+
+/*************************************************************************
+
+*************************************************************************/
+class _minasareport_owner
+{
+public:
+    _minasareport_owner();
+    _minasareport_owner(const _minasareport_owner &rhs);
+    _minasareport_owner& operator=(const _minasareport_owner &rhs);
+    virtual ~_minasareport_owner();
+    alglib_impl::minasareport* c_ptr();
+    alglib_impl::minasareport* c_ptr() const;
+protected:
+    alglib_impl::minasareport *p_struct;
+};
+class minasareport : public _minasareport_owner
+{
+public:
+    minasareport();
+    minasareport(const minasareport &rhs);
+    minasareport& operator=(const minasareport &rhs);
+    virtual ~minasareport();
+    ae_int_t &iterationscount;
+    ae_int_t &nfev;
+    ae_int_t &terminationtype;
+    ae_int_t &activeconstraints;
+
+};
+
+/*************************************************************************
+This object stores state of the nonlinear CG optimizer.
+
+You should use ALGLIB functions to work with this object.
+*************************************************************************/
+class _mincgstate_owner
+{
+public:
+    _mincgstate_owner();
+    _mincgstate_owner(const _mincgstate_owner &rhs);
+    _mincgstate_owner& operator=(const _mincgstate_owner &rhs);
+    virtual ~_mincgstate_owner();
+    alglib_impl::mincgstate* c_ptr();
+    alglib_impl::mincgstate* c_ptr() const;
+protected:
+    alglib_impl::mincgstate *p_struct;
+};
+class mincgstate : public _mincgstate_owner
+{
+public:
+    mincgstate();
+    mincgstate(const mincgstate &rhs);
+    mincgstate& operator=(const mincgstate &rhs);
+    virtual ~mincgstate();
+    ae_bool &needfg;
+    ae_bool &xupdated;
+    double &f;
+    real_1d_array g;
+    real_1d_array x;
+
+};
+
+
+/*************************************************************************
+
+*************************************************************************/
+class _mincgreport_owner
+{
+public:
+    _mincgreport_owner();
+    _mincgreport_owner(const _mincgreport_owner &rhs);
+    _mincgreport_owner& operator=(const _mincgreport_owner &rhs);
+    virtual ~_mincgreport_owner();
+    alglib_impl::mincgreport* c_ptr();
+    alglib_impl::mincgreport* c_ptr() const;
+protected:
+    alglib_impl::mincgreport *p_struct;
+};
+class mincgreport : public _mincgreport_owner
+{
+public:
+    mincgreport();
+    mincgreport(const mincgreport &rhs);
+    mincgreport& operator=(const mincgreport &rhs);
+    virtual ~mincgreport();
+    ae_int_t &iterationscount;
+    ae_int_t &nfev;
+    ae_int_t &terminationtype;
+
+};
+
+/*************************************************************************
+This object stores nonlinear optimizer state.
+You should use functions provided by MinBLEIC subpackage to work with this
+object
+*************************************************************************/
+class _minbleicstate_owner
+{
+public:
+    _minbleicstate_owner();
+    _minbleicstate_owner(const _minbleicstate_owner &rhs);
+    _minbleicstate_owner& operator=(const _minbleicstate_owner &rhs);
+    virtual ~_minbleicstate_owner();
+    alglib_impl::minbleicstate* c_ptr();
+    alglib_impl::minbleicstate* c_ptr() const;
+protected:
+    alglib_impl::minbleicstate *p_struct;
+};
+class minbleicstate : public _minbleicstate_owner
+{
+public:
+    minbleicstate();
+    minbleicstate(const minbleicstate &rhs);
+    minbleicstate& operator=(const minbleicstate &rhs);
+    virtual ~minbleicstate();
+    ae_bool &needfg;
+    ae_bool &xupdated;
+    double &f;
+    real_1d_array g;
+    real_1d_array x;
+
+};
+
+
+/*************************************************************************
+This structure stores optimization report:
+* InnerIterationsCount      number of inner iterations
+* OuterIterationsCount      number of outer iterations
+* NFEV                      number of gradient evaluations
+
+There are additional fields which can be used for debugging:
+* DebugEqErr                error in the equality constraints (2-norm)
+* DebugFS                   f, calculated at projection of initial point
+                            to the feasible set
+* DebugFF                   f, calculated at the final point
+* DebugDX                   |X_start-X_final|
+*************************************************************************/
+class _minbleicreport_owner
+{
+public:
+    _minbleicreport_owner();
+    _minbleicreport_owner(const _minbleicreport_owner &rhs);
+    _minbleicreport_owner& operator=(const _minbleicreport_owner &rhs);
+    virtual ~_minbleicreport_owner();
+    alglib_impl::minbleicreport* c_ptr();
+    alglib_impl::minbleicreport* c_ptr() const;
+protected:
+    alglib_impl::minbleicreport *p_struct;
+};
+class minbleicreport : public _minbleicreport_owner
+{
+public:
+    minbleicreport();
+    minbleicreport(const minbleicreport &rhs);
+    minbleicreport& operator=(const minbleicreport &rhs);
+    virtual ~minbleicreport();
+    ae_int_t &inneriterationscount;
+    ae_int_t &outeriterationscount;
+    ae_int_t &nfev;
+    ae_int_t &terminationtype;
+    double &debugeqerr;
+    double &debugfs;
+    double &debugff;
+    double &debugdx;
+
+};
+
+/*************************************************************************
+        LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by  using  a  quasi-
+Newton method (LBFGS scheme) which is optimized to use  a  minimum  amount
+of memory.
+The subroutine generates the approximation of an inverse Hessian matrix by
+using information about the last M steps of the algorithm  (instead of N).
+It lessens a required amount of memory from a value  of  order  N^2  to  a
+value of order 2*N*M.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinLBFGSCreate() call
+2. User tunes solver parameters with MinLBFGSSetCond() MinLBFGSSetStpMax()
+   and other functions
+3. User calls MinLBFGSOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinLBFGSResults() to get solution
+5. Optionally user may call MinLBFGSRestartFrom() to solve another problem
+   with same N/M but another starting point and/or another function.
+   MinLBFGSRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension. N>0
+    M       -   number of corrections in the BFGS scheme of Hessian
+                approximation update. Recommended value:  3<=M<=7. The smaller
+                value causes worse convergence, the bigger will  not  cause  a
+                considerably better convergence, but will cause a fall in  the
+                performance. M<=N.
+    X       -   initial solution approximation, array[0..N-1].
+
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+
+NOTES:
+1. you may tune stopping conditions with MinLBFGSSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLBFGSSetStpMax() function to bound algorithm's  steps.  However,
+   L-BFGS rarely needs such a tuning.
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgscreate(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlbfgsstate &state);
+void minlbfgscreate(const ae_int_t m, const real_1d_array &x, minlbfgsstate &state);
+
+
+/*************************************************************************
+This function sets stopping conditions for L-BFGS optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetcond(const minlbfgsstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits);
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinLBFGSOptimize().
+
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetxrep(const minlbfgsstate &state, const bool needxrep);
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0 (default),  if
+                you don't want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetstpmax(const minlbfgsstate &state, const double stpmax);
+
+
+/*************************************************************************
+Modification of the preconditioner:
+default preconditioner (simple scaling) is used.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+After call to this function preconditioner is changed to the default one.
+
+NOTE:  you  can  change  preconditioner  "on  the  fly",  during algorithm
+iterations.
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetdefaultpreconditioner(const minlbfgsstate &state);
+
+
+/*************************************************************************
+Modification of the preconditioner:
+Cholesky factorization of approximate Hessian is used.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    P       -   triangular preconditioner, Cholesky factorization of
+                the approximate Hessian. array[0..N-1,0..N-1],
+                (if larger, only leading N elements are used).
+    IsUpper -   whether upper or lower triangle of P is given
+                (other triangle is not referenced)
+
+After call to this function preconditioner is changed to P  (P  is  copied
+into the internal buffer).
+
+NOTE:  you  can  change  preconditioner  "on  the  fly",  during algorithm
+iterations.
+
+NOTE 2:  P  should  be nonsingular. Exception will be thrown otherwise. It
+also should be well conditioned, although only strict  non-singularity  is
+tested.
+
+  -- ALGLIB --
+     Copyright 13.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgssetcholeskypreconditioner(const minlbfgsstate &state, const real_2d_array &p, const bool isupper);
+
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minlbfgsiteration(const minlbfgsstate &state);
+
+
+/*************************************************************************
+This family of functions is used to launcn iterations of nonlinear optimizer
+
+These functions accept following parameters:
+    state   -   algorithm state
+    grad    -   callback which calculates function (or merit function)
+                value func and gradient grad at given point x
+    rep     -   optional callback which is called after each iteration
+                can be NULL
+    ptr     -   optional pointer which is passed to func/grad/hess/jac/rep
+                can be NULL
+
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+
+*************************************************************************/
+void minlbfgsoptimize(minlbfgsstate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+
+
+/*************************************************************************
+L-BFGS algorithm results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    * -1    incorrect parameters were specified
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsresults(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep);
+
+
+/*************************************************************************
+L-BFGS algorithm results
+
+Buffered implementation of MinLBFGSResults which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.08.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsresultsbuf(const minlbfgsstate &state, real_1d_array &x, minlbfgsreport &rep);
+
+
+/*************************************************************************
+This  subroutine restarts LBFGS algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlbfgsrestartfrom(const minlbfgsstate &state, const real_1d_array &x);
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] and Jacobian of f[].
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec()  and jac() callbacks.
+First  one  is used to calculate f[] at given point, second one calculates
+f[] and Jacobian df[i]/dx[j].
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state);
+void minlmcreatevj(const ae_int_t m, const real_1d_array &x, minlmstate &state);
+
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of function vector f[] only. Finite differences  are  used  to
+calculate Jacobian.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+* function vector f[] at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts fvec() callback.
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works with general form function and does not accept function vector), but
+it will  lead  to  exception being thrown after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateV() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+    DiffStep-   differentiation step, >0
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+See also MinLMIteration, MinLMResults.
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatev(const ae_int_t n, const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state);
+void minlmcreatev(const ae_int_t m, const real_1d_array &x, const double diffstep, minlmstate &state);
+
+
+/*************************************************************************
+    LEVENBERG-MARQUARDT-LIKE METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This  function  is  used  to  find  minimum  of general form (not "sum-of-
+-squares") function
+    F = F(x[0], ..., x[n-1])
+using  its  gradient  and  Hessian.  Levenberg-Marquardt modification with
+L-BFGS pre-optimization and internal pre-conditioned  L-BFGS  optimization
+after each Levenberg-Marquardt step is used.
+
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function value F at given point X
+* F and gradient G (simultaneously) at given point X
+* F, G and Hessian H (simultaneously) at given point X
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts func(),  grad()  and  hess()
+function pointers. First pointer is used to calculate F  at  given  point,
+second  one  calculates  F(x)  and  grad F(x),  third one calculates F(x),
+grad F(x), hess F(x).
+
+You can try to initialize MinLMState structure with FGH-function and  then
+use incorrect version of MinLMOptimize() (for example, version which  does
+not provide Hessian matrix), but it will lead to  exception  being  thrown
+after first attempt to calculate Hessian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateFGH() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   pointers (delegates, etc.) to callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgh(const ae_int_t n, const real_1d_array &x, minlmstate &state);
+void minlmcreatefgh(const real_1d_array &x, minlmstate &state);
+
+
+/*************************************************************************
+                IMPROVED LEVENBERG-MARQUARDT METHOD FOR
+                 NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using:
+* value of function vector f[]
+* value of Jacobian of f[]
+* gradient of merit function F(x)
+
+This function creates optimizer which uses acceleration strategy 2.  Cheap
+gradient of merit function (which is twice the product of function  vector
+and Jacobian) is used for accelerated iterations (see User Guide for  more
+info on this subject).
+
+REQUIREMENTS:
+This algorithm will request following information during its operation:
+
+* function vector f[] at given point X
+* function vector f[] and Jacobian of f[] (simultaneously) at given point
+* gradient of
+
+There are several overloaded versions of  MinLMOptimize()  function  which
+correspond  to  different LM-like optimization algorithms provided by this
+unit. You should choose version which accepts  fvec(),  jac()  and  grad()
+callbacks. First one is used to calculate f[] at given point,  second  one
+calculates f[] and Jacobian df[i]/dx[j], last one calculates  gradient  of
+merit function F(x).
+
+You can try to initialize MinLMState structure with VJ  function and  then
+use incorrect version  of  MinLMOptimize()  (for  example,  version  which
+works  with  general  form function and does not provide Jacobian), but it
+will  lead  to  exception  being  thrown  after first attempt to calculate
+Jacobian.
+
+
+USAGE:
+1. User initializes algorithm state with MinLMCreateVGJ() call
+2. User tunes solver parameters with MinLMSetCond(),  MinLMSetStpMax() and
+   other functions
+3. User calls MinLMOptimize() function which  takes algorithm  state   and
+   callback functions.
+4. User calls MinLMResults() to get solution
+5. Optionally, user may call MinLMRestartFrom() to solve  another  problem
+   with same N/M but another starting point and/or another function.
+   MinLMRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   dimension, N>1
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    M       -   number of functions f[i]
+    X       -   initial solution, array[0..N-1]
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+NOTES:
+1. you may tune stopping conditions with MinLMSetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinLMSetStpMax() function to bound algorithm's steps.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatevgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state);
+void minlmcreatevgj(const ae_int_t m, const real_1d_array &x, minlmstate &state);
+
+
+/*************************************************************************
+                   LEVENBERG-MARQUARDT-LIKE METHOD FOR
+                  NON-LINEAR LEAST SQUARES OPTIMIZATION
+
+DESCRIPTION:
+
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using value of F(), gradient of F(), function vector f[]  and  Jacobian of
+f[].
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend to use MinLMCreateVGJ, which
+provides similar, but more consistent interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefgj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state);
+void minlmcreatefgj(const ae_int_t m, const real_1d_array &x, minlmstate &state);
+
+
+/*************************************************************************
+    CLASSIC LEVENBERG-MARQUARDT METHOD FOR NON-LINEAR OPTIMIZATION
+
+DESCRIPTION:
+This function is used to find minimum of function which is represented  as
+sum of squares:
+    F(x) = f[0]^2(x[0],...,x[n-1]) + ... + f[m-1]^2(x[0],...,x[n-1])
+using  value  of  F(),  function  vector  f[] and Jacobian of f[]. Classic
+Levenberg-Marquardt method is used.
+
+This function is considered obsolete since ALGLIB 3.1.0 and is present for
+backward  compatibility  only.  We  recommend  to use MinLMCreateVJ, which
+provides similar, but more consistent and feature-rich interface.
+
+  -- ALGLIB --
+     Copyright 30.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmcreatefj(const ae_int_t n, const ae_int_t m, const real_1d_array &x, minlmstate &state);
+void minlmcreatefj(const ae_int_t m, const real_1d_array &x, minlmstate &state);
+
+
+/*************************************************************************
+This function sets stopping conditions for Levenberg-Marquardt optimization
+algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations   is    unlimited.   Only   Levenberg-Marquardt
+                iterations  are  counted  (L-BFGS/CG  iterations  are  NOT
+                counted because their cost is very low compared to that of
+                LM).
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetcond(const minlmstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits);
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinLMOptimize(). Both Levenberg-Marquardt and internal  L-BFGS
+iterations are reported.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetxrep(const minlmstate &state, const bool needxrep);
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+NOTE: non-zero StpMax leads to moderate  performance  degradation  because
+intermediate  step  of  preconditioned L-BFGS optimization is incompatible
+with limits on step size.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetstpmax(const minlmstate &state, const double stpmax);
+
+
+/*************************************************************************
+This function is used to change acceleration settings
+
+You can choose between three acceleration strategies:
+* AccType=0, no acceleration.
+* AccType=1, secant updates are used to update quadratic model after  each
+  iteration. After fixed number of iterations (or after  model  breakdown)
+  we  recalculate  quadratic  model  using  analytic  Jacobian  or  finite
+  differences. Number of secant-based iterations depends  on  optimization
+  settings: about 3 iterations - when we have analytic Jacobian, up to 2*N
+  iterations - when we use finite differences to calculate Jacobian.
+* AccType=2, after quadratic model is built and LM step is made, we use it
+  as preconditioner for several (5-10) iterations of L-BFGS algorithm.
+
+AccType=1 is recommended when Jacobian  calculation  cost  is  prohibitive
+high (several Mx1 function vector calculations  followed  by  several  NxN
+Cholesky factorizations are faster than calculation of one M*N  Jacobian).
+It should also be used when we have no Jacobian, because finite difference
+approximation takes too much time to compute.
+
+AccType=2 is recommended when Jacobian is cheap - much more  cheaper  than
+one  Cholesky  factorization.   We   can   reduce   number   of   Cholesky
+factorizations at the cost of increased number of  Jacobian  calculations.
+Sometimes it helps.
+
+Table below list  optimization  protocols  (XYZ  protocol  corresponds  to
+MinLMCreateXYZ) and acceleration types they support (and use by  default).
+
+ACCELERATION TYPES SUPPORTED BY OPTIMIZATION PROTOCOLS:
+
+protocol    0   1   2   comment
+V           +   +
+VJ          +   +   +
+FGH         +       +
+VGJ         +   +   +   special protocol, not for widespread use
+FJ          +       +   obsolete protocol, not recommended
+FGJ         +       +   obsolete protocol, not recommended
+
+DAFAULT VALUES:
+
+protocol    0   1   2   comment
+V               x       without acceleration it is so slooooooooow
+VJ          x
+FGH         x
+VGJ                 x   we've implicitly turned (2) by passing gradient
+FJ          x           obsolete protocol, not recommended
+FGJ                 x   obsolete protocol, not recommended
+
+NOTE: this  function should be called before optimization. Attempt to call
+it during algorithm iterations may result in unexpected behavior.
+
+NOTE: attempt to call this function with unsupported protocol/acceleration
+combination will result in exception being thrown.
+
+  -- ALGLIB --
+     Copyright 14.10.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmsetacctype(const minlmstate &state, const ae_int_t acctype);
+
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minlmiteration(const minlmstate &state);
+
+
+/*************************************************************************
+This family of functions is used to launcn iterations of nonlinear optimizer
+
+These functions accept following parameters:
+    state   -   algorithm state
+    func    -   callback which calculates function (or merit function)
+                value func at given point x
+    grad    -   callback which calculates function (or merit function)
+                value func and gradient grad at given point x
+    hess    -   callback which calculates function (or merit function)
+                value func, gradient grad and Hessian hess at given point x
+    fvec    -   callback which calculates function vector fi[]
+                at given point x
+    jac     -   callback which calculates function vector fi[]
+                and Jacobian jac at given point x
+    rep     -   optional callback which is called after each iteration
+                can be NULL
+    ptr     -   optional pointer which is passed to func/grad/hess/jac/rep
+                can be NULL
+
+NOTES:
+
+1. Depending on function used to create state  structure,  this  algorithm
+   may accept Jacobian and/or Hessian and/or gradient.  According  to  the
+   said above, there ase several versions of this function,  which  accept
+   different sets of callbacks.
+
+   This flexibility opens way to subtle errors - you may create state with
+   MinLMCreateFGH() (optimization using Hessian), but call function  which
+   does not accept Hessian. So when algorithm will request Hessian,  there
+   will be no callback to call. In this case exception will be thrown.
+
+   Be careful to avoid such errors because there is no way to find them at
+   compile time - you can see them at runtime only.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+
+*************************************************************************/
+void minlmoptimize(minlmstate &state,
+    void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+void minlmoptimize(minlmstate &state,
+    void (*fvec)(const real_1d_array &x, real_1d_array &fi, void *ptr),
+    void  (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+void minlmoptimize(minlmstate &state,
+    void (*func)(const real_1d_array &x, double &func, void *ptr),
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void (*hess)(const real_1d_array &x, double &func, real_1d_array &grad, real_2d_array &hess, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+void minlmoptimize(minlmstate &state,
+    void (*func)(const real_1d_array &x, double &func, void *ptr),
+    void  (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+void minlmoptimize(minlmstate &state,
+    void (*func)(const real_1d_array &x, double &func, void *ptr),
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*jac)(const real_1d_array &x, real_1d_array &fi, real_2d_array &jac, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+
+
+/*************************************************************************
+Levenberg-Marquardt algorithm results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report;
+                see comments for this structure for more info.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmresults(const minlmstate &state, real_1d_array &x, minlmreport &rep);
+
+
+/*************************************************************************
+Levenberg-Marquardt algorithm results
+
+Buffered implementation of MinLMResults(), which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 10.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minlmresultsbuf(const minlmstate &state, real_1d_array &x, minlmreport &rep);
+
+
+/*************************************************************************
+This  subroutine  restarts  LM  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used for reverse communication previously
+                allocated with MinLMCreateXXX call.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minlmrestartfrom(const minlmstate &state, const real_1d_array &x);
+
+/*************************************************************************
+              NONLINEAR BOUND CONSTRAINED OPTIMIZATION USING
+                      MODIFIED ACTIVE SET ALGORITHM
+                   WILLIAM W. HAGER AND HONGCHAO ZHANG
+
+DESCRIPTION:
+The  subroutine  minimizes  function  F(x)  of  N  arguments  with   bound
+constraints: BndL[i] <= x[i] <= BndU[i]
+
+This method is  globally  convergent  as  long  as  grad(f)  is  Lipschitz
+continuous on a level set: L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinASACreate() call
+2. User tunes solver parameters with MinASASetCond() MinASASetStpMax() and
+   other functions
+3. User calls MinASAOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinASAResults() to get solution
+5. Optionally, user may call MinASARestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinASARestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from sizes of
+                  X/BndL/BndU.
+    X       -   starting point, array[0..N-1].
+    BndL    -   lower bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very small number as bound: -1000,  -1.0E6
+                or -1.0E300, or something like that.
+    BndU    -   upper bounds, array[0..N-1].
+                all elements MUST be specified,  i.e.  all  variables  are
+                bounded. However, if some (all) variables  are  unbounded,
+                you may specify very large number as bound: +1000,  +1.0E6
+                or +1.0E300, or something like that.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+NOTES:
+
+1. you may tune stopping conditions with MinASASetCond() function
+2. if target function contains exp() or other fast growing functions,  and
+   optimization algorithm makes too large steps which leads  to  overflow,
+   use MinASASetStpMax() function to bound algorithm's steps.
+3. this function does NOT support infinite/NaN values in X, BndL, BndU.
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasacreate(const ae_int_t n, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state);
+void minasacreate(const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu, minasastate &state);
+
+
+/*************************************************************************
+This function sets stopping conditions for the ASA optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetcond(const minasastate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits);
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinASAOptimize().
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetxrep(const minasastate &state, const bool needxrep);
+
+
+/*************************************************************************
+This function sets optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm stat
+    UAType  -   algorithm type:
+                * -1    automatic selection of the best algorithm
+                * 0     DY (Dai and Yuan) algorithm
+                * 1     Hybrid DY-HS algorithm
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetalgorithm(const minasastate &state, const ae_int_t algotype);
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length (zero by default).
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasasetstpmax(const minasastate &state, const double stpmax);
+
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minasaiteration(const minasastate &state);
+
+
+/*************************************************************************
+This family of functions is used to launcn iterations of nonlinear optimizer
+
+These functions accept following parameters:
+    state   -   algorithm state
+    grad    -   callback which calculates function (or merit function)
+                value func and gradient grad at given point x
+    rep     -   optional callback which is called after each iteration
+                can be NULL
+    ptr     -   optional pointer which is passed to func/grad/hess/jac/rep
+                can be NULL
+
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+
+*************************************************************************/
+void minasaoptimize(minasastate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+
+
+/*************************************************************************
+ASA results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    * -1    incorrect parameters were specified
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+                * ActiveConstraints contains number of active constraints
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minasaresults(const minasastate &state, real_1d_array &x, minasareport &rep);
+
+
+/*************************************************************************
+ASA results
+
+Buffered implementation of MinASAResults() which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void minasaresultsbuf(const minasastate &state, real_1d_array &x, minasareport &rep);
+
+
+/*************************************************************************
+This  subroutine  restarts  CG  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinCGCreate call.
+    X       -   new starting point.
+    BndL    -   new lower bounds
+    BndU    -   new upper bounds
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void minasarestartfrom(const minasastate &state, const real_1d_array &x, const real_1d_array &bndl, const real_1d_array &bndu);
+
+/*************************************************************************
+        NONLINEAR CONJUGATE GRADIENT METHOD
+
+DESCRIPTION:
+The subroutine minimizes function F(x) of N arguments by using one of  the
+nonlinear conjugate gradient methods.
+
+These CG methods are globally convergent (even on non-convex functions) as
+long as grad(f) is Lipschitz continuous in  a  some  neighborhood  of  the
+L = { x : f(x)<=f(x0) }.
+
+
+REQUIREMENTS:
+Algorithm will request following information during its operation:
+* function value F and its gradient G (simultaneously) at given point X
+
+
+USAGE:
+1. User initializes algorithm state with MinCGCreate() call
+2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and
+   other functions
+3. User calls MinCGOptimize() function which takes algorithm  state   and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+4. User calls MinCGResults() to get solution
+5. Optionally, user may call MinCGRestartFrom() to solve another  problem
+   with same N but another starting point and/or another function.
+   MinCGRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size of X
+    X       -   starting point, array[0..N-1].
+
+OUTPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+
+  -- ALGLIB --
+     Copyright 25.03.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgcreate(const ae_int_t n, const real_1d_array &x, mincgstate &state);
+void mincgcreate(const real_1d_array &x, mincgstate &state);
+
+
+/*************************************************************************
+This function sets stopping conditions for CG optimization algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                The  subroutine  finishes  its  work   if   the  condition
+                ||G||<EpsG is satisfied, where ||.|| means Euclidian norm,
+                G - gradient.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+    MaxIts  -   maximum number of iterations. If MaxIts=0, the  number  of
+                iterations is unlimited.
+
+Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
+automatic stopping criterion selection (small EpsX).
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetcond(const mincgstate &state, const double epsg, const double epsf, const double epsx, const ae_int_t maxits);
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinCGOptimize().
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetxrep(const mincgstate &state, const bool needxrep);
+
+
+/*************************************************************************
+This function sets CG algorithm.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    CGType  -   algorithm type:
+                * -1    automatic selection of the best algorithm
+                * 0     DY (Dai and Yuan) algorithm
+                * 1     Hybrid DY-HS algorithm
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetcgtype(const mincgstate &state, const ae_int_t cgtype);
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  leads  to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsetstpmax(const mincgstate &state, const double stpmax);
+
+
+/*************************************************************************
+This function allows to suggest initial step length to the CG algorithm.
+
+Suggested  step  length  is used as starting point for the line search. It
+can be useful when you have  badly  scaled  problem,  i.e.  when  ||grad||
+(which is used as initial estimate for the first step) is many  orders  of
+magnitude different from the desired step.
+
+Line search  may  fail  on  such problems without good estimate of initial
+step length. Imagine, for example, problem with ||grad||=10^50 and desired
+step equal to 0.1 Line  search function will use 10^50  as  initial  step,
+then  it  will  decrease step length by 2 (up to 20 attempts) and will get
+10^44, which is still too large.
+
+This function allows us to tell than line search should  be  started  from
+some moderate step length, like 1.0, so algorithm will be able  to  detect
+desired step length in a several searches.
+
+This function influences only first iteration of algorithm. It  should  be
+called between MinCGCreate/MinCGRestartFrom() call and MinCGOptimize call.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state.
+    Stp     -   initial estimate of the step length.
+                Can be zero (no estimate).
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgsuggeststep(const mincgstate &state, const double stp);
+
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool mincgiteration(const mincgstate &state);
+
+
+/*************************************************************************
+This family of functions is used to launcn iterations of nonlinear optimizer
+
+These functions accept following parameters:
+    state   -   algorithm state
+    grad    -   callback which calculates function (or merit function)
+                value func and gradient grad at given point x
+    rep     -   optional callback which is called after each iteration
+                can be NULL
+    ptr     -   optional pointer which is passed to func/grad/hess/jac/rep
+                can be NULL
+
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+
+*************************************************************************/
+void mincgoptimize(mincgstate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+
+
+/*************************************************************************
+Conjugate gradient results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    *  1    relative function improvement is no more than
+                            EpsF.
+                    *  2    relative step is no more than EpsX.
+                    *  4    gradient norm is no more than EpsG
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible,
+                            we return best X found so far
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+void mincgresults(const mincgstate &state, real_1d_array &x, mincgreport &rep);
+
+
+/*************************************************************************
+Conjugate gradient results
+
+Buffered implementation of MinCGResults(), which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 20.04.2009 by Bochkanov Sergey
+*************************************************************************/
+void mincgresultsbuf(const mincgstate &state, real_1d_array &x, mincgreport &rep);
+
+
+/*************************************************************************
+This  subroutine  restarts  CG  algorithm from new point. All optimization
+parameters are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure used to store algorithm state.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 30.07.2010 by Bochkanov Sergey
+*************************************************************************/
+void mincgrestartfrom(const mincgstate &state, const real_1d_array &x);
+
+/*************************************************************************
+                     BOUND CONSTRAINED OPTIMIZATION
+       WITH ADDITIONAL LINEAR EQUALITY AND INEQUALITY CONSTRAINTS
+
+DESCRIPTION:
+The  subroutine  minimizes  function   F(x)  of N arguments subject to any
+combination of:
+* bound constraints
+* linear inequality constraints
+* linear equality constraints
+
+REQUIREMENTS:
+* function value and gradient
+* grad(f) must be Lipschitz continuous on a level set: L = { x : f(x)<=f(x0) }
+* function must be defined even in the infeasible points (algorithm make take
+  steps in the infeasible area before converging to the feasible point)
+* starting point X0 must be feasible or not too far away from the feasible set
+* problem must satisfy strict complementary conditions
+
+USAGE:
+
+Constrained optimization if far more complex than the unconstrained one.
+Here we give very brief outline of the BLEIC optimizer. We strongly recommend
+you to read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide
+on optimization, which is available at http://www.alglib.net/optimization/
+
+1. User initializes algorithm state with MinBLEICCreate() call
+
+2. USer adds boundary and/or linear constraints by calling
+   MinBLEICSetBC() and MinBLEICSetLC() functions.
+
+3. User sets stopping conditions for underlying unconstrained solver
+   with MinBLEICSetInnerCond() call.
+   This function controls accuracy of underlying optimization algorithm.
+
+4. User sets stopping conditions for outer iteration by calling
+   MinBLEICSetOuterCond() function.
+   This function controls handling of boundary and inequality constraints.
+
+5. User tunes barrier parameters:
+   * barrier width with MinBLEICSetBarrierWidth() call
+   * (optionally) dynamics of the barrier width with MinBLEICSetBarrierDecay() call
+   These functions control handling of boundary and inequality constraints.
+
+6. Additionally, user may set limit on number of internal iterations
+   by MinBLEICSetMaxIts() call.
+   This function allows to prevent algorithm from looping forever.
+
+7. User calls MinBLEICOptimize() function which takes algorithm  state and
+   pointer (delegate, etc.) to callback function which calculates F/G.
+
+8. User calls MinBLEICResults() to get solution
+
+9. Optionally user may call MinBLEICRestartFrom() to solve another problem
+   with same N but another starting point.
+   MinBLEICRestartFrom() allows to reuse already initialized structure.
+
+
+INPUT PARAMETERS:
+    N       -   problem dimension, N>0:
+                * if given, only leading N elements of X are used
+                * if not given, automatically determined from size ofX
+    X       -   starting point, array[N]:
+                * it is better to set X to a feasible point
+                * but X can be infeasible, in which case algorithm will try
+                  to find feasible point first, using X as initial
+                  approximation.
+
+OUTPUT PARAMETERS:
+    State   -   structure stores algorithm state
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleiccreate(const ae_int_t n, const real_1d_array &x, minbleicstate &state);
+void minbleiccreate(const real_1d_array &x, minbleicstate &state);
+
+
+/*************************************************************************
+This function sets boundary constraints for BLEIC optimizer.
+
+Boundary constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure stores algorithm state
+    BndL    -   lower bounds, array[N].
+                If some (all) variables are unbounded, you may specify
+                very small number or -INF.
+    BndU    -   upper bounds, array[N].
+                If some (all) variables are unbounded, you may specify
+                very large number or +INF.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbc(const minbleicstate &state, const real_1d_array &bndl, const real_1d_array &bndu);
+
+
+/*************************************************************************
+This function sets linear constraints for BLEIC optimizer.
+
+Linear constraints are inactive by default (after initial creation).
+They are preserved after algorithm restart with MinBLEICRestartFrom().
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    C       -   linear constraints, array[K,N+1].
+                Each row of C represents one constraint, either equality
+                or inequality (see below):
+                * first N elements correspond to coefficients,
+                * last element corresponds to the right part.
+                All elements of C (including right part) must be finite.
+    CT      -   type of constraints, array[K]:
+                * if CT[i]>0, then I-th constraint is C[i,*]*x >= C[i,n+1]
+                * if CT[i]=0, then I-th constraint is C[i,*]*x  = C[i,n+1]
+                * if CT[i]<0, then I-th constraint is C[i,*]*x <= C[i,n+1]
+    K       -   number of equality/inequality constraints, K>=0:
+                * if given, only leading K elements of C/CT are used
+                * if not given, automatically determined from sizes of C/CT
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k);
+void minbleicsetlc(const minbleicstate &state, const real_2d_array &c, const integer_1d_array &ct);
+
+
+/*************************************************************************
+This function sets stopping conditions for the underlying nonlinear CG
+optimizer. It controls overall accuracy of solution. These conditions
+should be strict enough in order for algorithm to converge.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsG    -   >=0
+                Algorithm finishes its work if 2-norm of the Lagrangian
+                gradient is less than or equal to EpsG.
+    EpsF    -   >=0
+                The  subroutine  finishes  its work if on k+1-th iteration
+                the  condition  |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
+                is satisfied.
+    EpsX    -   >=0
+                The subroutine finishes its work if  on  k+1-th  iteration
+                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
+
+Passing EpsG=0, EpsF=0 and EpsX=0 (simultaneously) will lead to
+automatic stopping criterion selection.
+
+These conditions are used to terminate inner iterations. However, you
+need to tune termination conditions for outer iterations too.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetinnercond(const minbleicstate &state, const double epsg, const double epsf, const double epsx);
+
+
+/*************************************************************************
+This function sets stopping conditions for outer iteration of BLEIC algo.
+
+These conditions control accuracy of constraint handling and amount of
+infeasibility allowed in the solution.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    EpsX    -   >0, stopping condition on outer iteration step length
+    EpsI    -   >0, stopping condition on infeasibility
+
+Both EpsX and EpsI must be non-zero.
+
+MEANING OF EpsX
+
+EpsX  is  a  stopping  condition for outer iterations. Algorithm will stop
+when  solution  of  the  current  modified  subproblem will be within EpsX
+(using 2-norm) of the previous solution.
+
+MEANING OF EpsI
+
+EpsI controls feasibility properties -  algorithm  won't  stop  until  all
+inequality constraints will be satisfied with error (distance from current
+point to the feasible area) at most EpsI.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetoutercond(const minbleicstate &state, const double epsx, const double epsi);
+
+
+/*************************************************************************
+This function sets initial barrier width.
+
+BLEIC optimizer uses  modified  barrier  functions  to  handle  inequality
+constraints. These functions are almost constant in the inner parts of the
+feasible  area,  but  grow rapidly to the infinity OUTSIDE of the feasible
+area. Barrier width is a distance from feasible area to  the  point  where
+modified barrier function becomes infinite.
+
+Barrier width must be:
+* small enough (below some problem-dependent value) in order for algorithm
+  to  converge.  Necessary  condition  is that the target function must be
+  well described by linear model in the areas as small as barrier width.
+* not VERY small (in order to avoid  difficulties  associated  with  rapid
+  changes in the modified function, ill-conditioning, round-off issues).
+
+Choosing  appropriate  barrier  width  is  very  important  for  efficient
+optimization, and it often requires error  and  trial.  You  can  use  two
+strategies when choosing barrier width:
+* set barrier width with MinBLEICSetBarrierWidth() call. In this case  you
+  should try different barrier widths and examine results.
+* set decreasing barrier width by combining  MinBLEICSetBarrierWidth() and
+  MinBLEICSetBarrierDecay()  calls.  In  this case algorithm will decrease
+  barrier  width  after  each  outer iteration until it encounters optimal
+  barrier width.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    Mu      -   >0, initial barrier width
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbarrierwidth(const minbleicstate &state, const double mu);
+
+
+/*************************************************************************
+This function sets decay coefficient for barrier width.
+
+By default, no barrier decay is used (Decay=1.0).
+
+BLEIC optimizer uses  modified  barrier  functions  to  handle  inequality
+constraints. These functions are almost constant in the inner parts of the
+feasible  area,  but  grow rapidly to the infinity OUTSIDE of the feasible
+area. Barrier width is a distance from feasible area to  the  point  where
+modified barrier function becomes infinite. Decay coefficient allows us to
+decrease  barrier  width  from  the  initial  (suboptimial)  value   until
+optimal value will be met.
+
+We recommend you either to set MuDecay=1.0 (no decay) or use some moderate
+value like 0.5-0.7
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    MuDecay -   0<MuDecay<=1, decay coefficient
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetbarrierdecay(const minbleicstate &state, const double mudecay);
+
+
+/*************************************************************************
+This function allows to stop algorithm after specified number of inner
+iterations.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    MaxIts  -   maximum number of inner iterations.
+                If MaxIts=0, the number of iterations is unlimited.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetmaxits(const minbleicstate &state, const ae_int_t maxits);
+
+
+/*************************************************************************
+This function turns on/off reporting.
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    NeedXRep-   whether iteration reports are needed or not
+
+If NeedXRep is True, algorithm will call rep() callback function if  it is
+provided to MinBLEICOptimize().
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetxrep(const minbleicstate &state, const bool needxrep);
+
+
+/*************************************************************************
+This function sets maximum step length
+
+INPUT PARAMETERS:
+    State   -   structure which stores algorithm state
+    StpMax  -   maximum step length, >=0. Set StpMax to 0.0,  if you don't
+                want to limit step length.
+
+Use this subroutine when you optimize target function which contains exp()
+or  other  fast  growing  functions,  and optimization algorithm makes too
+large  steps  which  lead   to overflow. This function allows us to reject
+steps  that  are  too  large  (and  therefore  expose  us  to the possible
+overflow) without actually calculating function value at the x+stp*d.
+
+  -- ALGLIB --
+     Copyright 02.04.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicsetstpmax(const minbleicstate &state, const double stpmax);
+
+
+/*************************************************************************
+This function provides reverse communication interface
+Reverse communication interface is not documented or recommended to use.
+See below for functions which provide better documented API
+*************************************************************************/
+bool minbleiciteration(const minbleicstate &state);
+
+
+/*************************************************************************
+This family of functions is used to launcn iterations of nonlinear optimizer
+
+These functions accept following parameters:
+    state   -   algorithm state
+    grad    -   callback which calculates function (or merit function)
+                value func and gradient grad at given point x
+    rep     -   optional callback which is called after each iteration
+                can be NULL
+    ptr     -   optional pointer which is passed to func/grad/hess/jac/rep
+                can be NULL
+
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+
+*************************************************************************/
+void minbleicoptimize(minbleicstate &state,
+    void (*grad)(const real_1d_array &x, double &func, real_1d_array &grad, void *ptr),
+    void  (*rep)(const real_1d_array &x, double func, void *ptr) = NULL,
+    void *ptr = NULL);
+
+
+/*************************************************************************
+BLEIC results
+
+INPUT PARAMETERS:
+    State   -   algorithm state
+
+OUTPUT PARAMETERS:
+    X       -   array[0..N-1], solution
+    Rep     -   optimization report:
+                * Rep.TerminationType completetion code:
+                    * -3    inconsistent constraints. Feasible point is
+                            either nonexistent or too hard to find. Try to
+                            restart optimizer with better initial
+                            approximation
+                    * -2    rounding errors prevent further improvement.
+                            X contains best point found.
+                    *  4    conditions on constraints are fulfilled
+                            with error less than or equal to EpsC
+                    *  5    MaxIts steps was taken
+                    *  7    stopping conditions are too stringent,
+                            further improvement is impossible,
+                            X contains best point found so far.
+                * Rep.IterationsCount contains iterations count
+                * NFEV countains number of function calculations
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicresults(const minbleicstate &state, real_1d_array &x, minbleicreport &rep);
+
+
+/*************************************************************************
+BLEIC results
+
+Buffered implementation of MinBLEICResults() which uses pre-allocated buffer
+to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
+intended to be used in the inner cycles of performance critical algorithms
+where array reallocation penalty is too large to be ignored.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicresultsbuf(const minbleicstate &state, real_1d_array &x, minbleicreport &rep);
+
+
+/*************************************************************************
+This subroutine restarts algorithm from new point.
+All optimization parameters (including constraints) are left unchanged.
+
+This  function  allows  to  solve multiple  optimization  problems  (which
+must have  same number of dimensions) without object reallocation penalty.
+
+INPUT PARAMETERS:
+    State   -   structure previously allocated with MinBLEICCreate call.
+    X       -   new starting point.
+
+  -- ALGLIB --
+     Copyright 28.11.2010 by Bochkanov Sergey
+*************************************************************************/
+void minbleicrestartfrom(const minbleicstate &state, const real_1d_array &x);
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace alglib_impl
+{
+void minlbfgscreate(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlbfgsstate* state,
+     ae_state *_state);
+void minlbfgssetcond(minlbfgsstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state);
+void minlbfgssetxrep(minlbfgsstate* state,
+     ae_bool needxrep,
+     ae_state *_state);
+void minlbfgssetstpmax(minlbfgsstate* state,
+     double stpmax,
+     ae_state *_state);
+void minlbfgscreatex(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     ae_int_t flags,
+     minlbfgsstate* state,
+     ae_state *_state);
+void minlbfgssetdefaultpreconditioner(minlbfgsstate* state,
+     ae_state *_state);
+void minlbfgssetcholeskypreconditioner(minlbfgsstate* state,
+     /* Real    */ ae_matrix* p,
+     ae_bool isupper,
+     ae_state *_state);
+ae_bool minlbfgsiteration(minlbfgsstate* state, ae_state *_state);
+void minlbfgsresults(minlbfgsstate* state,
+     /* Real    */ ae_vector* x,
+     minlbfgsreport* rep,
+     ae_state *_state);
+void minlbfgsresultsbuf(minlbfgsstate* state,
+     /* Real    */ ae_vector* x,
+     minlbfgsreport* rep,
+     ae_state *_state);
+void minlbfgsrestartfrom(minlbfgsstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state);
+ae_bool _minlbfgsstate_init(minlbfgsstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minlbfgsstate_init_copy(minlbfgsstate* dst, minlbfgsstate* src, ae_state *_state, ae_bool make_automatic);
+void _minlbfgsstate_clear(minlbfgsstate* p);
+ae_bool _minlbfgsreport_init(minlbfgsreport* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minlbfgsreport_init_copy(minlbfgsreport* dst, minlbfgsreport* src, ae_state *_state, ae_bool make_automatic);
+void _minlbfgsreport_clear(minlbfgsreport* p);
+void minlmcreatevj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state);
+void minlmcreatev(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     double diffstep,
+     minlmstate* state,
+     ae_state *_state);
+void minlmcreatefgh(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state);
+void minlmcreatevgj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state);
+void minlmcreatefgj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state);
+void minlmcreatefj(ae_int_t n,
+     ae_int_t m,
+     /* Real    */ ae_vector* x,
+     minlmstate* state,
+     ae_state *_state);
+void minlmsetcond(minlmstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state);
+void minlmsetxrep(minlmstate* state, ae_bool needxrep, ae_state *_state);
+void minlmsetstpmax(minlmstate* state, double stpmax, ae_state *_state);
+void minlmsetacctype(minlmstate* state,
+     ae_int_t acctype,
+     ae_state *_state);
+ae_bool minlmiteration(minlmstate* state, ae_state *_state);
+void minlmresults(minlmstate* state,
+     /* Real    */ ae_vector* x,
+     minlmreport* rep,
+     ae_state *_state);
+void minlmresultsbuf(minlmstate* state,
+     /* Real    */ ae_vector* x,
+     minlmreport* rep,
+     ae_state *_state);
+void minlmrestartfrom(minlmstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state);
+ae_bool _minlmstate_init(minlmstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minlmstate_init_copy(minlmstate* dst, minlmstate* src, ae_state *_state, ae_bool make_automatic);
+void _minlmstate_clear(minlmstate* p);
+ae_bool _minlmreport_init(minlmreport* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minlmreport_init_copy(minlmreport* dst, minlmreport* src, ae_state *_state, ae_bool make_automatic);
+void _minlmreport_clear(minlmreport* p);
+void minasacreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* bndl,
+     /* Real    */ ae_vector* bndu,
+     minasastate* state,
+     ae_state *_state);
+void minasasetcond(minasastate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state);
+void minasasetxrep(minasastate* state, ae_bool needxrep, ae_state *_state);
+void minasasetalgorithm(minasastate* state,
+     ae_int_t algotype,
+     ae_state *_state);
+void minasasetstpmax(minasastate* state, double stpmax, ae_state *_state);
+ae_bool minasaiteration(minasastate* state, ae_state *_state);
+void minasaresults(minasastate* state,
+     /* Real    */ ae_vector* x,
+     minasareport* rep,
+     ae_state *_state);
+void minasaresultsbuf(minasastate* state,
+     /* Real    */ ae_vector* x,
+     minasareport* rep,
+     ae_state *_state);
+void minasarestartfrom(minasastate* state,
+     /* Real    */ ae_vector* x,
+     /* Real    */ ae_vector* bndl,
+     /* Real    */ ae_vector* bndu,
+     ae_state *_state);
+ae_bool _minasastate_init(minasastate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minasastate_init_copy(minasastate* dst, minasastate* src, ae_state *_state, ae_bool make_automatic);
+void _minasastate_clear(minasastate* p);
+ae_bool _minasareport_init(minasareport* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minasareport_init_copy(minasareport* dst, minasareport* src, ae_state *_state, ae_bool make_automatic);
+void _minasareport_clear(minasareport* p);
+void mincgcreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     mincgstate* state,
+     ae_state *_state);
+void mincgsetcond(mincgstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_int_t maxits,
+     ae_state *_state);
+void mincgsetxrep(mincgstate* state, ae_bool needxrep, ae_state *_state);
+void mincgsetdrep(mincgstate* state, ae_bool needdrep, ae_state *_state);
+void mincgsetcgtype(mincgstate* state, ae_int_t cgtype, ae_state *_state);
+void mincgsetstpmax(mincgstate* state, double stpmax, ae_state *_state);
+void mincgsuggeststep(mincgstate* state, double stp, ae_state *_state);
+ae_bool mincgiteration(mincgstate* state, ae_state *_state);
+void mincgresults(mincgstate* state,
+     /* Real    */ ae_vector* x,
+     mincgreport* rep,
+     ae_state *_state);
+void mincgresultsbuf(mincgstate* state,
+     /* Real    */ ae_vector* x,
+     mincgreport* rep,
+     ae_state *_state);
+void mincgrestartfrom(mincgstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state);
+ae_bool _mincgstate_init(mincgstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _mincgstate_init_copy(mincgstate* dst, mincgstate* src, ae_state *_state, ae_bool make_automatic);
+void _mincgstate_clear(mincgstate* p);
+ae_bool _mincgreport_init(mincgreport* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _mincgreport_init_copy(mincgreport* dst, mincgreport* src, ae_state *_state, ae_bool make_automatic);
+void _mincgreport_clear(mincgreport* p);
+void minbleiccreate(ae_int_t n,
+     /* Real    */ ae_vector* x,
+     minbleicstate* state,
+     ae_state *_state);
+void minbleicsetbc(minbleicstate* state,
+     /* Real    */ ae_vector* bndl,
+     /* Real    */ ae_vector* bndu,
+     ae_state *_state);
+void minbleicsetlc(minbleicstate* state,
+     /* Real    */ ae_matrix* c,
+     /* Integer */ ae_vector* ct,
+     ae_int_t k,
+     ae_state *_state);
+void minbleicsetinnercond(minbleicstate* state,
+     double epsg,
+     double epsf,
+     double epsx,
+     ae_state *_state);
+void minbleicsetoutercond(minbleicstate* state,
+     double epsx,
+     double epsi,
+     ae_state *_state);
+void minbleicsetbarrierwidth(minbleicstate* state,
+     double mu,
+     ae_state *_state);
+void minbleicsetbarrierdecay(minbleicstate* state,
+     double mudecay,
+     ae_state *_state);
+void minbleicsetmaxits(minbleicstate* state,
+     ae_int_t maxits,
+     ae_state *_state);
+void minbleicsetxrep(minbleicstate* state,
+     ae_bool needxrep,
+     ae_state *_state);
+void minbleicsetstpmax(minbleicstate* state,
+     double stpmax,
+     ae_state *_state);
+ae_bool minbleiciteration(minbleicstate* state, ae_state *_state);
+void minbleicresults(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     minbleicreport* rep,
+     ae_state *_state);
+void minbleicresultsbuf(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     minbleicreport* rep,
+     ae_state *_state);
+void minbleicrestartfrom(minbleicstate* state,
+     /* Real    */ ae_vector* x,
+     ae_state *_state);
+void barrierfunc(double x,
+     double mu,
+     double* f,
+     double* df,
+     double* d2f,
+     ae_state *_state);
+ae_bool _minbleicstate_init(minbleicstate* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minbleicstate_init_copy(minbleicstate* dst, minbleicstate* src, ae_state *_state, ae_bool make_automatic);
+void _minbleicstate_clear(minbleicstate* p);
+ae_bool _minbleicreport_init(minbleicreport* p, ae_state *_state, ae_bool make_automatic);
+ae_bool _minbleicreport_init_copy(minbleicreport* dst, minbleicreport* src, ae_state *_state, ae_bool make_automatic);
+void _minbleicreport_clear(minbleicreport* p);
+
+}
+#endif
+
diff --git a/contrib/lbfgs/stdafx.h b/contrib/lbfgs/stdafx.h
new file mode 100755
index 0000000000..99a8091366
--- /dev/null
+++ b/contrib/lbfgs/stdafx.h
@@ -0,0 +1,2 @@
+
+
-- 
GitLab