diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2dd8d41aad14010c81645650a81366a1b744b167..f8da7f36be5d0315f0f8953b00902a91671dd2d6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -899,14 +899,10 @@ if(HAVE_SOLVER)
 
   if(ENABLE_TAUCS)
     if(HAVE_METIS)
-      find_library(TAUCS_LIB taucs PATH_SUFFIXES lib)
-      find_path(TAUCS_INC "taucs.h" PATH_SUFFIXES src include taucs)
-      if(TAUCS_LIB AND TAUCS_INC)
-        set_config_option(HAVE_TAUCS "Taucs")
-        add_definitions(-DTAUCS_CILK)
-        list(APPEND EXTERNAL_LIBRARIES ${TAUCS_LIB})
-        list(APPEND EXTERNAL_INCLUDES ${TAUCS_INC})
-      endif(TAUCS_LIB AND TAUCS_INC)
+      add_subdirectory(contrib/taucs)  # build the bundled copy; contrib/taucs/CMakeLists.txt defines the static target "taucs"
+      include_directories(contrib/taucs/src contrib/taucs/config)  # config/ holds taucs_config_build.h and taucs_config_tests.h
+      list(APPEND EXTERNAL_LIBRARIES taucs)  # "taucs" is the in-tree target name, not a path to a system library
+      set_config_option(HAVE_TAUCS "Taucs")  # reached whenever HAVE_METIS is set: the library ships in-tree
     else(HAVE_METIS)
       message(STATUS "Warning: Disabling Taucs (requires METIS)")
     endif(HAVE_METIS)
diff --git a/contrib/taucs/CMakeLists.txt b/contrib/taucs/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4345385d07ba787bbf34fd4424f19f8e063bc10a
--- /dev/null
+++ b/contrib/taucs/CMakeLists.txt
@@ -0,0 +1,94 @@
+# Gmsh - Copyright (C) 1997-2015 C. Geuzaine, J.-F. Remacle
+#
+# See the LICENSE.txt file for license information. Please report all
+# bugs and problems to the public mailing list <gmsh@geuz.org>.
+
+# contributor : Jonathan Lambrechts
+
+set(SRC_MULTI  # basenames in src/ (no .c); each is built five times below: DOUBLE, SINGLE, DCOMPLEX, SCOMPLEX, GENERAL
+  taucs_sn_llt
+  taucs_ccs_base
+  taucs_vec_base
+  taucs_ccs_ops
+  taucs_ccs_io
+  taucs_ccs_factor_llt
+  taucs_ccs_solve_llt
+  taucs_complex
+  taucs_ccs_ooc_llt
+  taucs_ccs_ooc_lu
+)
+
+set(SRC_DOUBLE  # basenames in src/ (no .c); each is built once below, with -DTAUCS_CORE_DOUBLE
+  taucs_iter
+  taucs_vaidya
+  taucs_recvaidya
+  taucs_gremban
+  taucs_ccs_xxt
+  taucs_ccs_generators
+)
+
+set(SRC_GENERAL  # basenames in src/ (no .c); each is built once below, with -DTAUCS_CORE_GENERAL
+  taucs_linsolve
+  taucs_ccs_order
+  taucs_memory
+  taucs_logging
+  taucs_timer
+  taucs_ooc_io
+  taucs_malloc
+)
+
+set(SRC  # paths relative to this directory, built without a per-file TAUCS_CORE_* flag; build_variant() appends the staged copies
+  external/src/readhb.c
+  external/src/amdatr.c
+  external/src/amdbar.c
+  external/src/amdexa.c
+  external/src/amdhaf.c
+  external/src/amdhat.c
+  external/src/amdpre.c
+  external/src/amdtru.c
+  external/src/genmmd.c
+  external/src/colamd.c
+)
+
+# build_variant(SFILE FLAG DNAME): stage src/<SFILE>.c as <bindir>/<DNAME>/<SFILE>.c, compile that copy with FLAG, append it to SRC.
+function (build_variant SFILE FLAG DNAME)
+  set(DEST "${CMAKE_CURRENT_BINARY_DIR}/${DNAME}/${SFILE}.c")
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/${SFILE}.c" "${DEST}" COPYONLY)  # input is a configure dependency: edits under src/ refresh the copy
+  set_property(SOURCE "${DEST}" PROPERTY COMPILE_FLAGS " ${FLAG}")  # flag applies to this staged copy only
+  set(SRC ${SRC} "${DEST}" PARENT_SCOPE)  # function scope is private; hand the extended list back to the caller
+endfunction(build_variant)
+
+foreach(SFILE IN LISTS SRC_MULTI)  # five staged copies per module: one per TAUCS_CORE_* mode, each in its own subdirectory
+  foreach(VARIANT "DOUBLE;D" "SINGLE;S" "DCOMPLEX;Z" "SCOMPLEX;C" "GENERAL;G")
+    list(GET VARIANT 0 CORE)
+    list(GET VARIANT 1 SUBDIR)
+    build_variant("${SFILE}" "-DTAUCS_CORE_${CORE}" "${SUBDIR}")
+  endforeach()
+endforeach()
+
+foreach(SFILE IN LISTS SRC_DOUBLE)  # one staged copy each, compiled with TAUCS_CORE_DOUBLE under D/
+  build_variant("${SFILE}" "-DTAUCS_CORE_DOUBLE" "D")
+endforeach()
+
+foreach(SFILE IN LISTS SRC_GENERAL)  # one staged copy each, compiled with TAUCS_CORE_GENERAL under G/
+  build_variant("${SFILE}" "-DTAUCS_CORE_GENERAL" "G")
+endforeach()
+
+include_directories(src config)  # config/ provides taucs_config_build.h and taucs_config_tests.h
+
+# Target-wide flags: C99, position-independent code, plus an
+# OSTYPE_<os> define chosen from the platform CMake reports.
+set(FLAGS "-std=c99 -fPIC")
+
+if(UNIX AND APPLE)
+  set(FLAGS "${FLAGS} -DOSTYPE_darwin")
+elseif(UNIX)
+  set(FLAGS "${FLAGS} -DOSTYPE_linux")
+endif()
+
+if(WIN32)
+  set(FLAGS "${FLAGS} -DOSTYPE_win32")
+endif()
+
+add_library(taucs STATIC ${SRC})  # SRC = external sources + every copy staged by build_variant()
+set_target_properties(taucs PROPERTIES COMPILE_FLAGS "${FLAGS}")
diff --git a/contrib/taucs/LICENSE b/contrib/taucs/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..bf86e757c099d574ff83c78e80addc11661faf44
--- /dev/null
+++ b/contrib/taucs/LICENSE
@@ -0,0 +1,9 @@
+TAUCS Version 2.0, November 29, 2001. Copyright (c) 2001, 2002, 2003 by Sivan Toledo, Tel-Aviv Univesity, stoledo@tau.ac.il. All Rights Reserved.
+
+TAUCS License:
+
+Your use or distribution of TAUCS or any derivative code implies that you agree to this License.
+
+THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+Permission is hereby granted to use or copy this program, provided that the Copyright, this License, and the Availability of the original version is retained on all copies. User documentation of any code that uses this code or any derivative code must cite the Copyright, this License, the Availability note, and "Used by permission." If this code or any derivative code is accessible from within MATLAB, then typing "help taucs" must cite the Copyright, and "type taucs" must also cite this License and the Availability note. Permission to modify the code and to distribute modified code is granted, provided the Copyright, this License, and the Availability note are retained, and a notice that the code was modified is included. This software is provided to you free of charge.
diff --git a/contrib/taucs/config/taucs_config_build.h b/contrib/taucs/config/taucs_config_build.h
new file mode 100644
index 0000000000000000000000000000000000000000..5972a2c92ca01f23f8ba90a2c8854f88cfef3270
--- /dev/null
+++ b/contrib/taucs/config/taucs_config_build.h
@@ -0,0 +1,31 @@
+#define TAUCS_CONFIG_DREAL /* NOTE(review): presumably pairs with the TAUCS_CORE_DOUBLE objects built by contrib/taucs/CMakeLists.txt -- confirm in taucs.h */
+#define TAUCS_CONFIG_SREAL
+#define TAUCS_CONFIG_DCOMPLEX
+#define TAUCS_CONFIG_SCOMPLEX
+#define TAUCS_CONFIG_GENERIC_COMPLEX
+#define TAUCS_CONFIG_TIMING
+#define TAUCS_CONFIG_BASE
+#define TAUCS_CONFIG_MATRIX_IO
+#define TAUCS_CONFIG_METIS /* the top-level CMakeLists.txt only adds this library when HAVE_METIS is set */
+#define TAUCS_CONFIG_AMD
+#define TAUCS_CONFIG_COLAMD
+#define TAUCS_CONFIG_GENMMD
+#define TAUCS_CONFIG_ORDERING
+#define TAUCS_CONFIG_FACTOR
+#define TAUCS_CONFIG_LLT
+#define TAUCS_CONFIG_OOC_LLT
+#define TAUCS_CONFIG_OOC_LU
+#define TAUCS_CONFIG_ADVANCED_MEMORY_OPS
+#define TAUCS_CONFIG_VAIDYA
+#define TAUCS_CONFIG_REC_VAIDYA
+#define TAUCS_CONFIG_GREMBAN
+#define TAUCS_CONFIG_INCOMPLETE_CHOL
+#define TAUCS_CONFIG_ITER
+#define TAUCS_CONFIG_INVERSE_FACTOR
+#define TAUCS_CONFIG_TESTING_PROGRAMS /* NOTE(review): contrib/taucs/CMakeLists.txt builds only the static library -- confirm the test switches are needed */
+#define TAUCS_CONFIG_TEST_DIRECT
+#define TAUCS_CONFIG_TEST_RUN
+#define TAUCS_CONFIG_TEST_ITER
+#define TAUCS_CONFIG_MATRIX_GENERATORS
+#define TAUCS_CONFIG_MALLOC_STUBS
+
diff --git a/contrib/taucs/config/taucs_config_tests.h b/contrib/taucs/config/taucs_config_tests.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb205e91b2c4413752ac8b95910ceaee656467d8
--- /dev/null
+++ b/contrib/taucs/config/taucs_config_tests.h
@@ -0,0 +1,4 @@
+/* Naming scheme for BLAS symbols; presumably selects a trailing underscore (e.g. dgemm_) -- TODO confirm in taucs.h */
+#define TAUCS_BLAS_UNDERSCORE
+/* Assume C99 complex number support; contrib/taucs/CMakeLists.txt compiles the library with -std=c99 */
+#define TAUCS_C99_COMPLEX
diff --git a/contrib/taucs/external/src/amdatr.c b/contrib/taucs/external/src/amdatr.c
new file mode 100644
index 0000000000000000000000000000000000000000..a7347ee0dcf6191ce0a927ed9c3250d4a194acbd
--- /dev/null
+++ b/contrib/taucs/external/src/amdatr.c
@@ -0,0 +1,1475 @@
+/* amdatr.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Subroutine */ int amdatr_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo;
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer hash, pend, hmod, lenj, dmax_, wbig, wflg, psrc, pdst, 
+	    wnvi, e, i, j, k, p, degme, x, nleft, ilast, jlast, inext, jnext, 
+	    p1, nvpiv, p2, p3, me, ln, we, pj, pn, mindeg, elenme, slenme, 
+	    maxmem, newmem, deg, eln, mem, nel, pme, nvi, nvj, pme1, pme2, 
+	    knt1, knt2, knt3;
+
+/* -----------------------------------------------------------------------
+ */
+/*  The MC47 / AMD suite of minimum degree ordering algorithms. */
+
+/*  This code is one of seven variations of a single algorithm: */
+/*  the primary routine (MC47B/BD, only available in the Harwell */
+/*  Subroutine Library), and 6 variations that differ only in */
+/*  how they compute the degree (available in NETLIB). */
+
+/*  For information on the Harwell Subroutine Library, contact */
+/*  John Harding, Harwell Subroutine Library, B 552, AEA Technology, */
+/*  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573, */
+/*  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will */
+/*  provide details of price and conditions of use. */
+/* -----------------------------------------------------------------------
+ */
+/* ***********************************************************************
+ */
+/* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU, */
+/* and AMDATR) may be used SOLELY for educational, research, and */
+/* benchmarking purposes by non-profit organizations and the U.S. */
+/* government.  Commercial and other organizations may make use of the */
+/* AMD routines SOLELY for benchmarking purposes only.  The AMD */
+/* routines may be modified by or on behalf of the User for such */
+/* use but at no time shall the AMD routines or any such modified */
+/* version of them become the property of the User.  The AMD routines */
+/* are provided without warranty of any kind, either expressed or */
+/* implied.  Neither the Authors nor their employers shall be liable */
+/* for any direct or consequential loss or damage whatsoever arising */
+/* out of the use or misuse of the AMD routines by the User.  The AMD */
+/* routines must not be sold.  You may make copies of the AMD routines, */
+/* but this NOTICE and the Copyright notice must appear in all copies. */
+/* Any other use of the AMD routines requires written permission. */
+/* Your use of the AMD routines is an implicit agreement to these */
+/* conditions." */
+/* ***********************************************************************
+ */
+/* -----------------------------------------------------------------------
+ */
+/* AMDatr:  Approximate Minimum (UMFPACK/MA38-style, true) Degree */
+/*          ordering algorithm, but without aggressive absorption */
+/* -----------------------------------------------------------------------
+ */
+/*  Variation 6:  MC47-style approximate true degree, but with no */
+/*  aggressive absorption.  Note that some of the comments in the code */
+/*  below reflect the approximate *external* degree, whereas this */
+/*  code actually uses the approximate *true* degree. */
+
+/*  We recommend using MC47B/BD instead of this routine since MC47B/BD */
+/*  gives better results in about the same time. */
+/* -----------------------------------------------------------------------
+ */
+/* Given a representation of the nonzero pattern of a symmetric matrix, */
+/*       A, (excluding the diagonal) perform an approximate minimum */
+/*       (UMFPACK/MA38-style) degree ordering to compute a pivot order */
+/*       such that the introduction of nonzeros (fill-in) in the Cholesky 
+*/
+/*       factors A = LL^T are kept low.  At each step, the pivot */
+/*       selected is the one with the minimum UMFPACK/MA38-style */
+/*       upper-bound on the true degree.  This routine does not */
+/*       perform aggressive absorption (as done by MC47B/BD).  Aggressive */
+/*       absorption in MC47B/BD is used to tighten the bound on the */
+/*       degree.  This can result in a significant improvement in the */
+/*       quality of the ordering for some matrices. */
+
+/*       The approximate degree algorithm implemented here is the */
+/*       symmetric analog of the degree update algorithm in MA38 and */
+/*       UMFPACK (the Unsymmetric-pattern MultiFrontal PACKage, both by */
+/*       Davis and Duff, available for academic users in NETLIB as */
+/*       linalg/umfpack.shar or via anonymous ftp to */
+/*       ftp.cis.ufl.edu:pub/umfpack).  Non-academic users must use */
+/*       MA38 in the Harwell Subroutine Library instead of UMFPACK. */
+/* ********************************************************************** 
+*/
+/* ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ****** 
+*/
+/* ********************************************************************** 
+*/
+/* ** If you want error checking, a more versatile input format, and a ** 
+*/
+/* ** simpler user interface, then use MC47A/AD in the Harwell         ** 
+*/
+/* ** Subroutine Library, which checks for errors, transforms the      ** 
+*/
+/* ** input, and calls MC47B/BD.                                       ** 
+*/
+/* ********************************************************************** 
+*/
+/*       References:  (UF Tech Reports are available via anonymous ftp */
+/*       to ftp.cis.ufl.edu:cis/tech-reports). */
+
+/*       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern */
+/*               multifrontal method for sparse LU factorization", */
+/*               SIAM J. Matrix Analysis and Applications, to appear. */
+/*               also Univ. of Florida Technical Report TR-94-038. */
+/*               Discusses UMFPACK / MA38. */
+
+/*       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, */
+/*               "An approximate minimum degree ordering algorithm," */
+/*               SIAM J. Matrix Analysis and Applications (to appear), */
+/*               also Univ. of Florida Technical Report TR-94-039. */
+/*               Discusses this routine. */
+
+/*       [3] Alan George and Joseph Liu, "The evolution of the */
+/*               minimum degree ordering algorithm," SIAM Review, vol. */
+/*               31, no. 1, pp. 1-19, March 1989.  We list below the */
+/*               features mentioned in that paper that this code */
+/*               includes: */
+
+/*       mass elimination: */
+/*               Yes.  MA27 relied on supervariable detection for mass */
+/*               elimination. */
+/*       indistinguishable nodes: */
+/*               Yes (we call these "supervariables").  This was also in 
+*/
+/*               the MA27 code - although we modified the method of */
+/*               detecting them (the previous hash was the true degree, */
+/*               which we no longer keep track of).  A supervariable is */
+/*               a set of rows with identical nonzero pattern.  All */
+/*               variables in a supervariable are eliminated together. */
+/*               Each supervariable has as its numerical name that of */
+/*               one of its variables (its principal variable). */
+/*       quotient graph representation: */
+/*               Yes.  We use the term "element" for the cliques formed */
+/*               during elimination.  This was also in the MA27 code. */
+/*               The algorithm can operate in place, but it will work */
+/*               more efficiently if given some "elbow room." */
+/*       element absorption: */
+/*               Yes.  This was also in the MA27 code. */
+/*       external degree: */
+/*               Yes.  The MA27 code was based on the true degree. */
+/*       incomplete degree update and multiple elimination: */
+/*               No.  This was not in MA27, either.  Our method of */
+/*               degree update within MC47B/BD is element-based, not */
+/*               variable-based.  It is thus not well-suited for use */
+/*               with incomplete degree update or multiple elimination. */
+/* -----------------------------------------------------------------------
+ */
+/* Authors, and Copyright (C) 1995 by: */
+/*       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid. 
+*/
+
+/* Acknowledgements: */
+/*       This work (and the UMFPACK package) was supported by the */
+/*       National Science Foundation (ASC-9111263 and DMS-9223088). */
+/*       The UMFPACK/MA38 approximate degree update algorithm, the */
+/*       unsymmetric analog which forms the basis of MC47B/BD, was */
+/*       developed while Tim Davis was supported by CERFACS (Toulouse, */
+/*       France) in a post-doctoral position. */
+
+/* Date:  September, 1995 */
+/* -----------------------------------------------------------------------
+ */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT ARGUMENTS (unaltered): */
+/* -----------------------------------------------------------------------
+ */
+/* n:    The matrix order. */
+
+/*       Restriction:  1 .le. n .lt. (iovflo/2)-2 */
+/* iwlen:        The length of iw (1..iwlen).  On input, the matrix is */
+/*       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/*       slightly larger than what is required to hold the matrix, at */
+/*       least iwlen .ge. pfree + n is recommended.  Otherwise, */
+/*       excessive compressions will take place. */
+/*       *** We do not recommend running this algorithm with *** */
+/*       ***      iwlen .lt. pfree + n.                      *** */
+/*       *** Better performance will be obtained if          *** */
+/*       ***      iwlen .ge. pfree + n                       *** */
+/*       *** or better yet                                   *** */
+/*       ***      iwlen .gt. 1.2 * pfree                     *** */
+/*       *** (where pfree is its value on input).            *** */
+/*       The algorithm will not run at all if iwlen .lt. pfree-1. */
+
+/*       Restriction: iwlen .ge. pfree-1 */
+/* iovflo:       The largest positive integer that your computer can */
+/*       represent (-iovflo should also be representable).  On a 32-bit */
+/*       computer with 2's-complement arithmetic, */
+/*       iovflo = (2^31)-1 = 2,147,483,647. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/OUTPUT ARGUMENTS: */
+/* -----------------------------------------------------------------------
+ */
+/* pe:   On input, pe (i) is the index in iw of the start of row i, or */
+/*       zero if row i has no off-diagonal non-zeros. */
+
+/*       During execution, it is used for both supervariables and */
+/*       elements: */
+
+/*       * Principal supervariable i:  index into iw of the */
+/*               description of supervariable i.  A supervariable */
+/*               represents one or more rows of the matrix */
+/*               with identical nonzero pattern. */
+/*       * Non-principal supervariable i:  if i has been absorbed */
+/*               into another supervariable j, then pe (i) = -j. */
+/*               That is, j has the same pattern as i. */
+/*               Note that j might later be absorbed into another */
+/*               supervariable j2, in which case pe (i) is still -j, */
+/*               and pe (j) = -j2. */
+/*       * Unabsorbed element e:  the index into iw of the description */
+/*               of element e, if e has not yet been absorbed by a */
+/*               subsequent element.  Element e is created when */
+/*               the supervariable of the same name is selected as */
+/*               the pivot. */
+/*       * Absorbed element e:  if element e is absorbed into element */
+/*               e2, then pe (e) = -e2.  This occurs when the pattern of 
+*/
+/*               e (that is, Le) is found to be a subset of the pattern */
+/*               of e2 (that is, Le2).  If element e is "null" (it has */
+/*               no nonzeros outside its pivot block), then pe (e) = 0. */
+
+/*       On output, pe holds the assembly tree/forest, which implicitly */
+/*       represents a pivot order with identical fill-in as the actual */
+/*       order (via a depth-first search of the tree). */
+
+/*       On output: */
+/*       If nv (i) .gt. 0, then i represents a node in the assembly tree, 
+*/
+/*       and the parent of i is -pe (i), or zero if i is a root. */
+/*       If nv (i) = 0, then (i,-pe (i)) represents an edge in a */
+/*       subtree, the root of which is a node in the assembly tree. */
+/* pfree:        On input the tail end of the array, iw (pfree..iwlen), */
+/*       is empty, and the matrix is stored in iw (1..pfree-1). */
+/*       During execution, additional data is placed in iw, and pfree */
+/*       is modified so that iw (pfree..iwlen) is always the unused part 
+*/
+/*       of iw.  On output, pfree is set equal to the size of iw that */
+/*       would have been needed for no compressions to occur.  If */
+/*       ncmpa is zero, then pfree (on output) is less than or equal to */
+/*       iwlen, and the space iw (pfree+1 ... iwlen) was not used. */
+/*       Otherwise, pfree (on output) is greater than iwlen, and all the 
+*/
+/*       memory in iw was used. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/MODIFIED (undefined on output): */
+/* -----------------------------------------------------------------------
+ */
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/*       matrix, excluding the diagonal.  The contents of len (1..n) */
+/*       are undefined on output. */
+/* iw:   On input, iw (1..pfree-1) holds the description of each row i */
+/*       in the matrix.  The matrix must be symmetric, and both upper */
+/*       and lower triangular parts must be present.  The diagonal must */
+/*       not be present.  Row i is held as follows: */
+
+/*               len (i):  the length of the row i data structure */
+/*               iw (pe (i) ... pe (i) + len (i) - 1): */
+/*                       the list of column indices for nonzeros */
+/*                       in row i (simple supervariables), excluding */
+/*                       the diagonal.  All supervariables start with */
+/*                       one row/column each (supervariable i is just */
+/*                       row i). */
+/*               if len (i) is zero on input, then pe (i) is ignored */
+/*               on input. */
+
+/*               Note that the rows need not be in any particular order, 
+*/
+/*               and there may be empty space between the rows. */
+
+/*       During execution, the supervariable i experiences fill-in. */
+/*       This is represented by placing in i a list of the elements */
+/*       that cause fill-in in supervariable i: */
+
+/*               len (i):  the length of supervariable i */
+/*               iw (pe (i) ... pe (i) + elen (i) - 1): */
+/*                       the list of elements that contain i.  This list 
+*/
+/*                       is kept short by removing absorbed elements. */
+/*               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1): */
+/*                       the list of supervariables in i.  This list */
+/*                       is kept short by removing nonprincipal */
+/*                       variables, and any entry j that is also */
+/*                       contained in at least one of the elements */
+/*                       (j in Le) in the list for i (e in row i). */
+
+/*       When supervariable i is selected as pivot, we create an */
+/*       element e of the same name (e=i): */
+
+/*               len (e):  the length of element e */
+/*               iw (pe (e) ... pe (e) + len (e) - 1): */
+/*                       the list of supervariables in element e. */
+
+/*       An element represents the fill-in that occurs when supervariable 
+*/
+/*       i is selected as pivot (which represents the selection of row i 
+*/
+/*       and all non-principal variables whose principal variable is i). 
+*/
+/*       We use the term Le to denote the set of all supervariables */
+/*       in element e.  Absorbed supervariables and elements are pruned */
+/*       from these lists when computationally convenient. */
+
+/*       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. */
+/*       The contents of iw are undefined on output. */
+/* -----------------------------------------------------------------------
+ */
+/* OUTPUT (need not be set on input): */
+/* -----------------------------------------------------------------------
+ */
+/* nv:   During execution, abs (nv (i)) is equal to the number of rows */
+/*       that are represented by the principal supervariable i.  If i is 
+*/
+/*       a nonprincipal variable, then nv (i) = 0.  Initially, */
+/*       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a */
+/*       principal variable in the pattern Lme of the current pivot */
+/*       element me.  On output, nv (e) holds the true degree of element 
+*/
+/*       e at the time it was created (including the diagonal part). */
+/* ncmpa:        The number of times iw was compressed.  If this is */
+/*       excessive, then the execution took longer than what could have */
+/*       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20% */
+/*       larger than the value of pfree on input (or at least */
+/*       iwlen .ge. pfree + n).  The fastest performance will be */
+/*       obtained when ncmpa is returned as zero.  If iwlen is set to */
+/*       the value returned by pfree on *output*, then no compressions */
+/*       will occur. */
+/* elen: See the description of iw above.  At the start of execution, */
+/*       elen (i) is set to zero.  During execution, elen (i) is the */
+/*       number of elements in the list for supervariable i.  When e */
+/*       becomes an element, elen (e) = -nel is set, where nel is the */
+/*       current step of factorization.  elen (i) = 0 is done when i */
+/*       becomes nonprincipal. */
+
+/*       For variables, elen (i) .ge. 0 holds until just before the */
+/*       permutation vectors are computed.  For elements, */
+/*       elen (e) .lt. 0 holds. */
+
+/*       On output elen (1..n) holds the inverse permutation (the same */
+/*       as the 'INVP' argument in Sparspak).  That is, if k = elen (i), 
+*/
+/*       then row i is the kth pivot row.  Row i of A appears as the */
+/*       (elen(i))-th row in the permuted matrix, PAP^T. */
+/* last: In a degree list, last (i) is the supervariable preceding i, */
+/*       or zero if i is the head of the list.  In a hash bucket, */
+/*       last (i) is the hash key for i.  last (head (hash)) is also */
+/*       used as the head of a hash bucket if head (hash) contains a */
+/*       degree list (see head, below). */
+
+/*       On output, last (1..n) holds the permutation (the same as the */
+/*       'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/*       row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/*       in the permuted matrix, PAP^T. */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL (not input or output - used only during execution): */
+/* -----------------------------------------------------------------------
+ */
+/* degree:       If i is a supervariable, then degree (i) holds the */
+/*       current approximation of the external degree of row i (an upper 
+*/
+/*       bound).  The external degree is the number of nonzeros in row i, 
+*/
+/*       minus abs (nv (i)) (the diagonal part).  The bound is equal to */
+/*       the external degree if elen (i) is less than or equal to two. */
+
+/*       We also use the term "external degree" for elements e to refer */
+/*       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|, 
+*/
+/*       which is the degree of the off-diagonal part of the element e */
+/*       (not including the diagonal part). */
+/* head: head is used for degree lists.  head (deg) is the first */
+/*       supervariable in a degree list (all supervariables i in a */
+/*       degree list deg have the same approximate degree, namely, */
+/*       deg = degree (i)).  If the list deg is empty then */
+/*       head (deg) = 0. */
+
+/*       During supervariable detection head (hash) also serves as a */
+/*       pointer to a hash bucket. */
+/*       If head (hash) .gt. 0, there is a degree list of degree hash. */
+/*               The hash bucket head pointer is last (head (hash)). */
+/*       If head (hash) = 0, then the degree list and hash bucket are */
+/*               both empty. */
+/*       If head (hash) .lt. 0, then the degree list is empty, and */
+/*               -head (hash) is the head of the hash bucket. */
+/*       After supervariable detection is complete, all hash buckets */
+/*       are empty, and the (last (head (hash)) = 0) condition is */
+/*       restored for the non-empty degree lists. */
+/* next: next (i) is the supervariable following i in a link list, or */
+/*       zero if i is the last in the list.  Used for two kinds of */
+/*       lists:  degree lists and hash buckets (a supervariable can be */
+/*       in only one kind of list at a time). */
+/* w:    The flag array w determines the status of elements and */
+/*       variables, and the external degree of elements. */
+
+/*       for elements: */
+/*          if w (e) = 0, then the element e is absorbed */
+/*          if w (e) .ge. wflg, then w (e) - wflg is the size of */
+/*               the set |Le \ Lme|, in terms of nonzeros (the */
+/*               sum of abs (nv (i)) for each principal variable i that */
+/*               is both in the pattern of element e and NOT in the */
+/*               pattern of the current pivot element, me). */
+/*          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has */
+/*               not yet been seen in the scan of the element lists in */
+/*               the computation of |Le\Lme| in loop 150 below. */
+
+/*       for variables: */
+/*          during supervariable detection, if w (j) .ne. wflg then j is 
+*/
+/*          not in the pattern of variable i */
+
+/*       The w array is initialized by setting w (i) = 1 for all i, */
+/*       and by setting wflg = 2.  It is reinitialized if wflg becomes */
+/*       too large (to ensure that wflg+n does not cause integer */
+/*       overflow). */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL INTEGERS: */
+/* -----------------------------------------------------------------------
+ */
+/* deg:          the degree of a variable or element */
+/* degme:        size, |Lme|, of the current element, me (= degree (me)) 
+*/
+/* dext:         external degree, |Le \ Lme|, of some element e */
+/* dmax:         largest |Le| seen so far */
+/* e:            an element */
+/* elenme:       the length, elen (me), of element list of pivotal var. */
+/* eln:          the length, elen (...), of an element list */
+/* hash:         the computed value of the hash function */
+/* hmod:         the hash function is computed modulo hmod = max (1,n-1) 
+*/
+/* i:            a supervariable */
+/* ilast:        the entry in a link list preceding i */
+/* inext:        the entry in a link list following i */
+/* j:            a supervariable */
+/* jlast:        the entry in a link list preceding j */
+/* jnext:        the entry in a link list, or path, following j */
+/* k:            the pivot order of an element or variable */
+/* knt1:         loop counter used during element construction */
+/* knt2:         loop counter used during element construction */
+/* knt3:         loop counter used during compression */
+/* lenj:         len (j) */
+/* ln:           length of a supervariable list */
+/* maxmem:       amount of memory needed for no compressions */
+/* me:           current supervariable being eliminated, and the */
+/*                       current element created by eliminating that */
+/*                       supervariable */
+/* mem:          memory in use assuming no compressions have occurred */
+/* mindeg:       current minimum degree */
+/* nel:          number of pivots selected so far */
+/* newmem:       amount of new memory needed for current pivot element */
+/* nleft:        n - nel, the number of nonpivotal rows/columns remaining 
+*/
+/* nvi:          the number of variables in a supervariable i (= nv (i)) 
+*/
+/* nvj:          the number of variables in a supervariable j (= nv (j)) 
+*/
+/* nvpiv:        number of pivots in current element */
+/* slenme:       number of variables in variable list of pivotal variable 
+*/
+/* wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig. */
+/* we:           w (e) */
+/* wflg:         used for flagging the w array.  See description of iw. */
+/* wnvi:         wflg - nv (i) */
+/* x:            either a supervariable or an element */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL POINTERS: */
+/* -----------------------------------------------------------------------
+ */
+/*               Any parameter (pe (...) or pfree) or local variable */
+/*               starting with "p" (for Pointer) is an index into iw, */
+/*               and all indices into iw use variables starting with */
+/*               "p."  The only exception to this rule is the iwlen */
+/*               input argument. */
+/* p:            pointer into lots of things */
+/* p1:           pe (i) for some variable i (start of element list) */
+/* p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list) 
+*/
+/* p3:           index of first supervariable in clean list */
+/* pdst:         destination pointer, for compression */
+/* pend:         end of memory to compress */
+/* pj:           pointer into an element or variable */
+/* pme:          pointer into the current element (pme1...pme2) */
+/* pme1:         the current element, me, is stored in iw (pme1...pme2) */
+/* pme2:         the end of the current element */
+/* pn:           pointer into a "clean" variable, also used to compress */
+/* psrc:         source pointer, for compression */
+/* -----------------------------------------------------------------------
+ */
+/*  FUNCTIONS CALLED: */
+/* -----------------------------------------------------------------------
+ */
+/* =======================================================================
+ */
+/*  INITIALIZATIONS */
+/* =======================================================================
+ */
+    /* Parameter adjustments */
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    wflg = 2;
+    mindeg = 1;
+    *ncmpa = 0;
+    nel = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - 1;
+    hmod = max(i__1,i__2);
+    dmax_ = 0;
+    wbig = *iovflo - *n;
+    mem = *pfree - 1;
+    maxmem = mem;
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	last[i] = 0;
+	head[i] = 0;
+	nv[i] = 1;
+	w[i] = 1;
+	elen[i] = 0;
+	degree[i] = len[i];
+/* L10: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       initialize degree lists and eliminate rows with no off-diag. nz. 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	deg = degree[i];
+/*          include the diagonal in the true degree */
+	++deg;
+	degree[i] = deg;
+	if (deg > 1) {
+/*             --------------------------------------------------
+-------- */
+/*             place i in the degree list corresponding to its
+degree */
+/*             --------------------------------------------------
+-------- */
+	    inext = head[deg];
+	    if (inext != 0) {
+		last[inext] = i;
+	    }
+	    next[i] = inext;
+	    head[deg] = i;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             we have a variable that can be eliminated at once
+because */
+/*             there is no off-diagonal non-zero in its row. */
+/*             --------------------------------------------------
+-------- */
+	    degree[i] = 0;
+	    ++nel;
+	    elen[i] = -nel;
+	    pe[i] = 0;
+	    w[i] = 0;
+	}
+/* L20: */
+    }
+/* =======================================================================
+ */
+/*  WHILE (selecting pivots) DO */
+/* =======================================================================
+ */
+L30:
+    if (nel < *n) {
+/* ==================================================================
+===== */
+/*  GET PIVOT OF MINIMUM DEGREE */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          find next supervariable for elimination */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = *n;
+	for (deg = mindeg; deg <= i__1; ++deg) {
+	    me = head[deg];
+	    if (me > 0) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	mindeg = deg;
+/*          ---------------------------------------------------------
+---- */
+/*          remove chosen variable from link list */
+/*          ---------------------------------------------------------
+---- */
+	inext = next[me];
+	if (inext != 0) {
+	    last[inext] = 0;
+	}
+	head[deg] = inext;
+/*          ---------------------------------------------------------
+---- */
+/*          me represents the elimination of pivots nel+1 to nel+nv(me
+). */
+/*          place me itself as the first in this set.  It will be move
+d */
+/*          to the nel+nv(me) position when the permutation vectors ar
+e */
+/*          computed. */
+/*          ---------------------------------------------------------
+---- */
+	elenme = elen[me];
+	elen[me] = -(nel + 1);
+	nvpiv = nv[me];
+	nel += nvpiv;
+/* ==================================================================
+===== */
+/*  CONSTRUCT NEW ELEMENT */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          At this point, me is the pivotal supervariable.  It will b
+e */
+/*          converted into the current element.  Scan list of the */
+/*          pivotal supervariable, me, setting tree pointers and */
+/*          constructing new list of supervariables for the new elemen
+t, */
+/*          me.  p is a pointer to the current position in the old lis
+t. */
+/*          ---------------------------------------------------------
+---- */
+/*          flag the variable "me" as being in Lme by negating nv (me)
+ */
+	nv[me] = -nvpiv;
+	degme = 0;
+	if (elenme == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in place */
+/*             --------------------------------------------------
+-------- */
+	    pme1 = pe[me];
+	    pme2 = pme1 - 1;
+	    i__1 = pme1 + len[me] - 1;
+	    for (p = pme1; p <= i__1; ++p) {
+		i = iw[p];
+		nvi = nv[i];
+		if (nvi > 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   i is a principal variable not yet pla
+ced in Lme. */
+/*                   store i in new list */
+/*                   ------------------------------------
+---------------- */
+		    degme += nvi;
+/*                   flag i as being in Lme by negating nv
+ (i) */
+		    nv[i] = -nvi;
+		    ++pme2;
+		    iw[pme2] = i;
+/*                   ------------------------------------
+---------------- */
+/*                   remove variable i from degree list. 
+*/
+/*                   ------------------------------------
+---------------- */
+		    ilast = last[i];
+		    inext = next[i];
+		    if (inext != 0) {
+			last[inext] = ilast;
+		    }
+		    if (ilast != 0) {
+			next[ilast] = inext;
+		    } else {
+/*                      i is at the head of the degree
+ list */
+			head[degree[i]] = inext;
+		    }
+		}
+/* L60: */
+	    }
+/*             this element takes no new memory in iw: */
+	    newmem = 0;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in empty space, iw (pfree
+ ...) */
+/*             --------------------------------------------------
+-------- */
+	    p = pe[me];
+	    pme1 = *pfree;
+	    slenme = len[me] - elenme;
+	    i__1 = elenme + 1;
+	    for (knt1 = 1; knt1 <= i__1; ++knt1) {
+		if (knt1 > elenme) {
+/*                   search the supervariables in me. */
+		    e = me;
+		    pj = p;
+		    ln = slenme;
+		} else {
+/*                   search the elements in me. */
+		    e = iw[p];
+		    ++p;
+		    pj = pe[e];
+		    ln = len[e];
+		}
+/*                -------------------------------------------
+------------ */
+/*                search for different supervariables and add 
+them to the */
+/*                new list, compressing when necessary. this l
+oop is */
+/*                executed once for each element in the list a
+nd once for */
+/*                all the supervariables in the list. */
+/*                -------------------------------------------
+------------ */
+		i__2 = ln;
+		for (knt2 = 1; knt2 <= i__2; ++knt2) {
+		    i = iw[pj];
+		    ++pj;
+		    nvi = nv[i];
+		    if (nvi > 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      compress iw, if necessary */
+/*                      -----------------------------
+-------------------- */
+			if (*pfree > *iwlen) {
+/*                         prepare for compressing
+ iw by adjusting */
+/*                         pointers and lengths so
+ that the lists being */
+/*                         searched in the inner a
+nd outer loops contain */
+/*                         only the remaining entr
+ies. */
+			    pe[me] = p;
+			    len[me] -= knt1;
+			    if (len[me] == 0) {
+/*                            nothing left of 
+supervariable me */
+				pe[me] = 0;
+			    }
+			    pe[e] = pj;
+			    len[e] = ln - knt2;
+			    if (len[e] == 0) {
+/*                            nothing left of 
+element e */
+				pe[e] = 0;
+			    }
+			    ++(*ncmpa);
+/*                         store first item in pe 
+*/
+/*                         set first entry to -ite
+m */
+			    i__3 = *n;
+			    for (j = 1; j <= i__3; ++j) {
+				pn = pe[j];
+				if (pn > 0) {
+				    pe[j] = iw[pn];
+				    iw[pn] = -j;
+				}
+/* L70: */
+			    }
+/*                         psrc/pdst point to sour
+ce/destination */
+			    pdst = 1;
+			    psrc = 1;
+			    pend = pme1 - 1;
+/*                         while loop: */
+L80:
+			    if (psrc <= pend) {
+/*                            search for next 
+negative entry */
+				j = -iw[psrc];
+				++psrc;
+				if (j > 0) {
+				    iw[pdst] = pe[j];
+				    pe[j] = pdst;
+				    ++pdst;
+/*                               copy from
+ source to destination */
+				    lenj = len[j];
+				    i__3 = lenj - 2;
+				    for (knt3 = 0; knt3 <= i__3; ++knt3) {
+					iw[pdst + knt3] = iw[psrc + knt3];
+/* L90: */
+				    }
+				    pdst = pdst + lenj - 1;
+				    psrc = psrc + lenj - 1;
+				}
+				goto L80;
+			    }
+/*                         move the new partially-
+constructed element */
+			    p1 = pdst;
+			    i__3 = *pfree - 1;
+			    for (psrc = pme1; psrc <= i__3; ++psrc) {
+				iw[pdst] = iw[psrc];
+				++pdst;
+/* L100: */
+			    }
+			    pme1 = p1;
+			    *pfree = pdst;
+			    pj = pe[e];
+			    p = pe[me];
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      i is a principal variable not 
+yet placed in Lme */
+/*                      store i in new list */
+/*                      -----------------------------
+-------------------- */
+			degme += nvi;
+/*                      flag i as being in Lme by nega
+ting nv (i) */
+			nv[i] = -nvi;
+			iw[*pfree] = i;
+			++(*pfree);
+/*                      -----------------------------
+-------------------- */
+/*                      remove variable i from degree 
+link list */
+/*                      -----------------------------
+-------------------- */
+			ilast = last[i];
+			inext = next[i];
+			if (inext != 0) {
+			    last[inext] = ilast;
+			}
+			if (ilast != 0) {
+			    next[ilast] = inext;
+			} else {
+/*                         i is at the head of the
+ degree list */
+			    head[degree[i]] = inext;
+			}
+		    }
+/* L110: */
+		}
+		if (e != me) {
+/*                   set tree pointer and flag to indicate
+ element e is */
+/*                   absorbed into new element me (the par
+ent of e is me) */
+		    pe[e] = -me;
+		    w[e] = 0;
+		}
+/* L120: */
+	    }
+	    pme2 = *pfree - 1;
+/*             this element takes newmem new memory in iw (possibl
+y zero) */
+	    newmem = *pfree - pme1;
+	    mem += newmem;
+	    maxmem = max(maxmem,mem);
+	}
+/*          ---------------------------------------------------------
+---- */
+/*          me has now been converted into an element in iw (pme1..pme
+2) */
+/*          ---------------------------------------------------------
+---- */
+/*          degme holds the external degree of new element */
+	degree[me] = degme;
+	pe[me] = pme1;
+	len[me] = pme2 - pme1 + 1;
+/*          ---------------------------------------------------------
+---- */
+/*          make sure that wflg is not too large.  With the current */
+/*          value of wflg, wflg+n must not cause integer overflow */
+/*          ---------------------------------------------------------
+---- */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L130: */
+	    }
+	    wflg = 2;
+	}
+/* ==================================================================
+===== */
+/*  COMPUTE (w (e) - wflg) = |Le\Lme| FOR ALL ELEMENTS */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 1:  compute the external degrees of previous elements
+ */
+/*          with respect to the current element.  That is: */
+/*               (w (e) - wflg) = |Le \ Lme| */
+/*          for each element e that appears in any supervariable in Lm
+e. */
+/*          The notation Le refers to the pattern (list of */
+/*          supervariables) of a previous element e, where e is not ye
+t */
+/*          absorbed, stored in iw (pe (e) + 1 ... pe (e) + iw (pe (e)
+)). */
+/*          The notation Lme refers to the pattern of the current elem
+ent */
+/*          (stored in iw (pme1..pme2)).   If (w (e) - wflg) becomes 
+*/
+/*          zero, then the element e will be absorbed in scan 2. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    eln = elen[i];
+	    if (eln > 0) {
+/*                note that nv (i) has been negated to denote 
+i in Lme: */
+		nvi = -nv[i];
+		wnvi = wflg - nvi;
+		i__2 = pe[i] + eln - 1;
+		for (p = pe[i]; p <= i__2; ++p) {
+		    e = iw[p];
+		    we = w[e];
+		    if (we >= wflg) {
+/*                      unabsorbed element e has been 
+seen in this loop */
+			we -= nvi;
+		    } else if (we != 0) {
+/*                      e is an unabsorbed element */
+/*                      this is the first we have seen
+ e in all of Scan 1 */
+			we = degree[e] + wnvi;
+		    }
+		    w[e] = we;
+/* L140: */
+		}
+	    }
+/* L150: */
+	}
+/* ==================================================================
+===== */
+/*  DEGREE UPDATE AND ELEMENT ABSORPTION */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 2:  for each i in Lme, sum up the degree of Lme (whic
+h */
+/*          is degme), plus the sum of the external degrees of each Le
+ */
+/*          for the elements e appearing within i, plus the */
+/*          supervariables in i.  Place i in hash list. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    p1 = pe[i];
+	    p2 = p1 + elen[i] - 1;
+	    pn = p1;
+	    hash = 0;
+	    deg = 0;
+/*             --------------------------------------------------
+-------- */
+/*             scan the element list associated with supervariable
+ i */
+/*             --------------------------------------------------
+-------- */
+/*             UMFPACK/MA38-style approximate degree: */
+	    i__2 = p2;
+	    for (p = p1; p <= i__2; ++p) {
+		e = iw[p];
+		we = w[e];
+		if (we != 0) {
+/*                   e is an unabsorbed element */
+		    deg = deg + we - wflg;
+		    iw[pn] = e;
+		    ++pn;
+		    hash += e;
+		}
+/* L160: */
+	    }
+/*             count the number of elements in i (including me): 
+*/
+	    elen[i] = pn - p1 + 1;
+/*             --------------------------------------------------
+-------- */
+/*             scan the supervariables in the list associated with
+ i */
+/*             --------------------------------------------------
+-------- */
+	    p3 = pn;
+	    i__2 = p1 + len[i] - 1;
+	    for (p = p2 + 1; p <= i__2; ++p) {
+		j = iw[p];
+		nvj = nv[j];
+		if (nvj > 0) {
+/*                   j is unabsorbed, and not in Lme. */
+/*                   add to degree and add to new list */
+		    deg += nvj;
+		    iw[pn] = j;
+		    ++pn;
+		    hash += j;
+		}
+/* L170: */
+	    }
+/*             --------------------------------------------------
+-------- */
+/*             update the degree and check for mass elimination */
+/*             --------------------------------------------------
+-------- */
+	    if (elen[i] == 1 && p3 == pn) {
+/*                -------------------------------------------
+------------ */
+/*                mass elimination */
+/*                -------------------------------------------
+------------ */
+/*                There is nothing left of this node except fo
+r an */
+/*                edge to the current pivot element.  elen (i)
+ is 1, */
+/*                and there are no variables adjacent to node 
+i. */
+/*                Absorb i into the current pivot element, me.
+ */
+		pe[i] = -me;
+		nvi = -nv[i];
+		degme -= nvi;
+		nvpiv += nvi;
+		nel += nvi;
+		nv[i] = 0;
+		elen[i] = 0;
+	    } else {
+/*                -------------------------------------------
+------------ */
+/*                update the upper-bound degree of i */
+/*                -------------------------------------------
+------------ */
+/*                the following degree does not yet include th
+e size */
+/*                of the current element, which is added later
+: */
+/* Computing MIN */
+		i__2 = degree[i];
+		degree[i] = min(i__2,deg);
+/*                -------------------------------------------
+------------ */
+/*                add me to the list for i */
+/*                -------------------------------------------
+------------ */
+/*                move first supervariable to end of list */
+		iw[pn] = iw[p3];
+/*                move first element to end of element part of
+ list */
+		iw[p3] = iw[p1];
+/*                add new element to front of list. */
+		iw[p1] = me;
+/*                store the new length of the list in len (i) 
+*/
+		len[i] = pn - p1 + 1;
+/*                -------------------------------------------
+------------ */
+/*                place in hash bucket.  Save hash key of i in
+ last (i). */
+/*                -------------------------------------------
+------------ */
+		hash = hash % hmod + 1;
+		j = head[hash];
+		if (j <= 0) {
+/*                   the degree list is empty, hash head i
+s -j */
+		    next[i] = -j;
+		    head[hash] = -i;
+		} else {
+/*                   degree list is not empty */
+/*                   use last (head (hash)) as hash head 
+*/
+		    next[i] = last[j];
+		    last[j] = i;
+		}
+		last[i] = hash;
+	    }
+/* L180: */
+	}
+	degree[me] = degme;
+/*          ---------------------------------------------------------
+---- */
+/*          Clear the counter array, w (...), by incrementing wflg. */
+/*          ---------------------------------------------------------
+---- */
+	dmax_ = max(dmax_,degme);
+	wflg += dmax_;
+/*          make sure that wflg+n does not cause integer overflow */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L190: */
+	    }
+	    wflg = 2;
+	}
+/*          at this point, w (1..n) .lt. wflg holds */
+/* ==================================================================
+===== */
+/*  SUPERVARIABLE DETECTION */
+/* ==================================================================
+===== */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    if (nv[i] < 0) {
+/*                i is a principal variable in Lme */
+/*                -------------------------------------------
+------------ */
+/*                examine all hash buckets with 2 or more
+variables.  We */
+/*                do this by examining all unique hash keys for
+super- */
+/*                variables in the pattern Lme of the current
+element, me */
+/*                -------------------------------------------
+------------ */
+		hash = last[i];
+/*                let i = head of hash bucket, and empty the h
+ash bucket */
+		j = head[hash];
+		if (j == 0) {
+		    goto L250;
+		}
+		if (j < 0) {
+/*                   degree list is empty */
+		    i = -j;
+		    head[hash] = 0;
+		} else {
+/*                   degree list is not empty, restore las
+t () of head */
+		    i = last[j];
+		    last[j] = 0;
+		}
+		if (i == 0) {
+		    goto L250;
+		}
+/*                while loop: */
+L200:
+		if (next[i] != 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   this bucket has one or more variables
+ following i. */
+/*                   scan all of them to see if i can abso
+rb any entries */
+/*                   that follow i in hash bucket.  Scatte
+r i into w. */
+/*                   ------------------------------------
+---------------- */
+		    ln = len[i];
+		    eln = elen[i];
+/*                   do not flag the first element in the 
+list (me) */
+		    i__2 = pe[i] + ln - 1;
+		    for (p = pe[i] + 1; p <= i__2; ++p) {
+			w[iw[p]] = wflg;
+/* L210: */
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   scan every other entry j following i 
+in bucket */
+/*                   ------------------------------------
+---------------- */
+		    jlast = i;
+		    j = next[i];
+/*                   while loop: */
+L220:
+		    if (j != 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      check if j and i have identica
+l nonzero pattern */
+/*                      -----------------------------
+-------------------- */
+			if (len[j] != ln) {
+/*                         i and j do not have sam
+e size data structure */
+			    goto L240;
+			}
+			if (elen[j] != eln) {
+/*                         i and j do not have sam
+e number of adjacent el */
+			    goto L240;
+			}
+/*                      do not flag the first element 
+in the list (me) */
+			i__2 = pe[j] + ln - 1;
+			for (p = pe[j] + 1; p <= i__2; ++p) {
+			    if (w[iw[p]] != wflg) {
+/*                            an entry (iw(p))
+ is in j but not in i */
+				goto L240;
+			    }
+/* L230: */
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      found it!  j can be absorbed i
+nto i */
+/*                      -----------------------------
+-------------------- */
+			pe[j] = -i;
+/*                      both nv (i) and nv (j) are neg
+ated since they */
+/*                      are in Lme, and the absolute v
+alues of each */
+/*                      are the number of variables in
+ i and j: */
+			nv[i] += nv[j];
+			nv[j] = 0;
+			elen[j] = 0;
+/*                      delete j from hash bucket */
+			j = next[j];
+			next[jlast] = j;
+			goto L220;
+/*                      -----------------------------
+-------------------- */
+L240:
+/*                      j cannot be absorbed into i */
+/*                      -----------------------------
+-------------------- */
+			jlast = j;
+			j = next[j];
+			goto L220;
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   no more variables can be absorbed int
+o i */
+/*                   go to next i in bucket and clear flag
+ array */
+/*                   ------------------------------------
+---------------- */
+		    ++wflg;
+		    i = next[i];
+		    if (i != 0) {
+			goto L200;
+		    }
+		}
+	    }
+L250:
+	    ;
+	}
+/* ==================================================================
+===== */
+/*  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMEN
+T */
+/* ==================================================================
+===== */
+	p = pme1;
+	nleft = *n - nel;
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    nvi = -nv[i];
+	    if (nvi > 0) {
+/*                i is a principal variable in Lme */
+/*                restore nv (i) to signify that i is principa
+l */
+		nv[i] = nvi;
+/*                -------------------------------------------
+------------ */
+/*                compute the true degree (add size of current
+ element) */
+/*                -------------------------------------------
+------------ */
+/* Computing MIN */
+		i__2 = degree[i] + degme;
+		deg = min(i__2,nleft);
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable at the head of the d
+egree list */
+/*                -------------------------------------------
+------------ */
+		inext = head[deg];
+		if (inext != 0) {
+		    last[inext] = i;
+		}
+		next[i] = inext;
+		last[i] = 0;
+		head[deg] = i;
+/*                -------------------------------------------
+------------ */
+/*                save the new degree, and find the minimum de
+gree */
+/*                -------------------------------------------
+------------ */
+		mindeg = min(mindeg,deg);
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable in the element patte
+rn */
+/*                -------------------------------------------
+------------ */
+		iw[p] = i;
+		++p;
+	    }
+/* L260: */
+	}
+/* ==================================================================
+===== */
+/*  FINALIZE THE NEW ELEMENT */
+/* ==================================================================
+===== */
+	nv[me] = nvpiv + degme;
+/*          nv (me) is now the degree of pivot (including diagonal par
+t) */
+/*          save the length of the list for the new element me */
+	len[me] = p - pme1;
+	if (len[me] == 0) {
+/*             there is nothing left of the current pivot element 
+*/
+	    pe[me] = 0;
+	    w[me] = 0;
+	}
+	if (newmem != 0) {
+/*             element was not constructed in place: deallocate pa
+rt */
+/*             of it (final size is less than or equal to newmem, 
+*/
+/*             since newly nonprincipal variables have been remove
+d). */
+	    *pfree = p;
+	    mem = mem - newmem + len[me];
+	}
+/* ==================================================================
+===== */
+/*          END WHILE (selecting pivots) */
+	goto L30;
+    }
+/* =======================================================================
+ */
+/* =======================================================================
+ */
+/*  COMPUTE THE PERMUTATION VECTORS */
+/* =======================================================================
+ */
+/*       ---------------------------------------------------------------- 
+*/
+/*       The time taken by the following code is O(n).  At this */
+/*       point, elen (e) = -k has been done for all elements e, */
+/*       and elen (i) = 0 has been done for all nonprincipal */
+/*       variables i.  At this point, there are no principal */
+/*       supervariables left, and all elements are absorbed. */
+/*       ---------------------------------------------------------------- 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+/*       compute the ordering of unordered nonprincipal variables */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	if (elen[i] == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             i is an un-ordered row.  Traverse the tree from i u
+ntil */
+/*             reaching an element, e.  The element, e, was the */
+/*             principal supervariable of i and all nodes in the p
+ath */
+/*             from i to when e was selected as pivot. */
+/*             --------------------------------------------------
+-------- */
+	    j = -pe[i];
+/*             while (j is a variable) do: */
+L270:
+	    if (elen[j] >= 0) {
+		j = -pe[j];
+		goto L270;
+	    }
+	    e = j;
+/*             --------------------------------------------------
+-------- */
+/*             get the current pivot ordering of e */
+/*             --------------------------------------------------
+-------- */
+	    k = -elen[e];
+/*             --------------------------------------------------
+-------- */
+/*             traverse the path again from i to e, and compress t
+he */
+/*             path (all nodes point to e).  Path compression allo
+ws */
+/*             this code to compute in O(n) time.  Order the unord
+ered */
+/*             nodes in the path, and place the element e at the e
+nd. */
+/*             --------------------------------------------------
+-------- */
+	    j = i;
+/*             while (j is a variable) do: */
+L280:
+	    if (elen[j] >= 0) {
+		jnext = -pe[j];
+		pe[j] = -e;
+		if (elen[j] == 0) {
+/*                   j is an unordered row */
+		    elen[j] = k;
+		    ++k;
+		}
+		j = jnext;
+		goto L280;
+	    }
+/*             leave elen (e) negative, so we know it is an elemen
+t */
+	    elen[e] = -k;
+	}
+/* L290: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       reset the inverse permutation (elen (1..n)) to be positive, */
+/*       and compute the permutation (last (1..n)). */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	k = (i__2 = elen[i], abs(i__2));
+	last[k] = i;
+	elen[i] = k;
+/* L300: */
+    }
+/* =======================================================================
+ */
+/*  RETURN THE MEMORY USAGE IN IW */
+/* =======================================================================
+ */
+/*       If maxmem is less than or equal to iwlen, then no compressions */
+/*       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise */
+/*       compressions did occur, and iwlen would have had to have been */
+/*       greater than or equal to maxmem for no compressions to occur. */
+/*       Return the value of maxmem in the pfree argument. */
+    *pfree = maxmem;
+    return 0;
+} /* amdatr_ */
+
diff --git a/contrib/taucs/external/src/amdatr.f b/contrib/taucs/external/src/amdatr.f
new file mode 100644
index 0000000000000000000000000000000000000000..f96f5fbf82d27402e4f1607f7e2fd564173a0baa
--- /dev/null
+++ b/contrib/taucs/external/src/amdatr.f
@@ -0,0 +1,1281 @@
+
+        SUBROUTINE AMDATR
+     $          (N, PE, IW, LEN, IWLEN, PFREE, NV, NEXT,
+     $          LAST, HEAD, ELEN, DEGREE, NCMPA, W, IOVFLO)
+
+        INTEGER N, IWLEN, PFREE, NCMPA, IOVFLO, IW (IWLEN), PE (N),
+     $          DEGREE (N), NV (N), NEXT (N), LAST (N), HEAD (N),
+     $          ELEN (N), W (N), LEN (N)
+
+C-----------------------------------------------------------------------
+C  The MC47 / AMD suite of minimum degree ordering algorithms.
+C
+C  This code is one of seven variations of a single algorithm:
+C  the primary routine (MC47B/BD, only available in the Harwell
+C  Subroutine Library), and 6 variations that differ only in
+C  how they compute the degree (available in NETLIB).
+C
+C  For information on the Harwell Subroutine Library, contact
+C  John Harding, Harwell Subroutine Library, B 552, AEA Technology,
+C  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573,
+C  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will
+C  provide details of price and conditions of use.
+C-----------------------------------------------------------------------
+
+************************************************************************
+* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+* and AMDATR) may be used SOLELY for educational, research, and
+* benchmarking purposes by non-profit organizations and the U.S.
+* government.  Commercial and other organizations may make use of the
+* AMD routines SOLELY for benchmarking purposes only.  The AMD
+* routines may be modified by or on behalf of the User for such
+* use but at no time shall the AMD routines or any such modified
+* version of them become the property of the User.  The AMD routines
+* are provided without warranty of any kind, either expressed or
+* implied.  Neither the Authors nor their employers shall be liable
+* for any direct or consequential loss or damage whatsoever arising
+* out of the use or misuse of the AMD routines by the User.  The AMD
+* routines must not be sold.  You may make copies of the AMD routines,
+* but this NOTICE and the Copyright notice must appear in all copies.
+* Any other use of the AMD routines requires written permission.
+* Your use of the AMD routines is an implicit agreement to these
+* conditions."
+************************************************************************
+
+C-----------------------------------------------------------------------
+C AMDatr:  Approximate Minimum (UMFPACK/MA38-style, true) Degree
+C          ordering algorithm, but without aggressive absorption
+C-----------------------------------------------------------------------
+
+C  Variation 6:  MC47-style approximate true degree, but with no
+C  aggressive absorption.  Note that some of the comments in the code
+C  below reflect the approximate *external* degree, whereas this
+C  code actually uses the approximate *true* degree.
+C
+C  We recommend using MC47B/BD instead of this routine since MC47B/BD
+C  gives better results in about the same time.
+
+C-----------------------------------------------------------------------
+
+C Given a representation of the nonzero pattern of a symmetric matrix,
+C       A, (excluding the diagonal) perform an approximate minimum
+C       (UMFPACK/MA38-style) degree ordering to compute a pivot order
+C       such that the introduction of nonzeros (fill-in) in the Cholesky
+C       factors A = LL^T is kept low.  At each step, the pivot
+C       selected is the one with the minimum UMFPACK/MA38-style
+C       upper-bound on the true degree.  This routine does not
+C       perform aggressive absorption (as done by MC47B/BD).  Aggressive
+C       absorption in MC47B/BD is used to tighten the bound on the
+C       degree.  This can result in a significant improvement in the
+C       quality of the ordering for some matrices.
+C
+C       The approximate degree algorithm implemented here is the
+C       symmetric analog of the degree update algorithm in MA38 and
+C       UMFPACK (the Unsymmetric-pattern MultiFrontal PACKage, both by
+C       Davis and Duff, available for academic users in NETLIB as
+C       linalg/umfpack.shar or via anonymous ftp to
+C       ftp.cis.ufl.edu:pub/umfpack).  Non-academic users must use
+C       MA38 in the Harwell Subroutine Library instead of UMFPACK.
+
+C **********************************************************************
+C ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
+C **********************************************************************
+C ** If you want error checking, a more versatile input format, and a **
+C ** simpler user interface, then use MC47A/AD in the Harwell         **
+C ** Subroutine Library, which checks for errors, transforms the      **
+C ** input, and calls MC47B/BD.                                       **
+C **********************************************************************
+
+C       References:  (UF Tech Reports are available via anonymous ftp
+C       to ftp.cis.ufl.edu:cis/tech-reports).
+C
+C       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern
+C               multifrontal method for sparse LU factorization",
+C               SIAM J. Matrix Analysis and Applications, to appear.
+C               also Univ. of Florida Technical Report TR-94-038.
+C               Discusses UMFPACK / MA38.
+C
+C       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff,
+C               "An approximate minimum degree ordering algorithm,"
+C               SIAM J. Matrix Analysis and Applications (to appear),
+C               also Univ. of Florida Technical Report TR-94-039.
+C               Discusses this routine.
+C
+C       [3] Alan George and Joseph Liu, "The evolution of the
+C               minimum degree ordering algorithm," SIAM Review, vol.
+C               31, no. 1, pp. 1-19, March 1989.  We list below the
+C               features mentioned in that paper that this code
+C               includes:
+C
+C       mass elimination:
+C               Yes.  MA27 relied on supervariable detection for mass
+C               elimination.
+C       indistinguishable nodes:
+C               Yes (we call these "supervariables").  This was also in
+C               the MA27 code - although we modified the method of
+C               detecting them (the previous hash was the true degree,
+C               which we no longer keep track of).  A supervariable is
+C               a set of rows with identical nonzero pattern.  All
+C               variables in a supervariable are eliminated together.
+C               Each supervariable has as its numerical name that of
+C               one of its variables (its principal variable).
+C       quotient graph representation:
+C               Yes.  We use the term "element" for the cliques formed
+C               during elimination.  This was also in the MA27 code.
+C               The algorithm can operate in place, but it will work
+C               more efficiently if given some "elbow room."
+C       element absorption:
+C               Yes.  This was also in the MA27 code.
+C       external degree:
+C               Yes.  The MA27 code was based on the true degree.
+C       incomplete degree update and multiple elimination:
+C               No.  This was not in MA27, either.  Our method of
+C               degree update within MC47B/BD is element-based, not
+C               variable-based.  It is thus not well-suited for use
+C               with incomplete degree update or multiple elimination.
+
+C-----------------------------------------------------------------------
+C Authors, and Copyright (C) 1995 by:
+C       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid.
+C
+C Acknowledgements:
+C       This work (and the UMFPACK package) was supported by the
+C       National Science Foundation (ASC-9111263 and DMS-9223088).
+C       The UMFPACK/MA38 approximate degree update algorithm, the
+C       unsymmetric analog which forms the basis of MC47B/BD, was
+C       developed while Tim Davis was supported by CERFACS (Toulouse,
+C       France) in a post-doctoral position.
+C
+C Date:  September, 1995
+C-----------------------------------------------------------------------
+
+C-----------------------------------------------------------------------
+C INPUT ARGUMENTS (unaltered):
+C-----------------------------------------------------------------------
+
+C n:    The matrix order.
+C
+C       Restriction:  1 .le. n .lt. (iovflo/2)-2
+
+C iwlen:        The length of iw (1..iwlen).  On input, the matrix is
+C       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+C       slightly larger than what is required to hold the matrix, at
+C       least iwlen .ge. pfree + n is recommended.  Otherwise,
+C       excessive compressions will take place.
+C       *** We do not recommend running this algorithm with ***
+C       ***      iwlen .lt. pfree + n.                      ***
+C       *** Better performance will be obtained if          ***
+C       ***      iwlen .ge. pfree + n                       ***
+C       *** or better yet                                   ***
+C       ***      iwlen .gt. 1.2 * pfree                     ***
+C       *** (where pfree is its value on input).            ***
+C       The algorithm will not run at all if iwlen .lt. pfree-1.
+C
+C       Restriction: iwlen .ge. pfree-1
+
+C iovflo:       The largest positive integer that your computer can
+C       represent (-iovflo should also be representable).  On a 32-bit
+C       computer with 2's-complement arithmetic,
+C       iovflo = (2^31)-1 = 2,147,483,647.
+
+C-----------------------------------------------------------------------
+C INPUT/OUTPUT ARGUMENTS:
+C-----------------------------------------------------------------------
+
+C pe:   On input, pe (i) is the index in iw of the start of row i, or
+C       zero if row i has no off-diagonal non-zeros.
+C
+C       During execution, it is used for both supervariables and
+C       elements:
+C
+C       * Principal supervariable i:  index into iw of the
+C               description of supervariable i.  A supervariable
+C               represents one or more rows of the matrix
+C               with identical nonzero pattern.
+C       * Non-principal supervariable i:  if i has been absorbed
+C               into another supervariable j, then pe (i) = -j.
+C               That is, j has the same pattern as i.
+C               Note that j might later be absorbed into another
+C               supervariable j2, in which case pe (i) is still -j,
+C               and pe (j) = -j2.
+C       * Unabsorbed element e:  the index into iw of the description
+C               of element e, if e has not yet been absorbed by a
+C               subsequent element.  Element e is created when
+C               the supervariable of the same name is selected as
+C               the pivot.
+C       * Absorbed element e:  if element e is absorbed into element
+C               e2, then pe (e) = -e2.  This occurs when the pattern of
+C               e (that is, Le) is found to be a subset of the pattern
+C               of e2 (that is, Le2).  If element e is "null" (it has
+C               no nonzeros outside its pivot block), then pe (e) = 0.
+C
+C       On output, pe holds the assembly tree/forest, which implicitly
+C       represents a pivot order with identical fill-in as the actual
+C       order (via a depth-first search of the tree).
+C
+C       On output:
+C       If nv (i) .gt. 0, then i represents a node in the assembly tree,
+C       and the parent of i is -pe (i), or zero if i is a root.
+C       If nv (i) = 0, then (i,-pe (i)) represents an edge in a
+C       subtree, the root of which is a node in the assembly tree.
+
+C pfree:        On input the tail end of the array, iw (pfree..iwlen),
+C       is empty, and the matrix is stored in iw (1..pfree-1).
+C       During execution, additional data is placed in iw, and pfree
+C       is modified so that iw (pfree..iwlen) is always the unused part
+C       of iw.  On output, pfree is set equal to the size of iw that
+C       would have been needed for no compressions to occur.  If
+C       ncmpa is zero, then pfree (on output) is less than or equal to
+C       iwlen, and the space iw (pfree+1 ... iwlen) was not used.
+C       Otherwise, pfree (on output) is greater than iwlen, and all the
+C       memory in iw was used.
+
+C-----------------------------------------------------------------------
+C INPUT/MODIFIED (undefined on output):
+C-----------------------------------------------------------------------
+
+C len:  On input, len (i) holds the number of entries in row i of the
+C       matrix, excluding the diagonal.  The contents of len (1..n)
+C       are undefined on output.
+
+C iw:   On input, iw (1..pfree-1) holds the description of each row i
+C       in the matrix.  The matrix must be symmetric, and both upper
+C       and lower triangular parts must be present.  The diagonal must
+C       not be present.  Row i is held as follows:
+C
+C               len (i):  the length of the row i data structure
+C               iw (pe (i) ... pe (i) + len (i) - 1):
+C                       the list of column indices for nonzeros
+C                       in row i (simple supervariables), excluding
+C                       the diagonal.  All supervariables start with
+C                       one row/column each (supervariable i is just
+C                       row i).
+C               if len (i) is zero on input, then pe (i) is ignored
+C               on input.
+C
+C               Note that the rows need not be in any particular order,
+C               and there may be empty space between the rows.
+C
+C       During execution, the supervariable i experiences fill-in.
+C       This is represented by placing in i a list of the elements
+C       that cause fill-in in supervariable i:
+C
+C               len (i):  the length of supervariable i
+C               iw (pe (i) ... pe (i) + elen (i) - 1):
+C                       the list of elements that contain i.  This list
+C                       is kept short by removing absorbed elements.
+C               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1):
+C                       the list of supervariables in i.  This list
+C                       is kept short by removing nonprincipal
+C                       variables, and any entry j that is also
+C                       contained in at least one of the elements
+C                       (j in Le) in the list for i (e in row i).
+C
+C       When supervariable i is selected as pivot, we create an
+C       element e of the same name (e=i):
+C
+C               len (e):  the length of element e
+C               iw (pe (e) ... pe (e) + len (e) - 1):
+C                       the list of supervariables in element e.
+C
+C       An element represents the fill-in that occurs when supervariable
+C       i is selected as pivot (which represents the selection of row i
+C       and all non-principal variables whose principal variable is i).
+C       We use the term Le to denote the set of all supervariables
+C       in element e.  Absorbed supervariables and elements are pruned
+C       from these lists when computationally convenient.
+C
+C       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
+C       The contents of iw are undefined on output.
+
+C-----------------------------------------------------------------------
+C OUTPUT (need not be set on input):
+C-----------------------------------------------------------------------
+
+C nv:   During execution, abs (nv (i)) is equal to the number of rows
+C       that are represented by the principal supervariable i.  If i is
+C       a nonprincipal variable, then nv (i) = 0.  Initially,
+C       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a
+C       principal variable in the pattern Lme of the current pivot
+C       element me.  On output, nv (e) holds the true degree of element
+C       e at the time it was created (including the diagonal part).
+
+C ncmpa:        The number of times iw was compressed.  If this is
+C       excessive, then the execution took longer than what could have
+C       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20%
+C       larger than the value of pfree on input (or at least
+C       iwlen .ge. pfree + n).  The fastest performance will be
+C       obtained when ncmpa is returned as zero.  If iwlen is set to
+C       the value returned by pfree on *output*, then no compressions
+C       will occur.
+
+C elen: See the description of iw above.  At the start of execution,
+C       elen (i) is set to zero.  During execution, elen (i) is the
+C       number of elements in the list for supervariable i.  When e
+C       becomes an element, elen (e) = -nel is set, where nel is the
+C       current step of factorization.  elen (i) = 0 is done when i
+C       becomes nonprincipal.
+C
+C       For variables, elen (i) .ge. 0 holds until just before the
+C       permutation vectors are computed.  For elements,
+C       elen (e) .lt. 0 holds.
+C
+C       On output elen (1..n) holds the inverse permutation (the same
+C       as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+C       then row i is the kth pivot row.  Row i of A appears as the
+C       (elen(i))-th row in the permuted matrix, PAP^T.
+
+C last: In a degree list, last (i) is the supervariable preceding i,
+C       or zero if i is the head of the list.  In a hash bucket,
+C       last (i) is the hash key for i.  last (head (hash)) is also
+C       used as the head of a hash bucket if head (hash) contains a
+C       degree list (see head, below).
+C
+C       On output, last (1..n) holds the permutation (the same as the
+C       'PERM' argument in Sparspak).  That is, if i = last (k), then
+C       row i is the kth pivot row.  Row last (k) of A is the k-th row
+C       in the permuted matrix, PAP^T.
+
+C-----------------------------------------------------------------------
+C LOCAL (not input or output - used only during execution):
+C-----------------------------------------------------------------------
+
+C degree:       If i is a supervariable, then degree (i) holds the
+C       current approximation of the external degree of row i (an upper
+C       bound).  The external degree is the number of nonzeros in row i,
+C       minus abs (nv (i)) (the diagonal part).  The bound is equal to
+C       the external degree if elen (i) is less than or equal to two.
+C
+C       We also use the term "external degree" for elements e to refer
+C       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|,
+C       which is the degree of the off-diagonal part of the element e
+C       (not including the diagonal part).
+
+C head: head is used for degree lists.  head (deg) is the first
+C       supervariable in a degree list (all supervariables i in a
+C       degree list deg have the same approximate degree, namely,
+C       deg = degree (i)).  If the list deg is empty then
+C       head (deg) = 0.
+C
+C       During supervariable detection head (hash) also serves as a
+C       pointer to a hash bucket.
+C       If head (hash) .gt. 0, there is a degree list of degree hash.
+C               The hash bucket head pointer is last (head (hash)).
+C       If head (hash) = 0, then the degree list and hash bucket are
+C               both empty.
+C       If head (hash) .lt. 0, then the degree list is empty, and
+C               -head (hash) is the head of the hash bucket.
+C       After supervariable detection is complete, all hash buckets
+C       are empty, and the (last (head (hash)) = 0) condition is
+C       restored for the non-empty degree lists.
+
+C next: next (i) is the supervariable following i in a link list, or
+C       zero if i is the last in the list.  Used for two kinds of
+C       lists:  degree lists and hash buckets (a supervariable can be
+C       in only one kind of list at a time).
+
+C w:    The flag array w determines the status of elements and
+C       variables, and the external degree of elements.
+C
+C       for elements:
+C          if w (e) = 0, then the element e is absorbed
+C          if w (e) .ge. wflg, then w (e) - wflg is the size of
+C               the set |Le \ Lme|, in terms of nonzeros (the
+C               sum of abs (nv (i)) for each principal variable i that
+C               is both in the pattern of element e and NOT in the
+C               pattern of the current pivot element, me).
+C          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has
+C               not yet been seen in the scan of the element lists in
+C               the computation of |Le\Lme| in loop 150 below.
+C
+C       for variables:
+C          during supervariable detection, if w (j) .ne. wflg then j is
+C          not in the pattern of variable i
+C
+C       The w array is initialized by setting w (i) = 1 for all i,
+C       and by setting wflg = 2.  It is reinitialized if wflg becomes
+C       too large (to ensure that wflg+n does not cause integer
+C       overflow).
+
+C-----------------------------------------------------------------------
+C LOCAL INTEGERS:
+C-----------------------------------------------------------------------
+
+        INTEGER DEG, DEGME, DEXT, DMAX, E, ELENME, ELN, HASH, HMOD, I,
+     $          ILAST, INEXT, J, JLAST, JNEXT, K, KNT1, KNT2, KNT3,
+     $          LENJ, LN, MAXMEM, ME, MEM, MINDEG, NEL, NEWMEM,
+     $          NLEFT, NVI, NVJ, NVPIV, SLENME, WBIG, WE, WFLG, WNVI, X
+
+C deg:          the degree of a variable or element
+C degme:        size, |Lme|, of the current element, me (= degree (me))
+C dext:         external degree, |Le \ Lme|, of some element e
+C dmax:         largest |Le| seen so far
+C e:            an element
+C elenme:       the length, elen (me), of element list of pivotal var.
+C eln:          the length, elen (...), of an element list
+C hash:         the computed value of the hash function
+C hmod:         the hash function is computed modulo hmod = max (1,n-1)
+C i:            a supervariable
+C ilast:        the entry in a link list preceding i
+C inext:        the entry in a link list following i
+C j:            a supervariable
+C jlast:        the entry in a link list preceding j
+C jnext:        the entry in a link list, or path, following j
+C k:            the pivot order of an element or variable
+C knt1:         loop counter used during element construction
+C knt2:         loop counter used during element construction
+C knt3:         loop counter used during compression
+C lenj:         len (j)
+C ln:           length of a supervariable list
+C maxmem:       amount of memory needed for no compressions
+C me:           current supervariable being eliminated, and the
+C                       current element created by eliminating that
+C                       supervariable
+C mem:          memory in use assuming no compressions have occurred
+C mindeg:       current minimum degree
+C nel:          number of pivots selected so far
+C newmem:       amount of new memory needed for current pivot element
+C nleft:        n - nel, the number of nonpivotal rows/columns remaining
+C nvi:          the number of variables in a supervariable i (= nv (i))
+C nvj:          the number of variables in a supervariable j (= nv (j))
+C nvpiv:        number of pivots in current element
+C slenme:       number of variables in variable list of pivotal variable
+C wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig.
+C we:           w (e)
+C wflg:         used for flagging the w array.  See description of w.
+C wnvi:         wflg - nv (i)
+C x:            either a supervariable or an element
+
+C-----------------------------------------------------------------------
+C LOCAL POINTERS:
+C-----------------------------------------------------------------------
+
+        INTEGER P, P1, P2, P3, PDST, PEND, PJ, PME, PME1, PME2, PN, PSRC
+
+C               Any parameter (pe (...) or pfree) or local variable
+C               starting with "p" (for Pointer) is an index into iw,
+C               and all indices into iw use variables starting with
+C               "p."  The only exception to this rule is the iwlen
+C               input argument.
+
+C p:            pointer into lots of things
+C p1:           pe (i) for some variable i (start of element list)
+C p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list)
+C p3:           index of first supervariable in clean list
+C pdst:         destination pointer, for compression
+C pend:         end of memory to compress
+C pj:           pointer into an element or variable
+C pme:          pointer into the current element (pme1...pme2)
+C pme1:         the current element, me, is stored in iw (pme1...pme2)
+C pme2:         the end of the current element
+C pn:           pointer into a "clean" variable, also used to compress
+C psrc:         source pointer, for compression
+
+C-----------------------------------------------------------------------
+C  FUNCTIONS CALLED:
+C-----------------------------------------------------------------------
+
+        INTRINSIC MAX, MIN, MOD
+
+C=======================================================================
+C  INITIALIZATIONS
+C=======================================================================
+
+        WFLG = 2
+        MINDEG = 1
+        NCMPA = 0
+        NEL = 0
+        HMOD = MAX (1, N-1)
+        DMAX = 0
+        WBIG = IOVFLO - N
+        MEM = PFREE - 1
+        MAXMEM = MEM
+
+        DO 10 I = 1, N
+           LAST (I) = 0
+           HEAD (I) = 0
+           NV (I) = 1
+           W (I) = 1
+           ELEN (I) = 0
+           DEGREE (I) = LEN (I)
+10         CONTINUE
+
+C       ----------------------------------------------------------------
+C       initialize degree lists and eliminate rows with no off-diag. nz.
+C       ----------------------------------------------------------------
+
+        DO 20 I = 1, N
+
+           DEG = DEGREE (I)
+
+C          include the diagonal in the true degree
+           DEG = DEG + 1
+           DEGREE (I) = DEG
+           IF (DEG .GT. 1) THEN
+
+C             ----------------------------------------------------------
+C             place i in the degree list corresponding to its degree
+C             ----------------------------------------------------------
+
+              INEXT = HEAD (DEG)
+              IF (INEXT .NE. 0) LAST (INEXT) = I
+              NEXT (I) = INEXT
+              HEAD (DEG) = I
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             we have a variable that can be eliminated at once because
+C             there is no off-diagonal non-zero in its row.
+C             ----------------------------------------------------------
+
+              DEGREE (I) = 0
+
+              NEL = NEL + 1
+              ELEN (I) = -NEL
+              PE (I) = 0
+              W (I) = 0
+
+              ENDIF
+
+20         CONTINUE
+
+C=======================================================================
+C  WHILE (selecting pivots) DO
+C=======================================================================
+
+30      CONTINUE
+        IF (NEL .LT. N) THEN
+
+C=======================================================================
+C  GET PIVOT OF MINIMUM DEGREE
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          find next supervariable for elimination
+C          -------------------------------------------------------------
+
+           DO 40 DEG = MINDEG, N
+              ME = HEAD (DEG)
+              IF (ME .GT. 0) GOTO 50
+40            CONTINUE
+50         CONTINUE
+           MINDEG = DEG
+
+C          -------------------------------------------------------------
+C          remove chosen variable from link list
+C          -------------------------------------------------------------
+
+           INEXT = NEXT (ME)
+           IF (INEXT .NE. 0) LAST (INEXT) = 0
+           HEAD (DEG) = INEXT
+
+C          -------------------------------------------------------------
+C          me represents the elimination of pivots nel+1 to nel+nv(me).
+C          place me itself as the first in this set.  It will be moved
+C          to the nel+nv(me) position when the permutation vectors are
+C          computed.
+C          -------------------------------------------------------------
+
+           ELENME = ELEN (ME)
+           ELEN (ME) = - (NEL + 1)
+           NVPIV = NV (ME)
+           NEL = NEL + NVPIV
+
+C=======================================================================
+C  CONSTRUCT NEW ELEMENT
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          At this point, me is the pivotal supervariable.  It will be
+C          converted into the current element.  Scan list of the
+C          pivotal supervariable, me, setting tree pointers and
+C          constructing new list of supervariables for the new element,
+C          me.  p is a pointer to the current position in the old list.
+C          -------------------------------------------------------------
+
+C          flag the variable "me" as being in Lme by negating nv (me)
+           NV (ME) = -NVPIV
+           DEGME = 0
+
+           IF (ELENME .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             construct the new element in place
+C             ----------------------------------------------------------
+
+              PME1 = PE (ME)
+              PME2 = PME1 - 1
+
+              DO 60 P = PME1, PME1 + LEN (ME) - 1
+                 I = IW (P)
+                 NVI = NV (I)
+                 IF (NVI .GT. 0) THEN
+
+C                   ----------------------------------------------------
+C                   i is a principal variable not yet placed in Lme.
+C                   store i in new list
+C                   ----------------------------------------------------
+
+                    DEGME = DEGME + NVI
+C                   flag i as being in Lme by negating nv (i)
+                    NV (I) = -NVI
+                    PME2 = PME2 + 1
+                    IW (PME2) = I
+
+C                   ----------------------------------------------------
+C                   remove variable i from degree list.
+C                   ----------------------------------------------------
+
+                    ILAST = LAST (I)
+                    INEXT = NEXT (I)
+                    IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                    IF (ILAST .NE. 0) THEN
+                       NEXT (ILAST) = INEXT
+                    ELSE
+C                      i is at the head of the degree list
+                       HEAD (DEGREE (I)) = INEXT
+                       ENDIF
+
+                    ENDIF
+60               CONTINUE
+C             this element takes no new memory in iw:
+              NEWMEM = 0
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             construct the new element in empty space, iw (pfree ...)
+C             ----------------------------------------------------------
+
+              P = PE (ME)
+              PME1 = PFREE
+              SLENME = LEN (ME) - ELENME
+
+              DO 120 KNT1 = 1, ELENME + 1
+
+                 IF (KNT1 .GT. ELENME) THEN
+C                   search the supervariables in me.
+                    E = ME
+                    PJ = P
+                    LN = SLENME
+                 ELSE
+C                   search the elements in me.
+                    E = IW (P)
+                    P = P + 1
+                    PJ = PE (E)
+                    LN = LEN (E)
+                    ENDIF
+
+C                -------------------------------------------------------
+C                search for different supervariables and add them to the
+C                new list, compressing when necessary. this loop is
+C                executed once for each element in the list and once for
+C                all the supervariables in the list.
+C                -------------------------------------------------------
+
+                 DO 110 KNT2 = 1, LN
+                    I = IW (PJ)
+                    PJ = PJ + 1
+                    NVI = NV (I)
+                    IF (NVI .GT. 0) THEN
+
+C                      -------------------------------------------------
+C                      compress iw, if necessary
+C                      -------------------------------------------------
+
+                       IF (PFREE .GT. IWLEN) THEN
+C                         prepare for compressing iw by adjusting
+C                         pointers and lengths so that the lists being
+C                         searched in the inner and outer loops contain
+C                         only the remaining entries.
+
+                          PE (ME) = P
+                          LEN (ME) = LEN (ME) - KNT1
+                          IF (LEN (ME) .EQ. 0) THEN
+C                            nothing left of supervariable me
+                             PE (ME) = 0
+                             ENDIF
+                          PE (E) = PJ
+                          LEN (E) = LN - KNT2
+                          IF (LEN (E) .EQ. 0) THEN
+C                            nothing left of element e
+                             PE (E) = 0
+                             ENDIF
+
+                          NCMPA = NCMPA + 1
+C                         store first item in pe
+C                         set first entry to -item
+                          DO 70 J = 1, N
+                             PN = PE (J)
+                             IF (PN .GT. 0) THEN
+                                PE (J) = IW (PN)
+                                IW (PN) = -J
+                                ENDIF
+70                           CONTINUE
+
+C                         psrc/pdst point to source/destination
+                          PDST = 1
+                          PSRC = 1
+                          PEND = PME1 - 1
+
+C                         while loop:
+80                        CONTINUE
+                          IF (PSRC .LE. PEND) THEN
+C                            search for next negative entry
+                             J = -IW (PSRC)
+                             PSRC = PSRC + 1
+                             IF (J .GT. 0) THEN
+                                IW (PDST) = PE (J)
+                                PE (J) = PDST
+                                PDST = PDST + 1
+C                               copy from source to destination
+                                LENJ = LEN (J)
+                                DO 90 KNT3 = 0, LENJ - 2
+                                   IW (PDST + KNT3) = IW (PSRC + KNT3)
+90                                 CONTINUE
+                                PDST = PDST + LENJ - 1
+                                PSRC = PSRC + LENJ - 1
+                                ENDIF
+                             GOTO 80
+                             ENDIF
+
+C                         move the new partially-constructed element
+                          P1 = PDST
+                          DO 100 PSRC = PME1, PFREE - 1
+                             IW (PDST) = IW (PSRC)
+                             PDST = PDST + 1
+100                          CONTINUE
+                          PME1 = P1
+                          PFREE = PDST
+                          PJ = PE (E)
+                          P = PE (ME)
+                          ENDIF
+
+C                      -------------------------------------------------
+C                      i is a principal variable not yet placed in Lme
+C                      store i in new list
+C                      -------------------------------------------------
+
+                       DEGME = DEGME + NVI
+C                      flag i as being in Lme by negating nv (i)
+                       NV (I) = -NVI
+                       IW (PFREE) = I
+                       PFREE = PFREE + 1
+
+C                      -------------------------------------------------
+C                      remove variable i from degree link list
+C                      -------------------------------------------------
+
+                       ILAST = LAST (I)
+                       INEXT = NEXT (I)
+                       IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                       IF (ILAST .NE. 0) THEN
+                          NEXT (ILAST) = INEXT
+                       ELSE
+C                         i is at the head of the degree list
+                          HEAD (DEGREE (I)) = INEXT
+                          ENDIF
+
+                       ENDIF
+110                 CONTINUE
+
+                 IF (E .NE. ME) THEN
+C                   set tree pointer and flag to indicate element e is
+C                   absorbed into new element me (the parent of e is me)
+                    PE (E) = -ME
+                    W (E) = 0
+                    ENDIF
+120              CONTINUE
+
+              PME2 = PFREE - 1
+C             this element takes newmem new memory in iw (possibly zero)
+              NEWMEM = PFREE - PME1
+              MEM = MEM + NEWMEM
+              MAXMEM = MAX (MAXMEM, MEM)
+              ENDIF
+
+C          -------------------------------------------------------------
+C          me has now been converted into an element in iw (pme1..pme2)
+C          -------------------------------------------------------------
+
+C          degme holds the external degree of new element
+           DEGREE (ME) = DEGME
+           PE (ME) = PME1
+           LEN (ME) = PME2 - PME1 + 1
+
+C          -------------------------------------------------------------
+C          make sure that wflg is not too large.  With the current
+C          value of wflg, wflg+n must not cause integer overflow
+C          -------------------------------------------------------------
+
+           IF (WFLG .GE. WBIG) THEN
+              DO 130 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+130              CONTINUE
+              WFLG = 2
+              ENDIF
+
+C=======================================================================
+C  COMPUTE (w (e) - wflg) = |Le\Lme| FOR ALL ELEMENTS
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 1:  compute the external degrees of previous elements
+C          with respect to the current element.  That is:
+C               (w (e) - wflg) = |Le \ Lme|
+C          for each element e that appears in any supervariable in Lme.
+C          The notation Le refers to the pattern (list of
+C          supervariables) of a previous element e, where e is not yet
+C          absorbed, stored in iw (pe (e) + 1 ... pe (e) + iw (pe (e))).
+C          The notation Lme refers to the pattern of the current element
+C          (stored in iw (pme1..pme2)).   If (w (e) - wflg) becomes
+C          zero, then the element e will be absorbed in scan 2.
+C          -------------------------------------------------------------
+
+           DO 150 PME = PME1, PME2
+              I = IW (PME)
+              ELN = ELEN (I)
+              IF (ELN .GT. 0) THEN
+C                note that nv (i) has been negated to denote i in Lme:
+                 NVI = -NV (I)
+                 WNVI = WFLG - NVI
+                 DO 140 P = PE (I), PE (I) + ELN - 1
+                    E = IW (P)
+                    WE = W (E)
+                    IF (WE .GE. WFLG) THEN
+C                      unabsorbed element e has been seen in this loop
+                       WE = WE - NVI
+                    ELSE IF (WE .NE. 0) THEN
+C                      e is an unabsorbed element
+C                      this is the first we have seen e in all of Scan 1
+                       WE = DEGREE (E) + WNVI
+                       ENDIF
+                    W (E) = WE
+140                 CONTINUE
+                 ENDIF
+150           CONTINUE
+
+C=======================================================================
+C  DEGREE UPDATE AND ELEMENT ABSORPTION
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 2:  for each i in Lme, sum up the degree of Lme (which
+C          is degme), plus the sum of the external degrees of each Le
+C          for the elements e appearing within i, plus the
+C          supervariables in i.  Place i in hash list.
+C          -------------------------------------------------------------
+
+           DO 180 PME = PME1, PME2
+              I = IW (PME)
+              P1 = PE (I)
+              P2 = P1 + ELEN (I) - 1
+              PN = P1
+              HASH = 0
+              DEG = 0
+
+C             ----------------------------------------------------------
+C             scan the element list associated with supervariable i
+C             ----------------------------------------------------------
+
+C             UMFPACK/MA38-style approximate degree:
+              DO 160 P = P1, P2
+                 E = IW (P)
+                 WE = W (E)
+                 IF (WE .NE. 0) THEN
+C                   e is an unabsorbed element
+                    DEG = DEG + WE - WFLG
+                    IW (PN) = E
+                    PN = PN + 1
+                    HASH = HASH + E
+                    ENDIF
+160              CONTINUE
+
+C             count the number of elements in i (including me):
+              ELEN (I) = PN - P1 + 1
+
+C             ----------------------------------------------------------
+C             scan the supervariables in the list associated with i
+C             ----------------------------------------------------------
+
+              P3 = PN
+              DO 170 P = P2 + 1, P1 + LEN (I) - 1
+                 J = IW (P)
+                 NVJ = NV (J)
+                 IF (NVJ .GT. 0) THEN
+C                   j is unabsorbed, and not in Lme.
+C                   add to degree and add to new list
+                    DEG = DEG + NVJ
+                    IW (PN) = J
+                    PN = PN + 1
+                    HASH = HASH + J
+                    ENDIF
+170              CONTINUE
+
+C             ----------------------------------------------------------
+C             update the degree and check for mass elimination
+C             ----------------------------------------------------------
+
+              IF (ELEN (I) .EQ. 1 .AND. P3 .EQ. PN) THEN
+
+C                -------------------------------------------------------
+C                mass elimination
+C                -------------------------------------------------------
+
+C                There is nothing left of this node except for an
+C                edge to the current pivot element.  elen (i) is 1,
+C                and there are no variables adjacent to node i.
+C                Absorb i into the current pivot element, me.
+
+                 PE (I) = -ME
+                 NVI = -NV (I)
+                 DEGME = DEGME - NVI
+                 NVPIV = NVPIV + NVI
+                 NEL = NEL + NVI
+                 NV (I) = 0
+                 ELEN (I) = 0
+
+              ELSE
+
+C                -------------------------------------------------------
+C                update the upper-bound degree of i
+C                -------------------------------------------------------
+
+C                the following degree does not yet include the size
+C                of the current element, which is added later:
+                 DEGREE (I) = MIN (DEGREE (I), DEG)
+
+C                -------------------------------------------------------
+C                add me to the list for i
+C                -------------------------------------------------------
+
+C                move first supervariable to end of list
+                 IW (PN) = IW (P3)
+C                move first element to end of element part of list
+                 IW (P3) = IW (P1)
+C                add new element to front of list.
+                 IW (P1) = ME
+C                store the new length of the list in len (i)
+                 LEN (I) = PN - P1 + 1
+
+C                -------------------------------------------------------
+C                place in hash bucket.  Save hash key of i in last (i).
+C                -------------------------------------------------------
+
+                 HASH = MOD (HASH, HMOD) + 1
+                 J = HEAD (HASH)
+                 IF (J .LE. 0) THEN
+C                   the degree list is empty, hash head is -j
+                    NEXT (I) = -J
+                    HEAD (HASH) = -I
+                 ELSE
+C                   degree list is not empty
+C                   use last (head (hash)) as hash head
+                    NEXT (I) = LAST (J)
+                    LAST (J) = I
+                    ENDIF
+                 LAST (I) = HASH
+                 ENDIF
+180           CONTINUE
+
+           DEGREE (ME) = DEGME
+
+C          -------------------------------------------------------------
+C          Clear the counter array, w (...), by incrementing wflg.
+C          -------------------------------------------------------------
+
+           DMAX = MAX (DMAX, DEGME)
+           WFLG = WFLG + DMAX
+
+C          make sure that wflg+n does not cause integer overflow
+           IF (WFLG .GE. WBIG) THEN
+              DO 190 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+190              CONTINUE
+              WFLG = 2
+              ENDIF
+C          at this point, w (1..n) .lt. wflg holds
+
+C=======================================================================
+C  SUPERVARIABLE DETECTION
+C=======================================================================
+
+           DO 250 PME = PME1, PME2
+              I = IW (PME)
+              IF (NV (I) .LT. 0) THEN
+C                i is a principal variable in Lme
+
+C                -------------------------------------------------------
+C                examine all hash buckets with 2 or more variables.  We
+C                do this by examining all unique hash keys for super-
+C                variables in the pattern Lme of the current element, me
+C                -------------------------------------------------------
+
+                 HASH = LAST (I)
+C                let i = head of hash bucket, and empty the hash bucket
+                 J = HEAD (HASH)
+                 IF (J .EQ. 0) GOTO 250
+                 IF (J .LT. 0) THEN
+C                   degree list is empty
+                    I = -J
+                    HEAD (HASH) = 0
+                 ELSE
+C                   degree list is not empty, restore last () of head
+                    I = LAST (J)
+                    LAST (J) = 0
+                    ENDIF
+                 IF (I .EQ. 0) GOTO 250
+
+C                while loop:
+200              CONTINUE
+                 IF (NEXT (I) .NE. 0) THEN
+
+C                   ----------------------------------------------------
+C                   this bucket has one or more variables following i.
+C                   scan all of them to see if i can absorb any entries
+C                   that follow i in hash bucket.  Scatter i into w.
+C                   ----------------------------------------------------
+
+                    LN = LEN (I)
+                    ELN = ELEN (I)
+C                   do not flag the first element in the list (me)
+                    DO 210 P = PE (I) + 1, PE (I) + LN - 1
+                       W (IW (P)) = WFLG
+210                    CONTINUE
+
+C                   ----------------------------------------------------
+C                   scan every other entry j following i in bucket
+C                   ----------------------------------------------------
+
+                    JLAST = I
+                    J = NEXT (I)
+
+C                   while loop:
+220                 CONTINUE
+                    IF (J .NE. 0) THEN
+
+C                      -------------------------------------------------
+C                      check if j and i have identical nonzero pattern
+C                      -------------------------------------------------
+
+                       IF (LEN (J) .NE. LN) THEN
+C                         i and j do not have same size data structure
+                          GOTO 240
+                          ENDIF
+                       IF (ELEN (J) .NE. ELN) THEN
+C                         i and j differ in number of adjacent elements
+                          GOTO 240
+                          ENDIF
+C                      do not flag the first element in the list (me)
+                       DO 230 P = PE (J) + 1, PE (J) + LN - 1
+                          IF (W (IW (P)) .NE. WFLG) THEN
+C                            an entry (iw(p)) is in j but not in i
+                             GOTO 240
+                             ENDIF
+230                       CONTINUE
+
+C                      -------------------------------------------------
+C                      found it!  j can be absorbed into i
+C                      -------------------------------------------------
+
+                       PE (J) = -I
+C                      both nv (i) and nv (j) are negated since they
+C                      are in Lme, and the absolute values of each
+C                      are the number of variables in i and j:
+                       NV (I) = NV (I) + NV (J)
+                       NV (J) = 0
+                       ELEN (J) = 0
+C                      delete j from hash bucket
+                       J = NEXT (J)
+                       NEXT (JLAST) = J
+                       GOTO 220
+
+C                      -------------------------------------------------
+240                    CONTINUE
+C                      j cannot be absorbed into i
+C                      -------------------------------------------------
+
+                       JLAST = J
+                       J = NEXT (J)
+                       GOTO 220
+                       ENDIF
+
+C                   ----------------------------------------------------
+C                   no more variables can be absorbed into i
+C                   go to next i in bucket and clear flag array
+C                   ----------------------------------------------------
+
+                    WFLG = WFLG + 1
+                    I = NEXT (I)
+                    IF (I .NE. 0) GOTO 200
+                    ENDIF
+                 ENDIF
+250           CONTINUE
+
+C=======================================================================
+C  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMENT
+C=======================================================================
+
+           P = PME1
+           NLEFT = N - NEL
+           DO 260 PME = PME1, PME2
+              I = IW (PME)
+              NVI = -NV (I)
+              IF (NVI .GT. 0) THEN
+C                i is a principal variable in Lme
+C                restore nv (i) to signify that i is principal
+                 NV (I) = NVI
+
+C                -------------------------------------------------------
+C                compute the true degree (add size of current element)
+C                -------------------------------------------------------
+
+                 DEG = MIN (DEGREE (I) + DEGME, NLEFT)
+
+C                -------------------------------------------------------
+C                place the supervariable at the head of the degree list
+C                -------------------------------------------------------
+
+                 INEXT = HEAD (DEG)
+                 IF (INEXT .NE. 0) LAST (INEXT) = I
+                 NEXT (I) = INEXT
+                 LAST (I) = 0
+                 HEAD (DEG) = I
+
+C                -------------------------------------------------------
+C                save the new degree, and find the minimum degree
+C                -------------------------------------------------------
+
+                 MINDEG = MIN (MINDEG, DEG)
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                place the supervariable in the element pattern
+C                -------------------------------------------------------
+
+                 IW (P) = I
+                 P = P + 1
+                 ENDIF
+260           CONTINUE
+
+C=======================================================================
+C  FINALIZE THE NEW ELEMENT
+C=======================================================================
+
+           NV (ME) = NVPIV + DEGME
+C          nv (me) is now the degree of pivot (including diagonal part)
+C          save the length of the list for the new element me
+           LEN (ME) = P - PME1
+           IF (LEN (ME) .EQ. 0) THEN
+C             there is nothing left of the current pivot element
+              PE (ME) = 0
+              W (ME) = 0
+              ENDIF
+           IF (NEWMEM .NE. 0) THEN
+C             element was not constructed in place: deallocate part
+C             of it (final size is less than or equal to newmem,
+C             since newly nonprincipal variables have been removed).
+              PFREE = P
+              MEM = MEM - NEWMEM + LEN (ME)
+              ENDIF
+
+C=======================================================================
+C          END WHILE (selecting pivots)
+           GOTO 30
+           ENDIF
+C=======================================================================
+
+C=======================================================================
+C  COMPUTE THE PERMUTATION VECTORS
+C=======================================================================
+
+C       ----------------------------------------------------------------
+C       The time taken by the following code is O(n).  At this
+C       point, elen (e) = -k has been done for all elements e,
+C       and elen (i) = 0 has been done for all nonprincipal
+C       variables i.  At this point, there are no principal
+C       supervariables left, and all elements are absorbed.
+C       ----------------------------------------------------------------
+
+C       ----------------------------------------------------------------
+C       compute the ordering of unordered nonprincipal variables
+C       ----------------------------------------------------------------
+
+        DO 290 I = 1, N
+           IF (ELEN (I) .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             i is an un-ordered row.  Traverse the tree from i until
+C             reaching an element, e.  The element, e, was the
+C             principal supervariable of i and all nodes in the path
+C             from i to when e was selected as pivot.
+C             ----------------------------------------------------------
+
+              J = -PE (I)
+C             while (j is a variable) do:
+270           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 J = -PE (J)
+                 GOTO 270
+                 ENDIF
+              E = J
+
+C             ----------------------------------------------------------
+C             get the current pivot ordering of e
+C             ----------------------------------------------------------
+
+              K = -ELEN (E)
+
+C             ----------------------------------------------------------
+C             traverse the path again from i to e, and compress the
+C             path (all nodes point to e).  Path compression allows
+C             this code to compute in O(n) time.  Order the unordered
+C             nodes in the path, and place the element e at the end.
+C             ----------------------------------------------------------
+
+              J = I
+C             while (j is a variable) do:
+280           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 JNEXT = -PE (J)
+                 PE (J) = -E
+                 IF (ELEN (J) .EQ. 0) THEN
+C                   j is an unordered row
+                    ELEN (J) = K
+                    K = K + 1
+                    ENDIF
+                 J = JNEXT
+                 GOTO 280
+                 ENDIF
+C             leave elen (e) negative, so we know it is an element
+              ELEN (E) = -K
+              ENDIF
+290        CONTINUE
+
+C       ----------------------------------------------------------------
+C       reset the inverse permutation (elen (1..n)) to be positive,
+C       and compute the permutation (last (1..n)).
+C       ----------------------------------------------------------------
+
+        DO 300 I = 1, N
+           K = ABS (ELEN (I))
+           LAST (K) = I
+           ELEN (I) = K
+300        CONTINUE
+
+C=======================================================================
+C  RETURN THE MEMORY USAGE IN IW
+C=======================================================================
+
+C       If maxmem is less than or equal to iwlen, then no compressions
+C       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise
+C       compressions did occur, and iwlen would have had to have been
+C       greater than or equal to maxmem for no compressions to occur.
+C       Return the value of maxmem in the pfree argument.
+
+        PFREE = MAXMEM
+
+        RETURN
+        END
+
diff --git a/contrib/taucs/external/src/amdbar.c b/contrib/taucs/external/src/amdbar.c
new file mode 100644
index 0000000000000000000000000000000000000000..718a9f0d3373d1ff9e1803aeb56162cdb7c855e7
--- /dev/null
+++ b/contrib/taucs/external/src/amdbar.c
@@ -0,0 +1,1474 @@
+/* amdbar.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Subroutine */ int amdbar_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo;
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer hash, pend, hmod, lenj, dmax_, wbig, wflg, psrc, pdst, 
+	    wnvi, e, i, j, k, p, degme, x, nleft, ilast, jlast, inext, jnext, 
+	    p1, nvpiv, p2, p3, me, ln, we, pj, pn, mindeg, elenme, slenme, 
+	    maxmem, newmem, deg, eln, mem, nel, pme, nvi, nvj, pme1, pme2, 
+	    knt1, knt2, knt3;
+
+/* -----------------------------------------------------------------------
+ */
+/*  The MC47 / AMD suite of minimum degree ordering algorithms. */
+
+/*  This code is one of seven variations of a single algorithm: */
+/*  the primary routine (MC47B/BD, only available in the Harwell */
+/*  Subroutine Library), and 6 variations that differ only in */
+/*  how they compute the degree (available in NETLIB). */
+
+/*  For information on the Harwell Subroutine Library, contact */
+/*  John Harding, Harwell Subroutine Library, B 552, AEA Technology, */
+/*  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573, */
+/*  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will */
+/*  provide details of price and conditions of use. */
+/* -----------------------------------------------------------------------
+ */
+/* ***********************************************************************
+ */
+/* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU, */
+/* and AMDATR) may be used SOLELY for educational, research, and */
+/* benchmarking purposes by non-profit organizations and the U.S. */
+/* government.  Commercial and other organizations may make use of the */
+/* AMD routines SOLELY for benchmarking purposes only.  The AMD */
+/* routines may be modified by or on behalf of the User for such */
+/* use but at no time shall the AMD routines or any such modified */
+/* version of them become the property of the User.  The AMD routines */
+/* are provided without warranty of any kind, either expressed or */
+/* implied.  Neither the Authors nor their employers shall be liable */
+/* for any direct or consequential loss or damage whatsoever arising */
+/* out of the use or misuse of the AMD routines by the User.  The AMD */
+/* routines must not be sold.  You may make copies of the AMD routines, */
+/* but this NOTICE and the Copyright notice must appear in all copies. */
+/* Any other use of the AMD routines requires written permission. */
+/* Your use of the AMD routines is an implicit agreement to these */
+/* conditions." */
+/* ***********************************************************************
+ */
+/* -----------------------------------------------------------------------
+ */
+/* AMDbar:  Approximate Minimum (UMFPACK/MA38-style, external) Degree */
+/*          ordering algorithm, but without aggressive absorption */
+/* -----------------------------------------------------------------------
+ */
+/*  Variation 2:  MC47-style approximate external degree, but with no */
+/*  aggressive absorption.  This is included for comparison with the */
+/*  other 5 variations.  It tends to compute orderings comparable to */
+/*  MC47B/BD, or slightly worse in some cases.  It tends to be about as */
+/*  fast as MC47B/BD. */
+
+/*  We recommend using MC47B/BD instead of this routine since MC47B/BD */
+/*  gives better results in about the same time. */
+/* -----------------------------------------------------------------------
+ */
+/* Given a representation of the nonzero pattern of a symmetric matrix, */
+/*       A, (excluding the diagonal) perform an approximate minimum */
+/*       (UMFPACK/MA38-style) degree ordering to compute a pivot order */
+/*       such that the introduction of nonzeros (fill-in) in the Cholesky 
+*/
+/*       factors A = LL^T are kept low.  At each step, the pivot */
+/*       selected is the one with the minimum UMFPACK/MA38-style */
+/*       upper-bound on the external degree.  This routine does not */
+/*       perform aggressive absorption (as done by MC47B/BD).  Aggressive */
+/*       absorption in MC47B/BD is used to tighten the bound on the */
+/*       degree.  This can result in a significant improvement in the */
+/*       quality of the ordering for some matrices. */
+
+/*       The approximate degree algorithm implemented here is the */
+/*       symmetric analog of the degree update algorithm in MA38 and */
+/*       UMFPACK (the Unsymmetric-pattern MultiFrontal PACKage, both by */
+/*       Davis and Duff, available for academic users in NETLIB as */
+/*       linalg/umfpack.shar or via anonymous ftp to */
+/*       ftp.cis.ufl.edu:pub/umfpack).  Non-academic users must use */
+/*       MA38 in the Harwell Subroutine Library instead of UMFPACK. */
+/* ********************************************************************** 
+*/
+/* ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ****** 
+*/
+/* ********************************************************************** 
+*/
+/* ** If you want error checking, a more versatile input format, and a ** 
+*/
+/* ** simpler user interface, then use MC47A/AD in the Harwell         ** 
+*/
+/* ** Subroutine Library, which checks for errors, transforms the      ** 
+*/
+/* ** input, and calls MC47B/BD.                                       ** 
+*/
+/* ********************************************************************** 
+*/
+/*       References:  (UF Tech Reports are available via anonymous ftp */
+/*       to ftp.cis.ufl.edu:cis/tech-reports). */
+
+/*       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern */
+/*               multifrontal method for sparse LU factorization", */
+/*               SIAM J. Matrix Analysis and Applications, to appear. */
+/*               also Univ. of Florida Technical Report TR-94-038. */
+/*               Discusses UMFPACK / MA38. */
+
+/*       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, */
+/*               "An approximate minimum degree ordering algorithm," */
+/*               SIAM J. Matrix Analysis and Applications (to appear), */
+/*               also Univ. of Florida Technical Report TR-94-039. */
+/*               Discusses this routine. */
+
+/*       [3] Alan George and Joseph Liu, "The evolution of the */
+/*               minimum degree ordering algorithm," SIAM Review, vol. */
+/*               31, no. 1, pp. 1-19, March 1989.  We list below the */
+/*               features mentioned in that paper that this code */
+/*               includes: */
+
+/*       mass elimination: */
+/*               Yes.  MA27 relied on supervariable detection for mass */
+/*               elimination. */
+/*       indistinguishable nodes: */
+/*               Yes (we call these "supervariables").  This was also in 
+*/
+/*               the MA27 code - although we modified the method of */
+/*               detecting them (the previous hash was the true degree, */
+/*               which we no longer keep track of).  A supervariable is */
+/*               a set of rows with identical nonzero pattern.  All */
+/*               variables in a supervariable are eliminated together. */
+/*               Each supervariable has as its numerical name that of */
+/*               one of its variables (its principal variable). */
+/*       quotient graph representation: */
+/*               Yes.  We use the term "element" for the cliques formed */
+/*               during elimination.  This was also in the MA27 code. */
+/*               The algorithm can operate in place, but it will work */
+/*               more efficiently if given some "elbow room." */
+/*       element absorption: */
+/*               Yes.  This was also in the MA27 code. */
+/*       external degree: */
+/*               Yes.  The MA27 code was based on the true degree. */
+/*       incomplete degree update and multiple elimination: */
+/*               No.  This was not in MA27, either.  Our method of */
+/*               degree update within MC47B/BD is element-based, not */
+/*               variable-based.  It is thus not well-suited for use */
+/*               with incomplete degree update or multiple elimination. */
+/* -----------------------------------------------------------------------
+ */
+/* Authors, and Copyright (C) 1995 by: */
+/*       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid. 
+*/
+
+/* Acknowledgements: */
+/*       This work (and the UMFPACK package) was supported by the */
+/*       National Science Foundation (ASC-9111263 and DMS-9223088). */
+/*       The UMFPACK/MA38 approximate degree update algorithm, the */
+/*       unsymmetric analog which forms the basis of MC47B/BD, was */
+/*       developed while Tim Davis was supported by CERFACS (Toulouse, */
+/*       France) in a post-doctoral position. */
+
+/* Date:  September, 1995 */
+/* -----------------------------------------------------------------------
+ */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT ARGUMENTS (unaltered): */
+/* -----------------------------------------------------------------------
+ */
+/* n:    The matrix order. */
+
+/*       Restriction:  1 .le. n .lt. (iovflo/2)-2 */
+/* iwlen:        The length of iw (1..iwlen).  On input, the matrix is */
+/*       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/*       slightly larger than what is required to hold the matrix, at */
+/*       least iwlen .ge. pfree + n is recommended.  Otherwise, */
+/*       excessive compressions will take place. */
+/*       *** We do not recommend running this algorithm with *** */
+/*       ***      iwlen .lt. pfree + n.                      *** */
+/*       *** Better performance will be obtained if          *** */
+/*       ***      iwlen .ge. pfree + n                       *** */
+/*       *** or better yet                                   *** */
+/*       ***      iwlen .gt. 1.2 * pfree                     *** */
+/*       *** (where pfree is its value on input).            *** */
+/*       The algorithm will not run at all if iwlen .lt. pfree-1. */
+
+/*       Restriction: iwlen .ge. pfree-1 */
+/* iovflo:       The largest positive integer that your computer can */
+/*       represent (-iovflo should also be representable).  On a 32-bit */
+/*       computer with 2's-complement arithmetic, */
+/*       iovflo = (2^31)-1 = 2,147,483,647. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/OUTPUT ARGUMENTS: */
+/* -----------------------------------------------------------------------
+ */
+/* pe:   On input, pe (i) is the index in iw of the start of row i, or */
+/*       zero if row i has no off-diagonal non-zeros. */
+
+/*       During execution, it is used for both supervariables and */
+/*       elements: */
+
+/*       * Principal supervariable i:  index into iw of the */
+/*               description of supervariable i.  A supervariable */
+/*               represents one or more rows of the matrix */
+/*               with identical nonzero pattern. */
+/*       * Non-principal supervariable i:  if i has been absorbed */
+/*               into another supervariable j, then pe (i) = -j. */
+/*               That is, j has the same pattern as i. */
+/*               Note that j might later be absorbed into another */
+/*               supervariable j2, in which case pe (i) is still -j, */
+/*               and pe (j) = -j2. */
+/*       * Unabsorbed element e:  the index into iw of the description */
+/*               of element e, if e has not yet been absorbed by a */
+/*               subsequent element.  Element e is created when */
+/*               the supervariable of the same name is selected as */
+/*               the pivot. */
+/*       * Absorbed element e:  if element e is absorbed into element */
+/*               e2, then pe (e) = -e2.  This occurs when the pattern of 
+*/
+/*               e (that is, Le) is found to be a subset of the pattern */
+/*               of e2 (that is, Le2).  If element e is "null" (it has */
+/*               no nonzeros outside its pivot block), then pe (e) = 0. */
+
+/*       On output, pe holds the assembly tree/forest, which implicitly */
+/*       represents a pivot order with identical fill-in as the actual */
+/*       order (via a depth-first search of the tree). */
+
+/*       On output: */
+/*       If nv (i) .gt. 0, then i represents a node in the assembly tree, 
+*/
+/*       and the parent of i is -pe (i), or zero if i is a root. */
+/*       If nv (i) = 0, then (i,-pe (i)) represents an edge in a */
+/*       subtree, the root of which is a node in the assembly tree. */
+/* pfree:        On input the tail end of the array, iw (pfree..iwlen), */
+/*       is empty, and the matrix is stored in iw (1..pfree-1). */
+/*       During execution, additional data is placed in iw, and pfree */
+/*       is modified so that iw (pfree..iwlen) is always the unused part 
+*/
+/*       of iw.  On output, pfree is set equal to the size of iw that */
+/*       would have been needed for no compressions to occur.  If */
+/*       ncmpa is zero, then pfree (on output) is less than or equal to */
+/*       iwlen, and the space iw (pfree+1 ... iwlen) was not used. */
+/*       Otherwise, pfree (on output) is greater than iwlen, and all the 
+*/
+/*       memory in iw was used. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/MODIFIED (undefined on output): */
+/* -----------------------------------------------------------------------
+ */
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/*       matrix, excluding the diagonal.  The contents of len (1..n) */
+/*       are undefined on output. */
+/* iw:   On input, iw (1..pfree-1) holds the description of each row i */
+/*       in the matrix.  The matrix must be symmetric, and both upper */
+/*       and lower triangular parts must be present.  The diagonal must */
+/*       not be present.  Row i is held as follows: */
+
+/*               len (i):  the length of the row i data structure */
+/*               iw (pe (i) ... pe (i) + len (i) - 1): */
+/*                       the list of column indices for nonzeros */
+/*                       in row i (simple supervariables), excluding */
+/*                       the diagonal.  All supervariables start with */
+/*                       one row/column each (supervariable i is just */
+/*                       row i). */
+/*               if len (i) is zero on input, then pe (i) is ignored */
+/*               on input. */
+
+/*               Note that the rows need not be in any particular order, 
+*/
+/*               and there may be empty space between the rows. */
+
+/*       During execution, the supervariable i experiences fill-in. */
+/*       This is represented by placing in i a list of the elements */
+/*       that cause fill-in in supervariable i: */
+
+/*               len (i):  the length of supervariable i */
+/*               iw (pe (i) ... pe (i) + elen (i) - 1): */
+/*                       the list of elements that contain i.  This list 
+*/
+/*                       is kept short by removing absorbed elements. */
+/*               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1): */
+/*                       the list of supervariables in i.  This list */
+/*                       is kept short by removing nonprincipal */
+/*                       variables, and any entry j that is also */
+/*                       contained in at least one of the elements */
+/*                       (j in Le) in the list for i (e in row i). */
+
+/*       When supervariable i is selected as pivot, we create an */
+/*       element e of the same name (e=i): */
+
+/*               len (e):  the length of element e */
+/*               iw (pe (e) ... pe (e) + len (e) - 1): */
+/*                       the list of supervariables in element e. */
+
+/*       An element represents the fill-in that occurs when supervariable 
+*/
+/*       i is selected as pivot (which represents the selection of row i 
+*/
+/*       and all non-principal variables whose principal variable is i). 
+*/
+/*       We use the term Le to denote the set of all supervariables */
+/*       in element e.  Absorbed supervariables and elements are pruned */
+/*       from these lists when computationally convenient. */
+
+/*       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. */
+/*       The contents of iw are undefined on output. */
+/* -----------------------------------------------------------------------
+ */
+/* OUTPUT (need not be set on input): */
+/* -----------------------------------------------------------------------
+ */
+/* nv:   During execution, abs (nv (i)) is equal to the number of rows */
+/*       that are represented by the principal supervariable i.  If i is 
+*/
+/*       a nonprincipal variable, then nv (i) = 0.  Initially, */
+/*       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a */
+/*       principal variable in the pattern Lme of the current pivot */
+/*       element me.  On output, nv (e) holds the true degree of element 
+*/
+/*       e at the time it was created (including the diagonal part). */
+/* ncmpa:        The number of times iw was compressed.  If this is */
+/*       excessive, then the execution took longer than what could have */
+/*       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20% */
+/*       larger than the value of pfree on input (or at least */
+/*       iwlen .ge. pfree + n).  The fastest performance will be */
+/*       obtained when ncmpa is returned as zero.  If iwlen is set to */
+/*       the value returned by pfree on *output*, then no compressions */
+/*       will occur. */
+/* elen: See the description of iw above.  At the start of execution, */
+/*       elen (i) is set to zero.  During execution, elen (i) is the */
+/*       number of elements in the list for supervariable i.  When e */
+/*       becomes an element, elen (e) = -nel is set, where nel is the */
+/*       current step of factorization.  elen (i) = 0 is done when i */
+/*       becomes nonprincipal. */
+
+/*       For variables, elen (i) .ge. 0 holds until just before the */
+/*       permutation vectors are computed.  For elements, */
+/*       elen (e) .lt. 0 holds. */
+
+/*       On output elen (1..n) holds the inverse permutation (the same */
+/*       as the 'INVP' argument in Sparspak).  That is, if k = elen (i), 
+*/
+/*       then row i is the kth pivot row.  Row i of A appears as the */
+/*       (elen(i))-th row in the permuted matrix, PAP^T. */
+/* last: In a degree list, last (i) is the supervariable preceding i, */
+/*       or zero if i is the head of the list.  In a hash bucket, */
+/*       last (i) is the hash key for i.  last (head (hash)) is also */
+/*       used as the head of a hash bucket if head (hash) contains a */
+/*       degree list (see head, below). */
+
+/*       On output, last (1..n) holds the permutation (the same as the */
+/*       'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/*       row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/*       in the permuted matrix, PAP^T. */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL (not input or output - used only during execution): */
+/* -----------------------------------------------------------------------
+ */
+/* degree:       If i is a supervariable, then degree (i) holds the */
+/*       current approximation of the external degree of row i (an upper 
+*/
+/*       bound).  The external degree is the number of nonzeros in row i, 
+*/
+/*       minus abs (nv (i)) (the diagonal part).  The bound is equal to */
+/*       the external degree if elen (i) is less than or equal to two. */
+
+/*       We also use the term "external degree" for elements e to refer */
+/*       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|, 
+*/
+/*       which is the degree of the off-diagonal part of the element e */
+/*       (not including the diagonal part). */
+/* head: head is used for degree lists.  head (deg) is the first */
+/*       supervariable in a degree list (all supervariables i in a */
+/*       degree list deg have the same approximate degree, namely, */
+/*       deg = degree (i)).  If the list deg is empty then */
+/*       head (deg) = 0. */
+
+/*       During supervariable detection head (hash) also serves as a */
+/*       pointer to a hash bucket. */
+/*       If head (hash) .gt. 0, there is a degree list of degree hash. */
+/*               The hash bucket head pointer is last (head (hash)). */
+/*       If head (hash) = 0, then the degree list and hash bucket are */
+/*               both empty. */
+/*       If head (hash) .lt. 0, then the degree list is empty, and */
+/*               -head (hash) is the head of the hash bucket. */
+/*       After supervariable detection is complete, all hash buckets */
+/*       are empty, and the (last (head (hash)) = 0) condition is */
+/*       restored for the non-empty degree lists. */
+/* next: next (i) is the supervariable following i in a link list, or */
+/*       zero if i is the last in the list.  Used for two kinds of */
+/*       lists:  degree lists and hash buckets (a supervariable can be */
+/*       in only one kind of list at a time). */
+/* w:    The flag array w determines the status of elements and */
+/*       variables, and the external degree of elements. */
+
+/*       for elements: */
+/*          if w (e) = 0, then the element e is absorbed */
+/*          if w (e) .ge. wflg, then w (e) - wflg is the size of */
+/*               the set |Le \ Lme|, in terms of nonzeros (the */
+/*               sum of abs (nv (i)) for each principal variable i that */
+/*               is both in the pattern of element e and NOT in the */
+/*               pattern of the current pivot element, me). */
+/*          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has */
+/*               not yet been seen in the scan of the element lists in */
+/*               the computation of |Le\Lme| in loop 150 below. */
+
+/*       for variables: */
+/*          during supervariable detection, if w (j) .ne. wflg then j is 
+*/
+/*          not in the pattern of variable i */
+
+/*       The w array is initialized by setting w (i) = 1 for all i, */
+/*       and by setting wflg = 2.  It is reinitialized if wflg becomes */
+/*       too large (to ensure that wflg+n does not cause integer */
+/*       overflow). */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL INTEGERS: */
+/* -----------------------------------------------------------------------
+ */
+/* deg:          the degree of a variable or element */
+/* degme:        size, |Lme|, of the current element, me (= degree (me)) 
+*/
+/* dext:         external degree, |Le \ Lme|, of some element e */
+/* dmax:         largest |Le| seen so far */
+/* e:            an element */
+/* elenme:       the length, elen (me), of element list of pivotal var. */
+/* eln:          the length, elen (...), of an element list */
+/* hash:         the computed value of the hash function */
+/* hmod:         the hash function is computed modulo hmod = max (1,n-1) 
+*/
+/* i:            a supervariable */
+/* ilast:        the entry in a link list preceding i */
+/* inext:        the entry in a link list following i */
+/* j:            a supervariable */
+/* jlast:        the entry in a link list preceding j */
+/* jnext:        the entry in a link list, or path, following j */
+/* k:            the pivot order of an element or variable */
+/* knt1:         loop counter used during element construction */
+/* knt2:         loop counter used during element construction */
+/* knt3:         loop counter used during compression */
+/* lenj:         len (j) */
+/* ln:           length of a supervariable list */
+/* maxmem:       amount of memory needed for no compressions */
+/* me:           current supervariable being eliminated, and the */
+/*                       current element created by eliminating that */
+/*                       supervariable */
+/* mem:          memory in use assuming no compressions have occurred */
+/* mindeg:       current minimum degree */
+/* nel:          number of pivots selected so far */
+/* newmem:       amount of new memory needed for current pivot element */
+/* nleft:        n - nel, the number of nonpivotal rows/columns remaining 
+*/
+/* nvi:          the number of variables in a supervariable i (= nv (i)) 
+*/
+/* nvj:          the number of variables in a supervariable j (= nv (j)) 
+*/
+/* nvpiv:        number of pivots in current element */
+/* slenme:       number of variables in variable list of pivotal variable 
+*/
+/* wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig. */
+/* we:           w (e) */
+/* wflg:         used for flagging the w array.  See description of w. */
+/* wnvi:         wflg - nv (i) */
+/* x:            either a supervariable or an element */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL POINTERS: */
+/* -----------------------------------------------------------------------
+ */
+/*               Any parameter (pe (...) or pfree) or local variable */
+/*               starting with "p" (for Pointer) is an index into iw, */
+/*               and all indices into iw use variables starting with */
+/*               "p."  The only exception to this rule is the iwlen */
+/*               input argument. */
+/* p:            pointer into lots of things */
+/* p1:           pe (i) for some variable i (start of element list) */
+/* p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list) 
+*/
+/* p3:           index of first supervariable in clean list */
+/* pdst:         destination pointer, for compression */
+/* pend:         end of memory to compress */
+/* pj:           pointer into an element or variable */
+/* pme:          pointer into the current element (pme1...pme2) */
+/* pme1:         the current element, me, is stored in iw (pme1...pme2) */
+/* pme2:         the end of the current element */
+/* pn:           pointer into a "clean" variable, also used to compress */
+/* psrc:         source pointer, for compression */
+/* -----------------------------------------------------------------------
+ */
+/*  FUNCTIONS CALLED: */
+/* -----------------------------------------------------------------------
+ */
+/* =======================================================================
+ */
+/*  INITIALIZATIONS */
+/* =======================================================================
+ */
+    /* Parameter adjustments */
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    wflg = 2;
+    mindeg = 1;
+    *ncmpa = 0;
+    nel = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - 1;
+    hmod = max(i__1,i__2);
+    dmax_ = 0;
+    wbig = *iovflo - *n;
+    mem = *pfree - 1;
+    maxmem = mem;
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	last[i] = 0;
+	head[i] = 0;
+	nv[i] = 1;
+	w[i] = 1;
+	elen[i] = 0;
+	degree[i] = len[i];
+/* L10: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       initialize degree lists and eliminate rows with no off-diag. nz. 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	deg = degree[i];
+	if (deg > 0) {
+/*             --------------------------------------------------
+-------- */
+/*             place i in the degree list corresponding to its deg
+ree */
+/*             --------------------------------------------------
+-------- */
+	    inext = head[deg];
+	    if (inext != 0) {
+		last[inext] = i;
+	    }
+	    next[i] = inext;
+	    head[deg] = i;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             we have a variable that can be eliminated at once b
+ecause */
+/*             there is no off-diagonal non-zero in its row. */
+/*             --------------------------------------------------
+-------- */
+	    ++nel;
+	    elen[i] = -nel;
+	    pe[i] = 0;
+	    w[i] = 0;
+	}
+/* L20: */
+    }
+/* =======================================================================
+ */
+/*  WHILE (selecting pivots) DO */
+/* =======================================================================
+ */
+L30:
+    if (nel < *n) {
+/* ==================================================================
+===== */
+/*  GET PIVOT OF MINIMUM DEGREE */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          find next supervariable for elimination */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = *n;
+	for (deg = mindeg; deg <= i__1; ++deg) {
+	    me = head[deg];
+	    if (me > 0) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	mindeg = deg;
+/*          ---------------------------------------------------------
+---- */
+/*          remove chosen variable from link list */
+/*          ---------------------------------------------------------
+---- */
+	inext = next[me];
+	if (inext != 0) {
+	    last[inext] = 0;
+	}
+	head[deg] = inext;
+/*          ---------------------------------------------------------
+---- */
+/*          me represents the elimination of pivots nel+1 to nel+nv(me
+). */
+/*          place me itself as the first in this set.  It will be move
+d */
+/*          to the nel+nv(me) position when the permutation vectors ar
+e */
+/*          computed. */
+/*          ---------------------------------------------------------
+---- */
+	elenme = elen[me];
+	elen[me] = -(nel + 1);
+	nvpiv = nv[me];
+	nel += nvpiv;
+/* ==================================================================
+===== */
+/*  CONSTRUCT NEW ELEMENT */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          At this point, me is the pivotal supervariable.  It will b
+e */
+/*          converted into the current element.  Scan list of the */
+/*          pivotal supervariable, me, setting tree pointers and */
+/*          constructing new list of supervariables for the new elemen
+t, */
+/*          me.  p is a pointer to the current position in the old lis
+t. */
+/*          ---------------------------------------------------------
+---- */
+/*          flag the variable "me" as being in Lme by negating nv (me)
+ */
+	nv[me] = -nvpiv;
+	degme = 0;
+	if (elenme == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in place */
+/*             --------------------------------------------------
+-------- */
+	    pme1 = pe[me];
+	    pme2 = pme1 - 1;
+	    i__1 = pme1 + len[me] - 1;
+	    for (p = pme1; p <= i__1; ++p) {
+		i = iw[p];
+		nvi = nv[i];
+		if (nvi > 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   i is a principal variable not yet pla
+ced in Lme. */
+/*                   store i in new list */
+/*                   ------------------------------------
+---------------- */
+		    degme += nvi;
+/*                   flag i as being in Lme by negating nv
+ (i) */
+		    nv[i] = -nvi;
+		    ++pme2;
+		    iw[pme2] = i;
+/*                   ------------------------------------
+---------------- */
+/*                   remove variable i from degree list. 
+*/
+/*                   ------------------------------------
+---------------- */
+		    ilast = last[i];
+		    inext = next[i];
+		    if (inext != 0) {
+			last[inext] = ilast;
+		    }
+		    if (ilast != 0) {
+			next[ilast] = inext;
+		    } else {
+/*                      i is at the head of the degree
+ list */
+			head[degree[i]] = inext;
+		    }
+		}
+/* L60: */
+	    }
+/*             this element takes no new memory in iw: */
+	    newmem = 0;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in empty space, iw (pfree
+ ...) */
+/*             --------------------------------------------------
+-------- */
+	    p = pe[me];
+	    pme1 = *pfree;
+	    slenme = len[me] - elenme;
+	    i__1 = elenme + 1;
+	    for (knt1 = 1; knt1 <= i__1; ++knt1) {
+		if (knt1 > elenme) {
+/*                   search the supervariables in me. */
+		    e = me;
+		    pj = p;
+		    ln = slenme;
+		} else {
+/*                   search the elements in me. */
+		    e = iw[p];
+		    ++p;
+		    pj = pe[e];
+		    ln = len[e];
+		}
+/*                -------------------------------------------
+------------ */
+/*                search for different supervariables and add 
+them to the */
+/*                new list, compressing when necessary. this l
+oop is */
+/*                executed once for each element in the list a
+nd once for */
+/*                all the supervariables in the list. */
+/*                -------------------------------------------
+------------ */
+		i__2 = ln;
+		for (knt2 = 1; knt2 <= i__2; ++knt2) {
+		    i = iw[pj];
+		    ++pj;
+		    nvi = nv[i];
+		    if (nvi > 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      compress iw, if necessary */
+/*                      -----------------------------
+-------------------- */
+			if (*pfree > *iwlen) {
+/*                         prepare for compressing
+ iw by adjusting */
+/*                         pointers and lengths so
+ that the lists being */
+/*                         searched in the inner a
+nd outer loops contain */
+/*                         only the remaining entr
+ies. */
+			    pe[me] = p;
+			    len[me] -= knt1;
+			    if (len[me] == 0) {
+/*                            nothing left of 
+supervariable me */
+				pe[me] = 0;
+			    }
+			    pe[e] = pj;
+			    len[e] = ln - knt2;
+			    if (len[e] == 0) {
+/*                            nothing left of 
+element e */
+				pe[e] = 0;
+			    }
+			    ++(*ncmpa);
+/*                         store first item in pe 
+*/
+/*                         set first entry to -ite
+m */
+			    i__3 = *n;
+			    for (j = 1; j <= i__3; ++j) {
+				pn = pe[j];
+				if (pn > 0) {
+				    pe[j] = iw[pn];
+				    iw[pn] = -j;
+				}
+/* L70: */
+			    }
+/*                         psrc/pdst point to sour
+ce/destination */
+			    pdst = 1;
+			    psrc = 1;
+			    pend = pme1 - 1;
+/*                         while loop: */
+L80:
+			    if (psrc <= pend) {
+/*                            search for next 
+negative entry */
+				j = -iw[psrc];
+				++psrc;
+				if (j > 0) {
+				    iw[pdst] = pe[j];
+				    pe[j] = pdst;
+				    ++pdst;
+/*                               copy from
+ source to destination */
+				    lenj = len[j];
+				    i__3 = lenj - 2;
+				    for (knt3 = 0; knt3 <= i__3; ++knt3) {
+					iw[pdst + knt3] = iw[psrc + knt3];
+/* L90: */
+				    }
+				    pdst = pdst + lenj - 1;
+				    psrc = psrc + lenj - 1;
+				}
+				goto L80;
+			    }
+/*                         move the new partially-
+constructed element */
+			    p1 = pdst;
+			    i__3 = *pfree - 1;
+			    for (psrc = pme1; psrc <= i__3; ++psrc) {
+				iw[pdst] = iw[psrc];
+				++pdst;
+/* L100: */
+			    }
+			    pme1 = p1;
+			    *pfree = pdst;
+			    pj = pe[e];
+			    p = pe[me];
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      i is a principal variable not 
+yet placed in Lme */
+/*                      store i in new list */
+/*                      -----------------------------
+-------------------- */
+			degme += nvi;
+/*                      flag i as being in Lme by nega
+ting nv (i) */
+			nv[i] = -nvi;
+			iw[*pfree] = i;
+			++(*pfree);
+/*                      -----------------------------
+-------------------- */
+/*                      remove variable i from degree 
+link list */
+/*                      -----------------------------
+-------------------- */
+			ilast = last[i];
+			inext = next[i];
+			if (inext != 0) {
+			    last[inext] = ilast;
+			}
+			if (ilast != 0) {
+			    next[ilast] = inext;
+			} else {
+/*                         i is at the head of the
+ degree list */
+			    head[degree[i]] = inext;
+			}
+		    }
+/* L110: */
+		}
+		if (e != me) {
+/*                   set tree pointer and flag to indicate
+ element e is */
+/*                   absorbed into new element me (the par
+ent of e is me) */
+		    pe[e] = -me;
+		    w[e] = 0;
+		}
+/* L120: */
+	    }
+	    pme2 = *pfree - 1;
+/*             this element takes newmem new memory in iw (possibl
+y zero) */
+	    newmem = *pfree - pme1;
+	    mem += newmem;
+	    maxmem = max(maxmem,mem);
+	}
+/*          ---------------------------------------------------------
+---- */
+/*          me has now been converted into an element in iw (pme1..pme
+2) */
+/*          ---------------------------------------------------------
+---- */
+/*          degme holds the external degree of new element */
+	degree[me] = degme;
+	pe[me] = pme1;
+	len[me] = pme2 - pme1 + 1;
+/*          ---------------------------------------------------------
+---- */
+/*          make sure that wflg is not too large.  With the current */
+/*          value of wflg, wflg+n must not cause integer overflow */
+/*          ---------------------------------------------------------
+---- */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L130: */
+	    }
+	    wflg = 2;
+	}
+/* ==================================================================
+===== */
+/*  COMPUTE (w (e) - wflg) = |Le\Lme| FOR ALL ELEMENTS */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 1:  compute the external degrees of previous elements
+ */
+/*          with respect to the current element.  That is: */
+/*               (w (e) - wflg) = |Le \ Lme| */
+/*          for each element e that appears in any supervariable in Lm
+e. */
+/*          The notation Le refers to the pattern (list of */
+/*          supervariables) of a previous element e, where e is not ye
+t */
+/*          absorbed, stored in */
+/*          iw (pe (e) ... pe (e) + len (e) - 1). */
+/*          The notation Lme refers to the pattern of the current elem
+ent */
+/*          (stored in iw (pme1..pme2)).   If (w (e) - wflg) becomes 
+*/
+/*          zero, then the element e will be absorbed in scan 2. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    eln = elen[i];
+	    if (eln > 0) {
+/*                note that nv (i) has been negated to denote 
+i in Lme: */
+		nvi = -nv[i];
+		wnvi = wflg - nvi;
+		i__2 = pe[i] + eln - 1;
+		for (p = pe[i]; p <= i__2; ++p) {
+		    e = iw[p];
+		    we = w[e];
+		    if (we >= wflg) {
+/*                      unabsorbed element e has been 
+seen in this loop */
+			we -= nvi;
+		    } else if (we != 0) {
+/*                      e is an unabsorbed element */
+/*                      this is the first we have seen
+ e in all of Scan 1 */
+			we = degree[e] + wnvi;
+		    }
+		    w[e] = we;
+/* L140: */
+		}
+	    }
+/* L150: */
+	}
+/* ==================================================================
+===== */
+/*  DEGREE UPDATE AND ELEMENT ABSORPTION */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 2:  for each i in Lme, sum up the degree of Lme (whic
+h */
+/*          is degme), plus the sum of the external degrees of each Le
+ */
+/*          for the elements e appearing within i, plus the */
+/*          supervariables in i.  Place i in hash list. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    p1 = pe[i];
+	    p2 = p1 + elen[i] - 1;
+	    pn = p1;
+	    hash = 0;
+	    deg = 0;
+/*             --------------------------------------------------
+-------- */
+/*             scan the element list associated with supervariable
+ i */
+/*             --------------------------------------------------
+-------- */
+/*             UMFPACK/MA38-style approximate degree: */
+	    i__2 = p2;
+	    for (p = p1; p <= i__2; ++p) {
+		e = iw[p];
+		we = w[e];
+		if (we != 0) {
+/*                   e is an unabsorbed element */
+		    deg = deg + we - wflg;
+		    iw[pn] = e;
+		    ++pn;
+		    hash += e;
+		}
+/* L160: */
+	    }
+/*             count the number of elements in i (including me): 
+*/
+	    elen[i] = pn - p1 + 1;
+/*             --------------------------------------------------
+-------- */
+/*             scan the supervariables in the list associated with
+ i */
+/*             --------------------------------------------------
+-------- */
+	    p3 = pn;
+	    i__2 = p1 + len[i] - 1;
+	    for (p = p2 + 1; p <= i__2; ++p) {
+		j = iw[p];
+		nvj = nv[j];
+		if (nvj > 0) {
+/*                   j is unabsorbed, and not in Lme. */
+/*                   add to degree and add to new list */
+		    deg += nvj;
+		    iw[pn] = j;
+		    ++pn;
+		    hash += j;
+		}
+/* L170: */
+	    }
+/*             --------------------------------------------------
+-------- */
+/*             update the degree and check for mass elimination */
+/*             --------------------------------------------------
+-------- */
+	    if (elen[i] == 1 && p3 == pn) {
+/*                -------------------------------------------
+------------ */
+/*                mass elimination */
+/*                -------------------------------------------
+------------ */
+/*                There is nothing left of this node except fo
+r an */
+/*                edge to the current pivot element.  elen (i)
+ is 1, */
+/*                and there are no variables adjacent to node 
+i. */
+/*                Absorb i into the current pivot element, me.
+ */
+		pe[i] = -me;
+		nvi = -nv[i];
+		degme -= nvi;
+		nvpiv += nvi;
+		nel += nvi;
+		nv[i] = 0;
+		elen[i] = 0;
+	    } else {
+/*                -------------------------------------------
+------------ */
+/*                update the upper-bound degree of i */
+/*                -------------------------------------------
+------------ */
+/*                the following degree does not yet include th
+e size */
+/*                of the current element, which is added later
+: */
+/* Computing MIN */
+		i__2 = degree[i];
+		degree[i] = min(i__2,deg);
+/*                -------------------------------------------
+------------ */
+/*                add me to the list for i */
+/*                -------------------------------------------
+------------ */
+/*                move first supervariable to end of list */
+		iw[pn] = iw[p3];
+/*                move first element to end of element part of
+ list */
+		iw[p3] = iw[p1];
+/*                add new element to front of list. */
+		iw[p1] = me;
+/*                store the new length of the list in len (i) 
+*/
+		len[i] = pn - p1 + 1;
+/*                -------------------------------------------
+------------ */
+/*                place in hash bucket.  Save hash key of i in
+ last (i). */
+/*                -------------------------------------------
+------------ */
+		hash = hash % hmod + 1;
+		j = head[hash];
+		if (j <= 0) {
+/*                   the degree list is empty, hash head i
+s -j */
+		    next[i] = -j;
+		    head[hash] = -i;
+		} else {
+/*                   degree list is not empty */
+/*                   use last (head (hash)) as hash head 
+*/
+		    next[i] = last[j];
+		    last[j] = i;
+		}
+		last[i] = hash;
+	    }
+/* L180: */
+	}
+	degree[me] = degme;
+/*          ---------------------------------------------------------
+---- */
+/*          Clear the counter array, w (...), by incrementing wflg. */
+/*          ---------------------------------------------------------
+---- */
+	dmax_ = max(dmax_,degme);
+	wflg += dmax_;
+/*          make sure that wflg+n does not cause integer overflow */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L190: */
+	    }
+	    wflg = 2;
+	}
+/*          at this point, w (1..n) .lt. wflg holds */
+/* ==================================================================
+===== */
+/*  SUPERVARIABLE DETECTION */
+/* ==================================================================
+===== */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    if (nv[i] < 0) {
+/*                i is a principal variable in Lme */
+/*                -------------------------------------------
+------------ */
+/*                examine all hash buckets with 2 or more vari
+ables.  We */
+/*                do this by examining all unique hash keys for */
+/*                super- */
+/*                variables in the pattern Lme of the current 
+element, me */
+/*                -------------------------------------------
+------------ */
+		hash = last[i];
+/*                let i = head of hash bucket, and empty the h
+ash bucket */
+		j = head[hash];
+		if (j == 0) {
+		    goto L250;
+		}
+		if (j < 0) {
+/*                   degree list is empty */
+		    i = -j;
+		    head[hash] = 0;
+		} else {
+/*                   degree list is not empty, restore las
+t () of head */
+		    i = last[j];
+		    last[j] = 0;
+		}
+		if (i == 0) {
+		    goto L250;
+		}
+/*                while loop: */
+L200:
+		if (next[i] != 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   this bucket has one or more variables
+ following i. */
+/*                   scan all of them to see if i can abso
+rb any entries */
+/*                   that follow i in hash bucket.  Scatte
+r i into w. */
+/*                   ------------------------------------
+---------------- */
+		    ln = len[i];
+		    eln = elen[i];
+/*                   do not flag the first element in the 
+list (me) */
+		    i__2 = pe[i] + ln - 1;
+		    for (p = pe[i] + 1; p <= i__2; ++p) {
+			w[iw[p]] = wflg;
+/* L210: */
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   scan every other entry j following i 
+in bucket */
+/*                   ------------------------------------
+---------------- */
+		    jlast = i;
+		    j = next[i];
+/*                   while loop: */
+L220:
+		    if (j != 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      check if j and i have identica
+l nonzero pattern */
+/*                      -----------------------------
+-------------------- */
+			if (len[j] != ln) {
+/*                         i and j do not have sam
+e size data structure */
+			    goto L240;
+			}
+			if (elen[j] != eln) {
+/*                         i and j do not have sam
+e number of adjacent el */
+			    goto L240;
+			}
+/*                      do not flag the first element 
+in the list (me) */
+			i__2 = pe[j] + ln - 1;
+			for (p = pe[j] + 1; p <= i__2; ++p) {
+			    if (w[iw[p]] != wflg) {
+/*                            an entry (iw(p))
+ is in j but not in i */
+				goto L240;
+			    }
+/* L230: */
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      found it!  j can be absorbed i
+nto i */
+/*                      -----------------------------
+-------------------- */
+			pe[j] = -i;
+/*                      both nv (i) and nv (j) are neg
+ated since they */
+/*                      are in Lme, and the absolute v
+alues of each */
+/*                      are the number of variables in
+ i and j: */
+			nv[i] += nv[j];
+			nv[j] = 0;
+			elen[j] = 0;
+/*                      delete j from hash bucket */
+			j = next[j];
+			next[jlast] = j;
+			goto L220;
+/*                      -----------------------------
+-------------------- */
+L240:
+/*                      j cannot be absorbed into i */
+/*                      -----------------------------
+-------------------- */
+			jlast = j;
+			j = next[j];
+			goto L220;
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   no more variables can be absorbed int
+o i */
+/*                   go to next i in bucket and clear flag
+ array */
+/*                   ------------------------------------
+---------------- */
+		    ++wflg;
+		    i = next[i];
+		    if (i != 0) {
+			goto L200;
+		    }
+		}
+	    }
+L250:
+	    ;
+	}
+/* ==================================================================
+===== */
+/*  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMEN
+T */
+/* ==================================================================
+===== */
+	p = pme1;
+	nleft = *n - nel;
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    nvi = -nv[i];
+	    if (nvi > 0) {
+/*                i is a principal variable in Lme */
+/*                restore nv (i) to signify that i is principa
+l */
+		nv[i] = nvi;
+/*                -------------------------------------------
+------------ */
+/*                compute the external degree (add size of cur
+rent elem) */
+/*                -------------------------------------------
+------------ */
+/* Computing MAX */
+/* Computing MIN */
+		i__4 = degree[i] + degme - nvi, i__5 = nleft - nvi;
+		i__2 = 1, i__3 = min(i__4,i__5);
+		deg = max(i__2,i__3);
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable at the head of the d
+egree list */
+/*                -------------------------------------------
+------------ */
+		inext = head[deg];
+		if (inext != 0) {
+		    last[inext] = i;
+		}
+		next[i] = inext;
+		last[i] = 0;
+		head[deg] = i;
+/*                -------------------------------------------
+------------ */
+/*                save the new degree, and find the minimum de
+gree */
+/*                -------------------------------------------
+------------ */
+		mindeg = min(mindeg,deg);
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable in the element patte
+rn */
+/*                -------------------------------------------
+------------ */
+		iw[p] = i;
+		++p;
+	    }
+/* L260: */
+	}
+/* ==================================================================
+===== */
+/*  FINALIZE THE NEW ELEMENT */
+/* ==================================================================
+===== */
+	nv[me] = nvpiv + degme;
+/*          nv (me) is now the degree of pivot (including diagonal par
+t) */
+/*          save the length of the list for the new element me */
+	len[me] = p - pme1;
+	if (len[me] == 0) {
+/*             there is nothing left of the current pivot element 
+*/
+	    pe[me] = 0;
+	    w[me] = 0;
+	}
+	if (newmem != 0) {
+/*             element was not constructed in place: deallocate pa
+rt */
+/*             of it (final size is less than or equal to newmem, 
+*/
+/*             since newly nonprincipal variables have been remove
+d). */
+	    *pfree = p;
+	    mem = mem - newmem + len[me];
+	}
+/* ==================================================================
+===== */
+/*          END WHILE (selecting pivots) */
+	goto L30;
+    }
+/* =======================================================================
+ */
+/* =======================================================================
+ */
+/*  COMPUTE THE PERMUTATION VECTORS */
+/* =======================================================================
+ */
+/*       ---------------------------------------------------------------- 
+*/
+/*       The time taken by the following code is O(n).  At this */
+/*       point, elen (e) = -k has been done for all elements e, */
+/*       and elen (i) = 0 has been done for all nonprincipal */
+/*       variables i.  At this point, there are no principal */
+/*       supervariables left, and all elements are absorbed. */
+/*       ---------------------------------------------------------------- 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+/*       compute the ordering of unordered nonprincipal variables */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	if (elen[i] == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             i is an un-ordered row.  Traverse the tree from i u
+ntil */
+/*             reaching an element, e.  The element, e, was the */
+/*             principal supervariable of i and all nodes in the p
+ath */
+/*             from i to when e was selected as pivot. */
+/*             --------------------------------------------------
+-------- */
+	    j = -pe[i];
+/*             while (j is a variable) do: */
+L270:
+	    if (elen[j] >= 0) {
+		j = -pe[j];
+		goto L270;
+	    }
+	    e = j;
+/*             --------------------------------------------------
+-------- */
+/*             get the current pivot ordering of e */
+/*             --------------------------------------------------
+-------- */
+	    k = -elen[e];
+/*             --------------------------------------------------
+-------- */
+/*             traverse the path again from i to e, and compress t
+he */
+/*             path (all nodes point to e).  Path compression allo
+ws */
+/*             this code to compute in O(n) time.  Order the unord
+ered */
+/*             nodes in the path, and place the element e at the e
+nd. */
+/*             --------------------------------------------------
+-------- */
+	    j = i;
+/*             while (j is a variable) do: */
+L280:
+	    if (elen[j] >= 0) {
+		jnext = -pe[j];
+		pe[j] = -e;
+		if (elen[j] == 0) {
+/*                   j is an unordered row */
+		    elen[j] = k;
+		    ++k;
+		}
+		j = jnext;
+		goto L280;
+	    }
+/*             leave elen (e) negative, so we know it is an elemen
+t */
+	    elen[e] = -k;
+	}
+/* L290: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       reset the inverse permutation (elen (1..n)) to be positive, */
+/*       and compute the permutation (last (1..n)). */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	k = (i__2 = elen[i], abs(i__2));
+	last[k] = i;
+	elen[i] = k;
+/* L300: */
+    }
+/* =======================================================================
+ */
+/*  RETURN THE MEMORY USAGE IN IW */
+/* =======================================================================
+ */
+/*       If maxmem is less than or equal to iwlen, then no compressions */
+/*       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise */
+/*       compressions did occur, and iwlen would have had to have been */
+/*       greater than or equal to maxmem for no compressions to occur. */
+/*       Return the value of maxmem in the pfree argument. */
+    *pfree = maxmem;
+    return 0;
+} /* amdbar_ */
+
diff --git a/contrib/taucs/external/src/amdbar.f b/contrib/taucs/external/src/amdbar.f
new file mode 100644
index 0000000000000000000000000000000000000000..630b0dacf498fb2a1680498e9b8912e6007d6c07
--- /dev/null
+++ b/contrib/taucs/external/src/amdbar.f
@@ -0,0 +1,1277 @@
+
+        SUBROUTINE AMDBAR
+     $          (N, PE, IW, LEN, IWLEN, PFREE, NV, NEXT,
+     $          LAST, HEAD, ELEN, DEGREE, NCMPA, W, IOVFLO)
+
+        INTEGER N, IWLEN, PFREE, NCMPA, IOVFLO, IW (IWLEN), PE (N),
+     $          DEGREE (N), NV (N), NEXT (N), LAST (N), HEAD (N),
+     $          ELEN (N), W (N), LEN (N)
+
+C-----------------------------------------------------------------------
+C  The MC47 / AMD suite of minimum degree ordering algorithms.
+C
+C  This code is one of seven variations of a single algorithm:
+C  the primary routine (MC47B/BD, only available in the Harwell
+C  Subroutine Library), and 6 variations that differ only in
+C  how they compute the degree (available in NETLIB).
+C
+C  For information on the Harwell Subroutine Library, contact
+C  John Harding, Harwell Subroutine Library, B 552, AEA Technology,
+C  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573,
+C  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will
+C  provide details of price and conditions of use.
+C-----------------------------------------------------------------------
+
+************************************************************************
+* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+* and AMDATR) may be used SOLELY for educational, research, and
+* benchmarking purposes by non-profit organizations and the U.S.
+* government.  Commercial and other organizations may make use of the
+* AMD routines SOLELY for benchmarking purposes only.  The AMD
+* routines may be modified by or on behalf of the User for such
+* use but at no time shall the AMD routines or any such modified
+* version of them become the property of the User.  The AMD routines
+* are provided without warranty of any kind, either expressed or
+* implied.  Neither the Authors nor their employers shall be liable
+* for any direct or consequential loss or damage whatsoever arising
+* out of the use or misuse of the AMD routines by the User.  The AMD
+* routines must not be sold.  You may make copies of the AMD routines,
+* but this NOTICE and the Copyright notice must appear in all copies.
+* Any other use of the AMD routines requires written permission.
+* Your use of the AMD routines is an implicit agreement to these
+* conditions."
+************************************************************************
+
+C-----------------------------------------------------------------------
+C AMDbar:  Approximate Minimum (UMFPACK/MA38-style, external) Degree
+C          ordering algorithm, but without aggressive absorption
+C-----------------------------------------------------------------------
+
+C  Variation 2:  MC47-style approximate external degree, but with no
+C  aggressive absorption.  This is included for comparison with the
+C  other 5 variations.  It tends to compute orderings comparable to
+C  MC47B/BD, or slightly worse in some cases.  It tends to be about as
+C  fast as MC47B/BD.
+C
+C  We recommend using MC47B/BD instead of this routine since MC47B/BD
+C  gives better results in about the same time.
+
+C-----------------------------------------------------------------------
+
+C Given a representation of the nonzero pattern of a symmetric matrix,
+C       A, (excluding the diagonal) perform an approximate minimum
+C       (UMFPACK/MA38-style) degree ordering to compute a pivot order
+C       such that the introduction of nonzeros (fill-in) in the Cholesky
+C       factors A = LL^T is kept low.  At each step, the pivot
+C       selected is the one with the minimum UMFPACK/MA38-style
+C       upper-bound on the external degree.  This routine does not
+C       perform aggressive absorption (as done by MC47B/BD).  Aggressive
+C       absorption in MC47B/BD is used to tighten the bound on the
+C       degree.  This can result in a significant improvement in the
+C       quality of the ordering for some matrices.
+C
+C       The approximate degree algorithm implemented here is the
+C       symmetric analog of the degree update algorithm in MA38 and
+C       UMFPACK (the Unsymmetric-pattern MultiFrontal PACKage, both by
+C       Davis and Duff, available for academic users in NETLIB as
+C       linalg/umfpack.shar or via anonymous ftp to
+C       ftp.cis.ufl.edu:pub/umfpack).  Non-academic users must use
+C       MA38 in the Harwell Subroutine Library instead of UMFPACK.
+
+C **********************************************************************
+C ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
+C **********************************************************************
+C ** If you want error checking, a more versatile input format, and a **
+C ** simpler user interface, then use MC47A/AD in the Harwell         **
+C ** Subroutine Library, which checks for errors, transforms the      **
+C ** input, and calls MC47B/BD.                                       **
+C **********************************************************************
+
+C       References:  (UF Tech Reports are available via anonymous ftp
+C       to ftp.cis.ufl.edu:cis/tech-reports).
+C
+C       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern
+C               multifrontal method for sparse LU factorization",
+C               SIAM J. Matrix Analysis and Applications, to appear.
+C               also Univ. of Florida Technical Report TR-94-038.
+C               Discusses UMFPACK / MA38.
+C
+C       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff,
+C               "An approximate minimum degree ordering algorithm,"
+C               SIAM J. Matrix Analysis and Applications (to appear),
+C               also Univ. of Florida Technical Report TR-94-039.
+C               Discusses this routine.
+C
+C       [3] Alan George and Joseph Liu, "The evolution of the
+C               minimum degree ordering algorithm," SIAM Review, vol.
+C               31, no. 1, pp. 1-19, March 1989.  We list below the
+C               features mentioned in that paper that this code
+C               includes:
+C
+C       mass elimination:
+C               Yes.  MA27 relied on supervariable detection for mass
+C               elimination.
+C       indistinguishable nodes:
+C               Yes (we call these "supervariables").  This was also in
+C               the MA27 code - although we modified the method of
+C               detecting them (the previous hash was the true degree,
+C               which we no longer keep track of).  A supervariable is
+C               a set of rows with identical nonzero pattern.  All
+C               variables in a supervariable are eliminated together.
+C               Each supervariable has as its numerical name that of
+C               one of its variables (its principal variable).
+C       quotient graph representation:
+C               Yes.  We use the term "element" for the cliques formed
+C               during elimination.  This was also in the MA27 code.
+C               The algorithm can operate in place, but it will work
+C               more efficiently if given some "elbow room."
+C       element absorption:
+C               Yes.  This was also in the MA27 code.
+C       external degree:
+C               Yes.  The MA27 code was based on the true degree.
+C       incomplete degree update and multiple elimination:
+C               No.  This was not in MA27, either.  Our method of
+C               degree update within MC47B/BD is element-based, not
+C               variable-based.  It is thus not well-suited for use
+C               with incomplete degree update or multiple elimination.
+
+C-----------------------------------------------------------------------
+C Authors, and Copyright (C) 1995 by:
+C       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid.
+C
+C Acknowledgements:
+C       This work (and the UMFPACK package) was supported by the
+C       National Science Foundation (ASC-9111263 and DMS-9223088).
+C       The UMFPACK/MA38 approximate degree update algorithm, the
+C       unsymmetric analog which forms the basis of MC47B/BD, was
+C       developed while Tim Davis was supported by CERFACS (Toulouse,
+C       France) in a post-doctoral position.
+C
+C Date:  September, 1995
+C-----------------------------------------------------------------------
+
+C-----------------------------------------------------------------------
+C INPUT ARGUMENTS (unaltered):
+C-----------------------------------------------------------------------
+
+C n:    The matrix order.
+C
+C       Restriction:  1 .le. n .lt. (iovflo/2)-2
+
+C iwlen:        The length of iw (1..iwlen).  On input, the matrix is
+C       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+C       slightly larger than what is required to hold the matrix, at
+C       least iwlen .ge. pfree + n is recommended.  Otherwise,
+C       excessive compressions will take place.
+C       *** We do not recommend running this algorithm with ***
+C       ***      iwlen .lt. pfree + n.                      ***
+C       *** Better performance will be obtained if          ***
+C       ***      iwlen .ge. pfree + n                       ***
+C       *** or better yet                                   ***
+C       ***      iwlen .gt. 1.2 * pfree                     ***
+C       *** (where pfree is its value on input).            ***
+C       The algorithm will not run at all if iwlen .lt. pfree-1.
+C
+C       Restriction: iwlen .ge. pfree-1
+
+C iovflo:       The largest positive integer that your computer can
+C       represent (-iovflo should also be representable).  On a 32-bit
+C       computer with 2's-complement arithmetic,
+C       iovflo = (2^31)-1 = 2,147,483,647.
+
+C-----------------------------------------------------------------------
+C INPUT/OUTPUT ARGUMENTS:
+C-----------------------------------------------------------------------
+
+C pe:   On input, pe (i) is the index in iw of the start of row i, or
+C       zero if row i has no off-diagonal non-zeros.
+C
+C       During execution, it is used for both supervariables and
+C       elements:
+C
+C       * Principal supervariable i:  index into iw of the
+C               description of supervariable i.  A supervariable
+C               represents one or more rows of the matrix
+C               with identical nonzero pattern.
+C       * Non-principal supervariable i:  if i has been absorbed
+C               into another supervariable j, then pe (i) = -j.
+C               That is, j has the same pattern as i.
+C               Note that j might later be absorbed into another
+C               supervariable j2, in which case pe (i) is still -j,
+C               and pe (j) = -j2.
+C       * Unabsorbed element e:  the index into iw of the description
+C               of element e, if e has not yet been absorbed by a
+C               subsequent element.  Element e is created when
+C               the supervariable of the same name is selected as
+C               the pivot.
+C       * Absorbed element e:  if element e is absorbed into element
+C               e2, then pe (e) = -e2.  This occurs when the pattern of
+C               e (that is, Le) is found to be a subset of the pattern
+C               of e2 (that is, Le2).  If element e is "null" (it has
+C               no nonzeros outside its pivot block), then pe (e) = 0.
+C
+C       On output, pe holds the assembly tree/forest, which implicitly
+C       represents a pivot order with identical fill-in as the actual
+C       order (via a depth-first search of the tree).
+C
+C       On output:
+C       If nv (i) .gt. 0, then i represents a node in the assembly tree,
+C       and the parent of i is -pe (i), or zero if i is a root.
+C       If nv (i) = 0, then (i,-pe (i)) represents an edge in a
+C       subtree, the root of which is a node in the assembly tree.
+
+C pfree:        On input the tail end of the array, iw (pfree..iwlen),
+C       is empty, and the matrix is stored in iw (1..pfree-1).
+C       During execution, additional data is placed in iw, and pfree
+C       is modified so that iw (pfree..iwlen) is always the unused part
+C       of iw.  On output, pfree is set equal to the size of iw that
+C       would have been needed for no compressions to occur.  If
+C       ncmpa is zero, then pfree (on output) is less than or equal to
+C       iwlen, and the space iw (pfree+1 ... iwlen) was not used.
+C       Otherwise, pfree (on output) is greater than iwlen, and all the
+C       memory in iw was used.
+
+C-----------------------------------------------------------------------
+C INPUT/MODIFIED (undefined on output):
+C-----------------------------------------------------------------------
+
+C len:  On input, len (i) holds the number of entries in row i of the
+C       matrix, excluding the diagonal.  The contents of len (1..n)
+C       are undefined on output.
+
+C iw:   On input, iw (1..pfree-1) holds the description of each row i
+C       in the matrix.  The matrix must be symmetric, and both upper
+C       and lower triangular parts must be present.  The diagonal must
+C       not be present.  Row i is held as follows:
+C
+C               len (i):  the length of the row i data structure
+C               iw (pe (i) ... pe (i) + len (i) - 1):
+C                       the list of column indices for nonzeros
+C                       in row i (simple supervariables), excluding
+C                       the diagonal.  All supervariables start with
+C                       one row/column each (supervariable i is just
+C                       row i).
+C               if len (i) is zero on input, then pe (i) is ignored
+C               on input.
+C
+C               Note that the rows need not be in any particular order,
+C               and there may be empty space between the rows.
+C
+C       During execution, the supervariable i experiences fill-in.
+C       This is represented by placing in i a list of the elements
+C       that cause fill-in in supervariable i:
+C
+C               len (i):  the length of supervariable i
+C               iw (pe (i) ... pe (i) + elen (i) - 1):
+C                       the list of elements that contain i.  This list
+C                       is kept short by removing absorbed elements.
+C               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1):
+C                       the list of supervariables in i.  This list
+C                       is kept short by removing nonprincipal
+C                       variables, and any entry j that is also
+C                       contained in at least one of the elements
+C                       (j in Le) in the list for i (e in row i).
+C
+C       When supervariable i is selected as pivot, we create an
+C       element e of the same name (e=i):
+C
+C               len (e):  the length of element e
+C               iw (pe (e) ... pe (e) + len (e) - 1):
+C                       the list of supervariables in element e.
+C
+C       An element represents the fill-in that occurs when supervariable
+C       i is selected as pivot (which represents the selection of row i
+C       and all non-principal variables whose principal variable is i).
+C       We use the term Le to denote the set of all supervariables
+C       in element e.  Absorbed supervariables and elements are pruned
+C       from these lists when computationally convenient.
+C
+C       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
+C       The contents of iw are undefined on output.
+
+C-----------------------------------------------------------------------
+C OUTPUT (need not be set on input):
+C-----------------------------------------------------------------------
+
+C nv:   During execution, abs (nv (i)) is equal to the number of rows
+C       that are represented by the principal supervariable i.  If i is
+C       a nonprincipal variable, then nv (i) = 0.  Initially,
+C       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a
+C       principal variable in the pattern Lme of the current pivot
+C       element me.  On output, nv (e) holds the true degree of element
+C       e at the time it was created (including the diagonal part).
+
+C ncmpa:        The number of times iw was compressed.  If this is
+C       excessive, then the execution took longer than what could have
+C       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20%
+C       larger than the value of pfree on input (or at least
+C       iwlen .ge. pfree + n).  The fastest performance will be
+C       obtained when ncmpa is returned as zero.  If iwlen is set to
+C       the value returned by pfree on *output*, then no compressions
+C       will occur.
+
+C elen: See the description of iw above.  At the start of execution,
+C       elen (i) is set to zero.  During execution, elen (i) is the
+C       number of elements in the list for supervariable i.  When e
+C       becomes an element, elen (e) = -nel is set, where nel is the
+C       current step of factorization.  elen (i) = 0 is done when i
+C       becomes nonprincipal.
+C
+C       For variables, elen (i) .ge. 0 holds until just before the
+C       permutation vectors are computed.  For elements,
+C       elen (e) .lt. 0 holds.
+C
+C       On output elen (1..n) holds the inverse permutation (the same
+C       as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+C       then row i is the kth pivot row.  Row i of A appears as the
+C       (elen(i))-th row in the permuted matrix, PAP^T.
+
+C last: In a degree list, last (i) is the supervariable preceding i,
+C       or zero if i is the head of the list.  In a hash bucket,
+C       last (i) is the hash key for i.  last (head (hash)) is also
+C       used as the head of a hash bucket if head (hash) contains a
+C       degree list (see head, below).
+C
+C       On output, last (1..n) holds the permutation (the same as the
+C       'PERM' argument in Sparspak).  That is, if i = last (k), then
+C       row i is the kth pivot row.  Row last (k) of A is the k-th row
+C       in the permuted matrix, PAP^T.
+
+C-----------------------------------------------------------------------
+C LOCAL (not input or output - used only during execution):
+C-----------------------------------------------------------------------
+
+C degree:       If i is a supervariable, then degree (i) holds the
+C       current approximation of the external degree of row i (an upper
+C       bound).  The external degree is the number of nonzeros in row i,
+C       minus abs (nv (i)) (the diagonal part).  The bound is equal to
+C       the external degree if elen (i) is less than or equal to two.
+C
+C       We also use the term "external degree" for elements e to refer
+C       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|,
+C       which is the degree of the off-diagonal part of the element e
+C       (not including the diagonal part).
+
+C head: head is used for degree lists.  head (deg) is the first
+C       supervariable in a degree list (all supervariables i in a
+C       degree list deg have the same approximate degree, namely,
+C       deg = degree (i)).  If the list deg is empty then
+C       head (deg) = 0.
+C
+C       During supervariable detection head (hash) also serves as a
+C       pointer to a hash bucket.
+C       If head (hash) .gt. 0, there is a degree list of degree hash.
+C               The hash bucket head pointer is last (head (hash)).
+C       If head (hash) = 0, then the degree list and hash bucket are
+C               both empty.
+C       If head (hash) .lt. 0, then the degree list is empty, and
+C               -head (hash) is the head of the hash bucket.
+C       After supervariable detection is complete, all hash buckets
+C       are empty, and the (last (head (hash)) = 0) condition is
+C       restored for the non-empty degree lists.
+
+C next: next (i) is the supervariable following i in a link list, or
+C       zero if i is the last in the list.  Used for two kinds of
+C       lists:  degree lists and hash buckets (a supervariable can be
+C       in only one kind of list at a time).
+
+C w:    The flag array w determines the status of elements and
+C       variables, and the external degree of elements.
+C
+C       for elements:
+C          if w (e) = 0, then the element e is absorbed
+C          if w (e) .ge. wflg, then w (e) - wflg is the size of
+C               the set |Le \ Lme|, in terms of nonzeros (the
+C               sum of abs (nv (i)) for each principal variable i that
+C               is both in the pattern of element e and NOT in the
+C               pattern of the current pivot element, me).
+C          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has
+C               not yet been seen in the scan of the element lists in
+C               the computation of |Le\Lme| in loop 150 below.
+C
+C       for variables:
+C          during supervariable detection, if w (j) .ne. wflg then j is
+C          not in the pattern of variable i
+C
+C       The w array is initialized by setting w (i) = 1 for all i,
+C       and by setting wflg = 2.  It is reinitialized if wflg becomes
+C       too large (to ensure that wflg+n does not cause integer
+C       overflow).
+
+C-----------------------------------------------------------------------
+C LOCAL INTEGERS:
+C-----------------------------------------------------------------------
+
+        INTEGER DEG, DEGME, DEXT, DMAX, E, ELENME, ELN, HASH, HMOD, I,
+     $          ILAST, INEXT, J, JLAST, JNEXT, K, KNT1, KNT2, KNT3,
+     $          LENJ, LN, MAXMEM, ME, MEM, MINDEG, NEL, NEWMEM,
+     $          NLEFT, NVI, NVJ, NVPIV, SLENME, WBIG, WE, WFLG, WNVI, X
+
+C deg:          the degree of a variable or element
+C degme:        size, |Lme|, of the current element, me (= degree (me))
+C dext:         external degree, |Le \ Lme|, of some element e
+C dmax:         largest |Le| seen so far
+C e:            an element
+C elenme:       the length, elen (me), of element list of pivotal var.
+C eln:          the length, elen (...), of an element list
+C hash:         the computed value of the hash function
+C hmod:         the hash function is computed modulo hmod = max (1,n-1)
+C i:            a supervariable
+C ilast:        the entry in a link list preceding i
+C inext:        the entry in a link list following i
+C j:            a supervariable
+C jlast:        the entry in a link list preceding j
+C jnext:        the entry in a link list, or path, following j
+C k:            the pivot order of an element or variable
+C knt1:         loop counter used during element construction
+C knt2:         loop counter used during element construction
+C knt3:         loop counter used during compression
+C lenj:         len (j)
+C ln:           length of a supervariable list
+C maxmem:       amount of memory needed for no compressions
+C me:           current supervariable being eliminated, and the
+C                       current element created by eliminating that
+C                       supervariable
+C mem:          memory in use assuming no compressions have occurred
+C mindeg:       current minimum degree
+C nel:          number of pivots selected so far
+C newmem:       amount of new memory needed for current pivot element
+C nleft:        n - nel, the number of nonpivotal rows/columns remaining
+C nvi:          the number of variables in a supervariable i (= nv (i))
+C nvj:          the number of variables in a supervariable j (= nv (j))
+C nvpiv:        number of pivots in current element
+C slenme:       number of variables in variable list of pivotal variable
+C wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig.
+C we:           w (e)
+C wflg:         used for flagging the w array.  See description of w.
+C wnvi:         wflg - nv (i)
+C x:            either a supervariable or an element
+
+C-----------------------------------------------------------------------
+C LOCAL POINTERS:
+C-----------------------------------------------------------------------
+
+        INTEGER P, P1, P2, P3, PDST, PEND, PJ, PME, PME1, PME2, PN, PSRC
+
+C               Any parameter (pe (...) or pfree) or local variable
+C               starting with "p" (for Pointer) is an index into iw,
+C               and all indices into iw use variables starting with
+C               "p."  The only exception to this rule is the iwlen
+C               input argument.
+
+C p:            pointer into lots of things
+C p1:           pe (i) for some variable i (start of element list)
+C p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list)
+C p3:           index of first supervariable in clean list
+C pdst:         destination pointer, for compression
+C pend:         end of memory to compress
+C pj:           pointer into an element or variable
+C pme:          pointer into the current element (pme1...pme2)
+C pme1:         the current element, me, is stored in iw (pme1...pme2)
+C pme2:         the end of the current element
+C pn:           pointer into a "clean" variable, also used to compress
+C psrc:         source pointer, for compression
+
+C-----------------------------------------------------------------------
+C  FUNCTIONS CALLED:
+C-----------------------------------------------------------------------
+
+        INTRINSIC MAX, MIN, MOD
+
+C=======================================================================
+C  INITIALIZATIONS
+C=======================================================================
+
+        WFLG = 2
+        MINDEG = 1
+        NCMPA = 0
+        NEL = 0
+        HMOD = MAX (1, N-1)
+        DMAX = 0
+        WBIG = IOVFLO - N
+        MEM = PFREE - 1
+        MAXMEM = MEM
+
+        DO 10 I = 1, N
+           LAST (I) = 0
+           HEAD (I) = 0
+           NV (I) = 1
+           W (I) = 1
+           ELEN (I) = 0
+           DEGREE (I) = LEN (I)
+10         CONTINUE
+
+C       ----------------------------------------------------------------
+C       initialize degree lists and eliminate rows with no off-diag. nz.
+C       ----------------------------------------------------------------
+
+        DO 20 I = 1, N
+
+           DEG = DEGREE (I)
+
+           IF (DEG .GT. 0) THEN
+
+C             ----------------------------------------------------------
+C             place i in the degree list corresponding to its degree
+C             ----------------------------------------------------------
+
+              INEXT = HEAD (DEG)
+              IF (INEXT .NE. 0) LAST (INEXT) = I
+              NEXT (I) = INEXT
+              HEAD (DEG) = I
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             we have a variable that can be eliminated at once because
+C             there is no off-diagonal non-zero in its row.
+C             ----------------------------------------------------------
+
+              NEL = NEL + 1
+              ELEN (I) = -NEL
+              PE (I) = 0
+              W (I) = 0
+
+              ENDIF
+
+20         CONTINUE
+
+C=======================================================================
+C  WHILE (selecting pivots) DO
+C=======================================================================
+
+30      CONTINUE
+        IF (NEL .LT. N) THEN
+
+C=======================================================================
+C  GET PIVOT OF MINIMUM DEGREE
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          find next supervariable for elimination
+C          -------------------------------------------------------------
+
+           DO 40 DEG = MINDEG, N
+              ME = HEAD (DEG)
+              IF (ME .GT. 0) GOTO 50
+40            CONTINUE
+50         CONTINUE
+           MINDEG = DEG
+
+C          -------------------------------------------------------------
+C          remove chosen variable from link list
+C          -------------------------------------------------------------
+
+           INEXT = NEXT (ME)
+           IF (INEXT .NE. 0) LAST (INEXT) = 0
+           HEAD (DEG) = INEXT
+
+C          -------------------------------------------------------------
+C          me represents the elimination of pivots nel+1 to nel+nv(me).
+C          place me itself as the first in this set.  It will be moved
+C          to the nel+nv(me) position when the permutation vectors are
+C          computed.
+C          -------------------------------------------------------------
+
+           ELENME = ELEN (ME)
+           ELEN (ME) = - (NEL + 1)
+           NVPIV = NV (ME)
+           NEL = NEL + NVPIV
+
+C=======================================================================
+C  CONSTRUCT NEW ELEMENT
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          At this point, me is the pivotal supervariable.  It will be
+C          converted into the current element.  Scan list of the
+C          pivotal supervariable, me, setting tree pointers and
+C          constructing new list of supervariables for the new element,
+C          me.  p is a pointer to the current position in the old list.
+C          -------------------------------------------------------------
+
+C          flag the variable "me" as being in Lme by negating nv (me)
+           NV (ME) = -NVPIV
+           DEGME = 0
+
+           IF (ELENME .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             construct the new element in place
+C             ----------------------------------------------------------
+
+              PME1 = PE (ME)
+              PME2 = PME1 - 1
+
+              DO 60 P = PME1, PME1 + LEN (ME) - 1
+                 I = IW (P)
+                 NVI = NV (I)
+                 IF (NVI .GT. 0) THEN
+
+C                   ----------------------------------------------------
+C                   i is a principal variable not yet placed in Lme.
+C                   store i in new list
+C                   ----------------------------------------------------
+
+                    DEGME = DEGME + NVI
+C                   flag i as being in Lme by negating nv (i)
+                    NV (I) = -NVI
+                    PME2 = PME2 + 1
+                    IW (PME2) = I
+
+C                   ----------------------------------------------------
+C                   remove variable i from degree list.
+C                   ----------------------------------------------------
+
+                    ILAST = LAST (I)
+                    INEXT = NEXT (I)
+                    IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                    IF (ILAST .NE. 0) THEN
+                       NEXT (ILAST) = INEXT
+                    ELSE
+C                      i is at the head of the degree list
+                       HEAD (DEGREE (I)) = INEXT
+                       ENDIF
+
+                    ENDIF
+60               CONTINUE
+C             this element takes no new memory in iw:
+              NEWMEM = 0
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             construct the new element in empty space, iw (pfree ...)
+C             ----------------------------------------------------------
+
+              P = PE (ME)
+              PME1 = PFREE
+              SLENME = LEN (ME) - ELENME
+
+              DO 120 KNT1 = 1, ELENME + 1
+
+                 IF (KNT1 .GT. ELENME) THEN
+C                   search the supervariables in me.
+                    E = ME
+                    PJ = P
+                    LN = SLENME
+                 ELSE
+C                   search the elements in me.
+                    E = IW (P)
+                    P = P + 1
+                    PJ = PE (E)
+                    LN = LEN (E)
+                    ENDIF
+
+C                -------------------------------------------------------
+C                search for different supervariables and add them to the
+C                new list, compressing when necessary. this loop is
+C                executed once for each element in the list and once for
+C                all the supervariables in the list.
+C                -------------------------------------------------------
+
+                 DO 110 KNT2 = 1, LN
+                    I = IW (PJ)
+                    PJ = PJ + 1
+                    NVI = NV (I)
+                    IF (NVI .GT. 0) THEN
+
+C                      -------------------------------------------------
+C                      compress iw, if necessary
+C                      -------------------------------------------------
+
+                       IF (PFREE .GT. IWLEN) THEN
+C                         prepare for compressing iw by adjusting
+C                         pointers and lengths so that the lists being
+C                         searched in the inner and outer loops contain
+C                         only the remaining entries.
+
+                          PE (ME) = P
+                          LEN (ME) = LEN (ME) - KNT1
+                          IF (LEN (ME) .EQ. 0) THEN
+C                            nothing left of supervariable me
+                             PE (ME) = 0
+                             ENDIF
+                          PE (E) = PJ
+                          LEN (E) = LN - KNT2
+                          IF (LEN (E) .EQ. 0) THEN
+C                            nothing left of element e
+                             PE (E) = 0
+                             ENDIF
+
+                          NCMPA = NCMPA + 1
+C                         store first item in pe
+C                         set first entry to -item
+                          DO 70 J = 1, N
+                             PN = PE (J)
+                             IF (PN .GT. 0) THEN
+                                PE (J) = IW (PN)
+                                IW (PN) = -J
+                                ENDIF
+70                           CONTINUE
+
+C                         psrc/pdst point to source/destination
+                          PDST = 1
+                          PSRC = 1
+                          PEND = PME1 - 1
+
+C                         while loop:
+80                        CONTINUE
+                          IF (PSRC .LE. PEND) THEN
+C                            search for next negative entry
+                             J = -IW (PSRC)
+                             PSRC = PSRC + 1
+                             IF (J .GT. 0) THEN
+                                IW (PDST) = PE (J)
+                                PE (J) = PDST
+                                PDST = PDST + 1
+C                               copy from source to destination
+                                LENJ = LEN (J)
+                                DO 90 KNT3 = 0, LENJ - 2
+                                   IW (PDST + KNT3) = IW (PSRC + KNT3)
+90                                 CONTINUE
+                                PDST = PDST + LENJ - 1
+                                PSRC = PSRC + LENJ - 1
+                                ENDIF
+                             GOTO 80
+                             ENDIF
+
+C                         move the new partially-constructed element
+                          P1 = PDST
+                          DO 100 PSRC = PME1, PFREE - 1
+                             IW (PDST) = IW (PSRC)
+                             PDST = PDST + 1
+100                          CONTINUE
+                          PME1 = P1
+                          PFREE = PDST
+                          PJ = PE (E)
+                          P = PE (ME)
+                          ENDIF
+
+C                      -------------------------------------------------
+C                      i is a principal variable not yet placed in Lme
+C                      store i in new list
+C                      -------------------------------------------------
+
+                       DEGME = DEGME + NVI
+C                      flag i as being in Lme by negating nv (i)
+                       NV (I) = -NVI
+                       IW (PFREE) = I
+                       PFREE = PFREE + 1
+
+C                      -------------------------------------------------
+C                      remove variable i from degree link list
+C                      -------------------------------------------------
+
+                       ILAST = LAST (I)
+                       INEXT = NEXT (I)
+                       IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                       IF (ILAST .NE. 0) THEN
+                          NEXT (ILAST) = INEXT
+                       ELSE
+C                         i is at the head of the degree list
+                          HEAD (DEGREE (I)) = INEXT
+                          ENDIF
+
+                       ENDIF
+110                 CONTINUE
+
+                 IF (E .NE. ME) THEN
+C                   set tree pointer and flag to indicate element e is
+C                   absorbed into new element me (the parent of e is me)
+                    PE (E) = -ME
+                    W (E) = 0
+                    ENDIF
+120              CONTINUE
+
+              PME2 = PFREE - 1
+C             this element takes newmem new memory in iw (possibly zero)
+              NEWMEM = PFREE - PME1
+              MEM = MEM + NEWMEM
+              MAXMEM = MAX (MAXMEM, MEM)
+              ENDIF
+
+C          -------------------------------------------------------------
+C          me has now been converted into an element in iw (pme1..pme2)
+C          -------------------------------------------------------------
+
+C          degme holds the external degree of new element
+           DEGREE (ME) = DEGME
+           PE (ME) = PME1
+           LEN (ME) = PME2 - PME1 + 1
+
+C          -------------------------------------------------------------
+C          make sure that wflg is not too large.  With the current
+C          value of wflg, wflg+n must not cause integer overflow
+C          -------------------------------------------------------------
+
+           IF (WFLG .GE. WBIG) THEN
+              DO 130 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+130              CONTINUE
+              WFLG = 2
+              ENDIF
+
+C=======================================================================
+C  COMPUTE (w (e) - wflg) = |Le\Lme| FOR ALL ELEMENTS
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 1:  compute the external degrees of previous elements
+C          with respect to the current element.  That is:
+C               (w (e) - wflg) = |Le \ Lme|
+C          for each element e that appears in any supervariable in Lme.
+C          The notation Le refers to the pattern (list of
+C          supervariables) of a previous element e, where e is not yet
+C          absorbed, stored in iw (pe (e) + 1 ... pe (e) + iw (pe (e))).
+C          The notation Lme refers to the pattern of the current element
+C          (stored in iw (pme1..pme2)).   If (w (e) - wflg) becomes
+C          zero, then the element e will be absorbed in scan 2.
+C          -------------------------------------------------------------
+
+           DO 150 PME = PME1, PME2
+              I = IW (PME)
+              ELN = ELEN (I)
+              IF (ELN .GT. 0) THEN
+C                note that nv (i) has been negated to denote i in Lme:
+                 NVI = -NV (I)
+                 WNVI = WFLG - NVI
+                 DO 140 P = PE (I), PE (I) + ELN - 1
+                    E = IW (P)
+                    WE = W (E)
+                    IF (WE .GE. WFLG) THEN
+C                      unabsorbed element e has been seen in this loop
+                       WE = WE - NVI
+                    ELSE IF (WE .NE. 0) THEN
+C                      e is an unabsorbed element
+C                      this is the first we have seen e in all of Scan 1
+                       WE = DEGREE (E) + WNVI
+                       ENDIF
+                    W (E) = WE
+140                 CONTINUE
+                 ENDIF
+150           CONTINUE
+
+C=======================================================================
+C  DEGREE UPDATE AND ELEMENT ABSORPTION
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 2:  for each i in Lme, sum up the degree of Lme (which
+C          is degme), plus the sum of the external degrees of each Le
+C          for the elements e appearing within i, plus the
+C          supervariables in i.  Place i in hash list.
+C          -------------------------------------------------------------
+
+           DO 180 PME = PME1, PME2
+              I = IW (PME)
+              P1 = PE (I)
+              P2 = P1 + ELEN (I) - 1
+              PN = P1
+              HASH = 0
+              DEG = 0
+
+C             ----------------------------------------------------------
+C             scan the element list associated with supervariable i
+C             ----------------------------------------------------------
+
+C             UMFPACK/MA38-style approximate degree:
+              DO 160 P = P1, P2
+                 E = IW (P)
+                 WE = W (E)
+                 IF (WE .NE. 0) THEN
+C                   e is an unabsorbed element
+                    DEG = DEG + WE - WFLG
+                    IW (PN) = E
+                    PN = PN + 1
+                    HASH = HASH + E
+                    ENDIF
+160              CONTINUE
+
+C             count the number of elements in i (including me):
+              ELEN (I) = PN - P1 + 1
+
+C             ----------------------------------------------------------
+C             scan the supervariables in the list associated with i
+C             ----------------------------------------------------------
+
+              P3 = PN
+              DO 170 P = P2 + 1, P1 + LEN (I) - 1
+                 J = IW (P)
+                 NVJ = NV (J)
+                 IF (NVJ .GT. 0) THEN
+C                   j is unabsorbed, and not in Lme.
+C                   add to degree and add to new list
+                    DEG = DEG + NVJ
+                    IW (PN) = J
+                    PN = PN + 1
+                    HASH = HASH + J
+                    ENDIF
+170              CONTINUE
+
+C             ----------------------------------------------------------
+C             update the degree and check for mass elimination
+C             ----------------------------------------------------------
+
+              IF (ELEN (I) .EQ. 1 .AND. P3 .EQ. PN) THEN
+
+C                -------------------------------------------------------
+C                mass elimination
+C                -------------------------------------------------------
+
+C                There is nothing left of this node except for an
+C                edge to the current pivot element.  elen (i) is 1,
+C                and there are no variables adjacent to node i.
+C                Absorb i into the current pivot element, me.
+
+                 PE (I) = -ME
+                 NVI = -NV (I)
+                 DEGME = DEGME - NVI
+                 NVPIV = NVPIV + NVI
+                 NEL = NEL + NVI
+                 NV (I) = 0
+                 ELEN (I) = 0
+
+              ELSE
+
+C                -------------------------------------------------------
+C                update the upper-bound degree of i
+C                -------------------------------------------------------
+
+C                the following degree does not yet include the size
+C                of the current element, which is added later:
+                 DEGREE (I) = MIN (DEGREE (I), DEG)
+
+C                -------------------------------------------------------
+C                add me to the list for i
+C                -------------------------------------------------------
+
+C                move first supervariable to end of list
+                 IW (PN) = IW (P3)
+C                move first element to end of element part of list
+                 IW (P3) = IW (P1)
+C                add new element to front of list.
+                 IW (P1) = ME
+C                store the new length of the list in len (i)
+                 LEN (I) = PN - P1 + 1
+
+C                -------------------------------------------------------
+C                place in hash bucket.  Save hash key of i in last (i).
+C                -------------------------------------------------------
+
+                 HASH = MOD (HASH, HMOD) + 1
+                 J = HEAD (HASH)
+                 IF (J .LE. 0) THEN
+C                   the degree list is empty, hash head is -j
+                    NEXT (I) = -J
+                    HEAD (HASH) = -I
+                 ELSE
+C                   degree list is not empty
+C                   use last (head (hash)) as hash head
+                    NEXT (I) = LAST (J)
+                    LAST (J) = I
+                    ENDIF
+                 LAST (I) = HASH
+                 ENDIF
+180           CONTINUE
+
+           DEGREE (ME) = DEGME
+
+C          -------------------------------------------------------------
+C          Clear the counter array, w (...), by incrementing wflg.
+C          -------------------------------------------------------------
+
+           DMAX = MAX (DMAX, DEGME)
+           WFLG = WFLG + DMAX
+
+C          make sure that wflg+n does not cause integer overflow
+           IF (WFLG .GE. WBIG) THEN
+              DO 190 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+190              CONTINUE
+              WFLG = 2
+              ENDIF
+C          at this point, w (1..n) .lt. wflg holds
+
+C=======================================================================
+C  SUPERVARIABLE DETECTION
+C=======================================================================
+
+           DO 250 PME = PME1, PME2
+              I = IW (PME)
+              IF (NV (I) .LT. 0) THEN
+C                i is a principal variable in Lme
+
+C                -------------------------------------------------------
+C                examine all hash buckets with 2 or more variables.  We
+C                do this by examining all unique hash keys for super-
+C                variables in the pattern Lme of the current element, me
+C                -------------------------------------------------------
+
+                 HASH = LAST (I)
+C                let i = head of hash bucket, and empty the hash bucket
+                 J = HEAD (HASH)
+                 IF (J .EQ. 0) GOTO 250
+                 IF (J .LT. 0) THEN
+C                   degree list is empty
+                    I = -J
+                    HEAD (HASH) = 0
+                 ELSE
+C                   degree list is not empty, restore last () of head
+                    I = LAST (J)
+                    LAST (J) = 0
+                    ENDIF
+                 IF (I .EQ. 0) GOTO 250
+
+C                while loop:
+200              CONTINUE
+                 IF (NEXT (I) .NE. 0) THEN
+
+C                   ----------------------------------------------------
+C                   this bucket has one or more variables following i.
+C                   scan all of them to see if i can absorb any entries
+C                   that follow i in hash bucket.  Scatter i into w.
+C                   ----------------------------------------------------
+
+                    LN = LEN (I)
+                    ELN = ELEN (I)
+C                   do not flag the first element in the list (me)
+                    DO 210 P = PE (I) + 1, PE (I) + LN - 1
+                       W (IW (P)) = WFLG
+210                    CONTINUE
+
+C                   ----------------------------------------------------
+C                   scan every other entry j following i in bucket
+C                   ----------------------------------------------------
+
+                    JLAST = I
+                    J = NEXT (I)
+
+C                   while loop:
+220                 CONTINUE
+                    IF (J .NE. 0) THEN
+
+C                      -------------------------------------------------
+C                      check if j and i have identical nonzero pattern
+C                      -------------------------------------------------
+
+                       IF (LEN (J) .NE. LN) THEN
+C                         i and j do not have same size data structure
+                          GOTO 240
+                          ENDIF
+                       IF (ELEN (J) .NE. ELN) THEN
+C                         i and j differ in number of adjacent elements
+                          GOTO 240
+                          ENDIF
+C                      do not flag the first element in the list (me)
+                       DO 230 P = PE (J) + 1, PE (J) + LN - 1
+                          IF (W (IW (P)) .NE. WFLG) THEN
+C                            an entry (iw(p)) is in j but not in i
+                             GOTO 240
+                             ENDIF
+230                       CONTINUE
+
+C                      -------------------------------------------------
+C                      found it!  j can be absorbed into i
+C                      -------------------------------------------------
+
+                       PE (J) = -I
+C                      both nv (i) and nv (j) are negated since they
+C                      are in Lme, and the absolute values of each
+C                      are the number of variables in i and j:
+                       NV (I) = NV (I) + NV (J)
+                       NV (J) = 0
+                       ELEN (J) = 0
+C                      delete j from hash bucket
+                       J = NEXT (J)
+                       NEXT (JLAST) = J
+                       GOTO 220
+
+C                      -------------------------------------------------
+240                    CONTINUE
+C                      j cannot be absorbed into i
+C                      -------------------------------------------------
+
+                       JLAST = J
+                       J = NEXT (J)
+                       GOTO 220
+                       ENDIF
+
+C                   ----------------------------------------------------
+C                   no more variables can be absorbed into i
+C                   go to next i in bucket and clear flag array
+C                   ----------------------------------------------------
+
+                    WFLG = WFLG + 1
+                    I = NEXT (I)
+                    IF (I .NE. 0) GOTO 200
+                    ENDIF
+                 ENDIF
+250           CONTINUE
+
+C=======================================================================
+C  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMENT
+C=======================================================================
+
+           P = PME1
+           NLEFT = N - NEL
+           DO 260 PME = PME1, PME2
+              I = IW (PME)
+              NVI = -NV (I)
+              IF (NVI .GT. 0) THEN
+C                i is a principal variable in Lme
+C                restore nv (i) to signify that i is principal
+                 NV (I) = NVI
+
+C                -------------------------------------------------------
+C                compute the external degree (add size of current elem)
+C                -------------------------------------------------------
+
+                 DEG = MAX (1, MIN (DEGREE (I) + DEGME-NVI, NLEFT-NVI))
+
+C                -------------------------------------------------------
+C                place the supervariable at the head of the degree list
+C                -------------------------------------------------------
+
+                 INEXT = HEAD (DEG)
+                 IF (INEXT .NE. 0) LAST (INEXT) = I
+                 NEXT (I) = INEXT
+                 LAST (I) = 0
+                 HEAD (DEG) = I
+
+C                -------------------------------------------------------
+C                save the new degree, and find the minimum degree
+C                -------------------------------------------------------
+
+                 MINDEG = MIN (MINDEG, DEG)
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                place the supervariable in the element pattern
+C                -------------------------------------------------------
+
+                 IW (P) = I
+                 P = P + 1
+                 ENDIF
+260           CONTINUE
+
+C=======================================================================
+C  FINALIZE THE NEW ELEMENT
+C=======================================================================
+
+           NV (ME) = NVPIV + DEGME
+C          nv (me) is now the degree of pivot (including diagonal part)
+C          save the length of the list for the new element me
+           LEN (ME) = P - PME1
+           IF (LEN (ME) .EQ. 0) THEN
+C             there is nothing left of the current pivot element
+              PE (ME) = 0
+              W (ME) = 0
+              ENDIF
+           IF (NEWMEM .NE. 0) THEN
+C             element was not constructed in place: deallocate part
+C             of it (final size is less than or equal to newmem,
+C             since newly nonprincipal variables have been removed).
+              PFREE = P
+              MEM = MEM - NEWMEM + LEN (ME)
+              ENDIF
+
+C=======================================================================
+C          END WHILE (selecting pivots)
+           GOTO 30
+           ENDIF
+C=======================================================================
+
+C=======================================================================
+C  COMPUTE THE PERMUTATION VECTORS
+C=======================================================================
+
+C       ----------------------------------------------------------------
+C       The time taken by the following code is O(n).  At this
+C       point, elen (e) = -k has been done for all elements e,
+C       and elen (i) = 0 has been done for all nonprincipal
+C       variables i.  At this point, there are no principal
+C       supervariables left, and all elements are absorbed.
+C       ----------------------------------------------------------------
+
+C       ----------------------------------------------------------------
+C       compute the ordering of unordered nonprincipal variables
+C       ----------------------------------------------------------------
+
+        DO 290 I = 1, N
+           IF (ELEN (I) .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             i is an un-ordered row.  Traverse the tree from i until
+C             reaching an element, e.  The element, e, was the
+C             principal supervariable of i and all nodes in the path
+C             from i to when e was selected as pivot.
+C             ----------------------------------------------------------
+
+              J = -PE (I)
+C             while (j is a variable) do:
+270           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 J = -PE (J)
+                 GOTO 270
+                 ENDIF
+              E = J
+
+C             ----------------------------------------------------------
+C             get the current pivot ordering of e
+C             ----------------------------------------------------------
+
+              K = -ELEN (E)
+
+C             ----------------------------------------------------------
+C             traverse the path again from i to e, and compress the
+C             path (all nodes point to e).  Path compression allows
+C             this code to compute in O(n) time.  Order the unordered
+C             nodes in the path, and place the element e at the end.
+C             ----------------------------------------------------------
+
+              J = I
+C             while (j is a variable) do:
+280           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 JNEXT = -PE (J)
+                 PE (J) = -E
+                 IF (ELEN (J) .EQ. 0) THEN
+C                   j is an unordered row
+                    ELEN (J) = K
+                    K = K + 1
+                    ENDIF
+                 J = JNEXT
+                 GOTO 280
+                 ENDIF
+C             leave elen (e) negative, so we know it is an element
+              ELEN (E) = -K
+              ENDIF
+290        CONTINUE
+
+C       ----------------------------------------------------------------
+C       reset the inverse permutation (elen (1..n)) to be positive,
+C       and compute the permutation (last (1..n)).
+C       ----------------------------------------------------------------
+
+        DO 300 I = 1, N
+           K = ABS (ELEN (I))
+           LAST (K) = I
+           ELEN (I) = K
+300        CONTINUE
+
+C=======================================================================
+C  RETURN THE MEMORY USAGE IN IW
+C=======================================================================
+
+C       If maxmem is less than or equal to iwlen, then no compressions
+C       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise
+C       compressions did occur, and iwlen would have had to have been
+C       greater than or equal to maxmem for no compressions to occur.
+C       Return the value of maxmem in the pfree argument.
+
+        PFREE = MAXMEM
+
+        RETURN
+        END
+
diff --git a/contrib/taucs/external/src/amdbarmex.f b/contrib/taucs/external/src/amdbarmex.f
new file mode 100644
index 0000000000000000000000000000000000000000..172cf999be1b7882b0f83951db092fc7848777c1
--- /dev/null
+++ b/contrib/taucs/external/src/amdbarmex.f
@@ -0,0 +1,252 @@
+C* ========================================================================== * 
+C* === amdbar - a sparse matrix ordering algorithm ========================== * 
+C* ========================================================================== * 
+C
+C
+C   amdbar:  An approximate minimum degree ordering algorithm.
+C
+C   Usage:
+C
+C	p = amdbar (A) ;
+C
+C   Purpose:
+C
+C	Finds a permutation P such that the factorization PAP'=LL' (or LDL')
+C	has less fill-in and requires fewer floating point operations than
+C	the factorization A=LL'.  Returns P as a permutation vector, so that
+C	the permuted matrix is A (p,p).
+C
+C	If the n-by-n matrix A is not stored as a sparse matrix, p = 1:n is
+C	returned.  Note that this is NOT in keeping with the philosophy in
+C	Matlab, in which the outcome of this routine should depend on the value
+C	of A, not its data structure.  If this concerns you, then just use
+C	p = amdbar (sparse (A)) ;
+C
+C   Authors:
+C
+C	Amdbar was written by Timothy A. Davis, Patrick Amestoy, Iain S. Duff,
+C	and John K. Reid.  Timothy A. Davis (davis@cise.ufl.edu), University
+C	of Florida, wrote the Matlab interface for amdbar (this file).
+C
+C   Date (of this file, amdbarmex.f, the Matlab interface for AMDBAR):
+C
+C	August 6, 1998.  Version 1.0.  
+C
+C   Acknowledgements:
+C
+C	This work was supported by the National Science Foundation, under
+C	grants DMS-9504974 and DMS-9803599.
+C
+C    Notice (note the difference between amdbarmex.f and amdbar.f, below):
+C
+C	Copyright (c) 1998 by the University of Florida.  All Rights Reserved.
+C
+C	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+C	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+C
+C	Permission is hereby granted to use or copy this file (ONLY) for any
+C	purpose, provided the above notices are retained on all copies.
+C	User documentation of any code that uses this code must cite the
+C	Authors, the Copyright, and "Used by permission."  If this code is
+C	accessible from within Matlab, then typing "help amdbar"
+C	(with no arguments) must cite the Authors.  Permission to modify this
+C	file (ONLY) and to distribute modified code is granted, provided the
+C	above notices are retained, and a notice that the code was modified is
+C	included with the above copyright notice.  You must also retain the
+C	Availability information below, of the original version.
+C
+C	NOTE: This Matlab interface software is provided free of charge.
+C	However, the computational kernel, amdbar.f, has stricter licensing
+C	requirements.  Please see the licensing restrictions in amdbar.f,
+C	specifically:
+C
+C	************************************************************************
+C	* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+C	* and AMDATR) may be used SOLELY for educational, research, and
+C	* benchmarking purposes by non-profit organizations and the U.S.
+C	* government.  Commercial and other organizations may make use of the
+C	* AMD routines SOLELY for benchmarking purposes only.  The AMD
+C	* routines may be modified by or on behalf of the User for such
+C	* use but at no time shall the AMD routines or any such modified
+C	* version of them become the property of the User.  The AMD routines
+C	* are provided without warranty of any kind, either expressed or
+C	* implied.  Neither the Authors nor their employers shall be liable
+C	* for any direct or consequential loss or damage whatsoever arising
+C	* out of the use or misuse of the AMD routines by the User.  The AMD
+C	* routines must not be sold.  You may make copies of the AMD routines,
+C	* but this NOTICE and the Copyright notice must appear in all copies.
+C	* Any other use of the AMD routines requires written permission.
+C	* Your use of the AMD routines is an implicit agreement to these
+C	* conditions."
+C	************************************************************************
+C
+C	You *must* abide by these restrictions for amdbar.f, even though this
+C	file (amdbarmex.f, the Matlab interface for AMDBAR) has less stringent
+C	restrictions.
+C
+C    Availability:
+C
+C	This file is located at
+C
+C		http://www.cise.ufl.edu/~davis/amd/amdbarmex.f
+C
+C	The amdbar.f file is required, located at either of the two locations:
+C
+C		http://www.netlib.org/linalg/amd/amdbar.f
+C		http://www.cise.ufl.edu/~davis/amd/amdbarmex.f
+C
+C
+C    Tested under Solaris 2.6 and Matlab 5.2.  You may need to change the value
+C    of iovflo, below (the largest positive integer your computer can
+C    represent).
+C
+C-------------------------------------------------------------------------------
+
+	subroutine mexFunction (nlhs, plhs, nrhs, prhs)
+c	Matlab gateway for p = amdbar (A).  nlhs and nrhs are the output
+c	and input argument counts (each must be 1); plhs and prhs hold
+c	the argument handles, which are kept in default integers.
+	integer plhs (*), prhs (*)
+	integer nlhs, nrhs
+	integer mxGetM, mxGetN, mxCreateFull, mxGetPr, mxIsSparse,
+     $		mxGetJc, mxGetIr, mxCalloc, mxGetNzmax
+	integer pe, degree, nv, next, last, head, elen, w, len, iw,
+     $		nrow, ncol, n, pa, a, nz, perm, iwlen
+
+	if (nrhs .ne. 1) then
+	    call mexErrMsgTxt ('One input argument required')
+	endif
+	if (nlhs .ne. 1) then
+	    call mexErrMsgTxt ('One output argument required')
+	endif
+
+c	get size of matrix, which must be square
+	nrow = mxGetM (prhs (1))
+	ncol = mxGetN (prhs (1))
+	if (nrow .ne. ncol) then
+	    call mexErrMsgTxt ('Matrix must be square')
+	endif
+	n = ncol
+c	create the permutation vector (1-by-n), for output
+	plhs (1) = mxCreateFull (1, n, 0)
+	perm = mxGetPr (plhs (1))
+c	a matrix not stored as sparse gets the identity, p = 1:n
+	if (mxIsSparse (prhs (1)) .eq. 0) then
+	    call idperm (%VAL (perm), n)
+	    return
+	endif
+c	column pointers, row indices, and allocated entry count of A
+	pa = mxGetJc (prhs (1))
+	a  = mxGetIr (prhs (1))
+	nz = mxGetNzmax (prhs (1))
+c	allocate workspace (4 bytes per integer); iw has room for the
+c	nz entries of A plus nz/5 spare entries
+	iwlen = nz + nz/5
+	iw     = mxCalloc (iwlen, 4)
+	pe     = mxCalloc (n, 4)
+	degree = mxCalloc (n, 4)
+	nv     = mxCalloc (n, 4)
+	next   = mxCalloc (n, 4)
+	head   = mxCalloc (n, 4)
+	last   = mxCalloc (n, 4)
+	elen   = mxCalloc (n, 4)
+	w      = mxCalloc (n, 4)
+	len    = mxCalloc (n, 4)
+
+	call amdcomp (n, nz, %VAL(pe), %VAL(iw), iwlen, %VAL(nv),
+     $		%VAL(next), %VAL(last), %VAL(head), %VAL(elen),
+     $		%VAL(degree), %VAL(w), %VAL(len), %VAL(a), %VAL(pa),
+     $		%VAL(perm))
+
+c	free all ten workspace arrays (version 1.0 never freed last)
+	call mxFree (iw)
+	call mxFree (pe)
+	call mxFree (degree)
+	call mxFree (nv)
+	call mxFree (next)
+	call mxFree (head)
+	call mxFree (last)
+	call mxFree (elen)
+	call mxFree (w)
+	call mxFree (len)
+	return
+	end
+
+
+C-------------------------------------------------------------------------------
+C Fortran front-end to AMD routines, called by the mex function. 
+C-------------------------------------------------------------------------------
+
+	subroutine amdcomp (n, nz, pe, iw, iwlen, nv, next, last,
+     $		head, elen, degree, w, len, a, pa, perm)
+c	Build the adjacency lists of the n-by-n matrix given by
+c	(pa, a), run the ordering selected by "order", and return
+c	the permutation in perm.  pa and a are zero-based; the
+c	other integer arrays are workspace handed on to "order".
+	integer n, nz, iwlen
+	integer pe (n), iw (iwlen), nv (n), next (n), last (n)
+	integer head (n), elen (n), degree (n), w (n), len (n)
+	integer a (nz), pa (n+1)
+	real*8 perm (n)
+
+	integer ifree, ncmpa, iovflo, jcol, irow, k
+
+c	pack the off-diagonal entries of each column into iw
+	ifree = 1
+	do 20 jcol = 1, n
+	    pe (jcol) = ifree
+	    do 10 k = pa (jcol) + 1, pa (jcol+1)
+c		shift the row index into the 1..n range
+		irow = a (k) + 1
+c		the diagonal entry is dropped
+		if (irow .eq. jcol) goto 10
+		iw (ifree) = irow
+		ifree = ifree + 1
+10	    continue
+	    len (jcol) = ifree - pe (jcol)
+20	continue
+
+c	largest positive integer, handed to the ordering routine
+	iovflo = 2147483647
+
+c	the AMD variant that runs is whichever one "order" calls
+	call order (n, pe, iw, len, iwlen, ifree, nv, next,
+     $		last, head, elen, degree, ncmpa, w, iovflo)
+
+c	last (1..n) holds the permutation; store it as real*8
+	do 30 k = 1, n
+	    perm (k) = dble (last (k))
+30	continue
+	return
+	end
+
+C-------------------------------------------------------------------------------
+
+	subroutine idperm (perm, n)
+c	fill perm (1..n) with the identity permutation 1, 2, ..., n
+	integer n, k
+	real*8 perm (n)
+	do 10 k = 1, n
+	    perm (k) = dble (k)
+10	continue
+	end
+
+C-------------------------------------------------------------------------------
+
+C	If you want to use a different AMD routine, just change the name,
+C	below.
+
+        subroutine order (n, pe, iw, len, iwlen, pfree, nv, next,
+     $          last, head, elen, degree, ncmpa, w, iovflo)
+c       Indirection layer between amdcomp and the ordering kernel:
+c       every argument is forwarded, unchanged and in the same
+c       order, to amdbar.
+        integer n, iwlen, pfree, ncmpa, iovflo
+        integer pe (n), iw (iwlen), len (n), nv (n), next (n)
+        integer last (n), head (n), elen (n), degree (n), w (n)
+
+        call amdbar (n, pe, iw, len, iwlen, pfree, nv, next,
+     $          last, head, elen, degree, ncmpa, w, iovflo)
+
+        end
+
diff --git a/contrib/taucs/external/src/amdexa.c b/contrib/taucs/external/src/amdexa.c
new file mode 100644
index 0000000000000000000000000000000000000000..62dc27404724f4421ef1e2f6b517302d7d80b24e
--- /dev/null
+++ b/contrib/taucs/external/src/amdexa.c
@@ -0,0 +1,1415 @@
+/* amdexa.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Subroutine */ int amdexa_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo;
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer hash, pend, hmod, lenj, dmax_, wbig, wflg, psrc, pdst, e, 
+	    i, j, k, p, degme, x, nleft, ilast, jlast, inext, jnext, p1, 
+	    nvpiv, p2, p3, me, ln, pj, pn, mindeg, elenme, slenme, maxmem, 
+	    newmem, deg, eln, mem, nel, pme, nvi, nvj, pme1, pme2, knt1, knt2,
+	     knt3;
+
+/* -----------------------------------------------------------------------
+ */
+/*  The MC47 / AMD suite of minimum degree ordering algorithms. */
+
+/*  This code is one of seven variations of a single algorithm: */
+/*  the primary routine (MC47B/BD, only available in the Harwell */
+/*  Subroutine Library), and 6 variations that differ only in */
+/*  how they compute the degree (available in NETLIB). */
+
+/*  For information on the Harwell Subroutine Library, contact */
+/*  John Harding, Harwell Subroutine Library, B 552, AEA Technology, */
+/*  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573, */
+/*  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will */
+/*  provide details of price and conditions of use. */
+/* -----------------------------------------------------------------------
+ */
+/* ***********************************************************************
+ */
+/* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU, */
+/* and AMDATR) may be used SOLELY for educational, research, and */
+/* benchmarking purposes by non-profit organizations and the U.S. */
+/* government.  Commercial and other organizations may make use of the */
+/* AMD routines SOLELY for benchmarking purposes only.  The AMD */
+/* routines may be modified by or on behalf of the User for such */
+/* use but at no time shall the AMD routines or any such modified */
+/* version of them become the property of the User.  The AMD routines */
+/* are provided without warranty of any kind, either expressed or */
+/* implied.  Neither the Authors nor their employers shall be liable */
+/* for any direct or consequential loss or damage whatsoever arising */
+/* out of the use or misuse of the AMD routines by the User.  The AMD */
+/* routines must not be sold.  You may make copies of the AMD routines, */
+/* but this NOTICE and the Copyright notice must appear in all copies. */
+/* Any other use of the AMD routines requires written permission. */
+/* Your use of the AMD routines is an implicit agreement to these */
+/* conditions." */
+/* ***********************************************************************
+ */
+/* -----------------------------------------------------------------------
+ */
+/* AMDexa:  exact minimum (external) degree ordering algorithm */
+/* -----------------------------------------------------------------------
+ */
+/*  Variation 1: exact external degree (as used in MMD, for example. */
+/*  See A. George and J. Liu, "The evolution of the minimum degree */
+/*  ordering algorithm," SIAM Review, vol. 31, no. 1, pp. 1-19, 1989). */
+/*  Note that some of the comments in the code below reflect the */
+/*  MC47-style degree approximation.  Also note that we do not use */
+/*  multiple elimination or incomplete update, which are used in MMD. */
+
+/*  We recommend using MC47B/BD instead of this routine since MC47B/BD */
+/*  gives comparable results in much less time (this code has been */
+/*  observed to be up to 71 times slower than MC47B/BD). */
+/* -----------------------------------------------------------------------
+ */
+/* Given a representation of the nonzero pattern of a symmetric matrix, */
+/*       A, (excluding the diagonal) perform an exact minimum */
+/*       (external) degree ordering to compute a pivot order such */
+/*       that the introduction of nonzeros (fill-in) in the Cholesky */
+/*       factors A = LL^T are kept low.  At each step, the pivot */
+/*       selected is the one with the minimum exact external degree. */
+/* ********************************************************************** 
+*/
+/* ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ****** 
+*/
+/* ********************************************************************** 
+*/
+/* ** If you want error checking, a more versatile input format, and a ** 
+*/
+/* ** simpler user interface, then use MC47A/AD in the Harwell         ** 
+*/
+/* ** Subroutine Library, which checks for errors, transforms the      ** 
+*/
+/* ** input, and calls MC47B/BD.                                       ** 
+*/
+/* ********************************************************************** 
+*/
+/*       References:  (UF Tech Reports are available via anonymous ftp */
+/*       to ftp.cis.ufl.edu:cis/tech-reports). */
+
+/*       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern */
+/*               multifrontal method for sparse LU factorization", */
+/*               SIAM J. Matrix Analysis and Applications, to appear. */
+/*               also Univ. of Florida Technical Report TR-94-038. */
+/*               Discusses UMFPACK / MA38. */
+
+/*       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, */
+/*               "An approximate minimum degree ordering algorithm," */
+/*               SIAM J. Matrix Analysis and Applications (to appear), */
+/*               also Univ. of Florida Technical Report TR-94-039. */
+/*               Discusses this routine. */
+
+/*       [3] Alan George and Joseph Liu, "The evolution of the */
+/*               minimum degree ordering algorithm," SIAM Review, vol. */
+/*               31, no. 1, pp. 1-19, March 1989.  We list below the */
+/*               features mentioned in that paper that this code */
+/*               includes: */
+
+/*       mass elimination: */
+/*               Yes.  MA27 relied on supervariable detection for mass */
+/*               elimination. */
+/*       indistinguishable nodes: */
+/*               Yes (we call these "supervariables").  This was also in 
+*/
+/*               the MA27 code - although we modified the method of */
+/*               detecting them (the previous hash was the true degree, */
+/*               which we no longer keep track of).  A supervariable is */
+/*               a set of rows with identical nonzero pattern.  All */
+/*               variables in a supervariable are eliminated together. */
+/*               Each supervariable has as its numerical name that of */
+/*               one of its variables (its principal variable). */
+/*       quotient graph representation: */
+/*               Yes.  We use the term "element" for the cliques formed */
+/*               during elimination.  This was also in the MA27 code. */
+/*               The algorithm can operate in place, but it will work */
+/*               more efficiently if given some "elbow room." */
+/*       element absorption: */
+/*               Yes.  This was also in the MA27 code. */
+/*       external degree: */
+/*               Yes.  The MA27 code was based on the true degree. */
+/*       incomplete degree update and multiple elimination: */
+/*               No.  This was not in MA27, either.  Our method of */
+/*               degree update within MC47B/BD is element-based, not */
+/*               variable-based.  It is thus not well-suited for use */
+/*               with incomplete degree update or multiple elimination. */
+/* -----------------------------------------------------------------------
+ */
+/* Authors, and Copyright (C) 1995 by: */
+/*       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid. 
+*/
+
+/* Acknowledgements: */
+/*       This work (and the UMFPACK package) was supported by the */
+/*       National Science Foundation (ASC-9111263 and DMS-9223088). */
+/*       The UMFPACK/MA38 approximate degree update algorithm, the */
+/*       unsymmetric analog which forms the basis of MC47B/BD, was */
+/*       developed while Tim Davis was supported by CERFACS (Toulouse, */
+/*       France) in a post-doctoral position. */
+
+/* Date:  September, 1995 */
+/* -----------------------------------------------------------------------
+ */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT ARGUMENTS (unaltered): */
+/* -----------------------------------------------------------------------
+ */
+/* n:    The matrix order. */
+
+/*       Restriction:  1 .le. n .lt. (iovflo/2)-2 */
+/* iwlen:        The length of iw (1..iwlen).  On input, the matrix is */
+/*       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/*       slightly larger than what is required to hold the matrix, at */
+/*       least iwlen .ge. pfree + n is recommended.  Otherwise, */
+/*       excessive compressions will take place. */
+/*       *** We do not recommend running this algorithm with *** */
+/*       ***      iwlen .lt. pfree + n.                      *** */
+/*       *** Better performance will be obtained if          *** */
+/*       ***      iwlen .ge. pfree + n                       *** */
+/*       *** or better yet                                   *** */
+/*       ***      iwlen .gt. 1.2 * pfree                     *** */
+/*       *** (where pfree is its value on input).            *** */
+/*       The algorithm will not run at all if iwlen .lt. pfree-1. */
+
+/*       Restriction: iwlen .ge. pfree-1 */
+/* iovflo:       The largest positive integer that your computer can */
+/*       represent (-iovflo should also be representable).  On a 32-bit */
+/*       computer with 2's-complement arithmetic, */
+/*       iovflo = (2^31)-1 = 2,147,483,647. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/OUTPUT ARGUMENTS: */
+/* -----------------------------------------------------------------------
+ */
+/* pe:   On input, pe (i) is the index in iw of the start of row i, or */
+/*       zero if row i has no off-diagonal non-zeros. */
+
+/*       During execution, it is used for both supervariables and */
+/*       elements: */
+
+/*       * Principal supervariable i:  index into iw of the */
+/*               description of supervariable i.  A supervariable */
+/*               represents one or more rows of the matrix */
+/*               with identical nonzero pattern. */
+/*       * Non-principal supervariable i:  if i has been absorbed */
+/*               into another supervariable j, then pe (i) = -j. */
+/*               That is, j has the same pattern as i. */
+/*               Note that j might later be absorbed into another */
+/*               supervariable j2, in which case pe (i) is still -j, */
+/*               and pe (j) = -j2. */
+/*       * Unabsorbed element e:  the index into iw of the description */
+/*               of element e, if e has not yet been absorbed by a */
+/*               subsequent element.  Element e is created when */
+/*               the supervariable of the same name is selected as */
+/*               the pivot. */
+/*       * Absorbed element e:  if element e is absorbed into element */
+/*               e2, then pe (e) = -e2.  This occurs when the pattern of 
+*/
+/*               e (that is, Le) is found to be a subset of the pattern */
+/*               of e2 (that is, Le2).  If element e is "null" (it has */
+/*               no nonzeros outside its pivot block), then pe (e) = 0. */
+
+/*       On output, pe holds the assembly tree/forest, which implicitly */
+/*       represents a pivot order with identical fill-in as the actual */
+/*       order (via a depth-first search of the tree). */
+
+/*       On output: */
+/*       If nv (i) .gt. 0, then i represents a node in the assembly tree, 
+*/
+/*       and the parent of i is -pe (i), or zero if i is a root. */
+/*       If nv (i) = 0, then (i,-pe (i)) represents an edge in a */
+/*       subtree, the root of which is a node in the assembly tree. */
+/* pfree:        On input the tail end of the array, iw (pfree..iwlen), */
+/*       is empty, and the matrix is stored in iw (1..pfree-1). */
+/*       During execution, additional data is placed in iw, and pfree */
+/*       is modified so that iw (pfree..iwlen) is always the unused part 
+*/
+/*       of iw.  On output, pfree is set equal to the size of iw that */
+/*       would have been needed for no compressions to occur.  If */
+/*       ncmpa is zero, then pfree (on output) is less than or equal to */
+/*       iwlen, and the space iw (pfree+1 ... iwlen) was not used. */
+/*       Otherwise, pfree (on output) is greater than iwlen, and all the 
+*/
+/*       memory in iw was used. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/MODIFIED (undefined on output): */
+/* -----------------------------------------------------------------------
+ */
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/*       matrix, excluding the diagonal.  The contents of len (1..n) */
+/*       are undefined on output. */
+/* iw:   On input, iw (1..pfree-1) holds the description of each row i */
+/*       in the matrix.  The matrix must be symmetric, and both upper */
+/*       and lower triangular parts must be present.  The diagonal must */
+/*       not be present.  Row i is held as follows: */
+
+/*               len (i):  the length of the row i data structure */
+/*               iw (pe (i) ... pe (i) + len (i) - 1): */
+/*                       the list of column indices for nonzeros */
+/*                       in row i (simple supervariables), excluding */
+/*                       the diagonal.  All supervariables start with */
+/*                       one row/column each (supervariable i is just */
+/*                       row i). */
+/*               if len (i) is zero on input, then pe (i) is ignored */
+/*               on input. */
+
+/*               Note that the rows need not be in any particular order, 
+*/
+/*               and there may be empty space between the rows. */
+
+/*       During execution, the supervariable i experiences fill-in. */
+/*       This is represented by placing in i a list of the elements */
+/*       that cause fill-in in supervariable i: */
+
+/*               len (i):  the length of supervariable i */
+/*               iw (pe (i) ... pe (i) + elen (i) - 1): */
+/*                       the list of elements that contain i.  This list 
+*/
+/*                       is kept short by removing absorbed elements. */
+/*               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1): */
+/*                       the list of supervariables in i.  This list */
+/*                       is kept short by removing nonprincipal */
+/*                       variables, and any entry j that is also */
+/*                       contained in at least one of the elements */
+/*                       (j in Le) in the list for i (e in row i). */
+
+/*       When supervariable i is selected as pivot, we create an */
+/*       element e of the same name (e=i): */
+
+/*               len (e):  the length of element e */
+/*               iw (pe (e) ... pe (e) + len (e) - 1): */
+/*                       the list of supervariables in element e. */
+
+/*       An element represents the fill-in that occurs when supervariable 
+*/
+/*       i is selected as pivot (which represents the selection of row i 
+*/
+/*       and all non-principal variables whose principal variable is i). 
+*/
+/*       We use the term Le to denote the set of all supervariables */
+/*       in element e.  Absorbed supervariables and elements are pruned */
+/*       from these lists when computationally convenient. */
+
+/*       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. */
+/*       The contents of iw are undefined on output. */
+/* -----------------------------------------------------------------------
+ */
+/* OUTPUT (need not be set on input): */
+/* -----------------------------------------------------------------------
+ */
+/* nv:   During execution, abs (nv (i)) is equal to the number of rows */
+/*       that are represented by the principal supervariable i.  If i is 
+*/
+/*       a nonprincipal variable, then nv (i) = 0.  Initially, */
+/*       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a */
+/*       principal variable in the pattern Lme of the current pivot */
+/*       element me.  On output, nv (e) holds the true degree of element 
+*/
+/*       e at the time it was created (including the diagonal part). */
+/* ncmpa:        The number of times iw was compressed.  If this is */
+/*       excessive, then the execution took longer than what could have */
+/*       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20% */
+/*       larger than the value of pfree on input (or at least */
+/*       iwlen .ge. pfree + n).  The fastest performance will be */
+/*       obtained when ncmpa is returned as zero.  If iwlen is set to */
+/*       the value returned by pfree on *output*, then no compressions */
+/*       will occur. */
+/* elen: See the description of iw above.  At the start of execution, */
+/*       elen (i) is set to zero.  During execution, elen (i) is the */
+/*       number of elements in the list for supervariable i.  When e */
+/*       becomes an element, elen (e) = -nel is set, where nel is the */
+/*       current step of factorization.  elen (i) = 0 is done when i */
+/*       becomes nonprincipal. */
+
+/*       For variables, elen (i) .ge. 0 holds until just before the */
+/*       permutation vectors are computed.  For elements, */
+/*       elen (e) .lt. 0 holds. */
+
+/*       On output elen (1..n) holds the inverse permutation (the same */
+/*       as the 'INVP' argument in Sparspak).  That is, if k = elen (i), 
+*/
+/*       then row i is the kth pivot row.  Row i of A appears as the */
+/*       (elen(i))-th row in the permuted matrix, PAP^T. */
+/* last: In a degree list, last (i) is the supervariable preceding i, */
+/*       or zero if i is the head of the list.  In a hash bucket, */
+/*       last (i) is the hash key for i.  last (head (hash)) is also */
+/*       used as the head of a hash bucket if head (hash) contains a */
+/*       degree list (see head, below). */
+
+/*       On output, last (1..n) holds the permutation (the same as the */
+/*       'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/*       row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/*       in the permuted matrix, PAP^T. */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL (not input or output - used only during execution): */
+/* -----------------------------------------------------------------------
+ */
+/* degree:       If i is a supervariable, then degree (i) holds the */
+/*       current approximation of the external degree of row i (an upper 
+*/
+/*       bound).  The external degree is the number of nonzeros in row i, 
+*/
+/*       minus abs (nv (i)) (the diagonal part).  The bound is equal to */
+/*       the external degree if elen (i) is less than or equal to two. */
+
+/*       We also use the term "external degree" for elements e to refer */
+/*       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|, 
+*/
+/*       which is the degree of the off-diagonal part of the element e */
+/*       (not including the diagonal part). */
+/* head: head is used for degree lists.  head (deg) is the first */
+/*       supervariable in a degree list (all supervariables i in a */
+/*       degree list deg have the same approximate degree, namely, */
+/*       deg = degree (i)).  If the list deg is empty then */
+/*       head (deg) = 0. */
+
+/*       During supervariable detection head (hash) also serves as a */
+/*       pointer to a hash bucket. */
+/*       If head (hash) .gt. 0, there is a degree list of degree hash. */
+/*               The hash bucket head pointer is last (head (hash)). */
+/*       If head (hash) = 0, then the degree list and hash bucket are */
+/*               both empty. */
+/*       If head (hash) .lt. 0, then the degree list is empty, and */
+/*               -head (hash) is the head of the hash bucket. */
+/*       After supervariable detection is complete, all hash buckets */
+/*       are empty, and the (last (head (hash)) = 0) condition is */
+/*       restored for the non-empty degree lists. */
+/* next: next (i) is the supervariable following i in a link list, or */
+/*       zero if i is the last in the list.  Used for two kinds of */
+/*       lists:  degree lists and hash buckets (a supervariable can be */
+/*       in only one kind of list at a time). */
+/* w:    The flag array w determines the status of elements and */
+/*       variables, and the external degree of elements. */
+
+/*       for elements: */
+/*          if w (e) = 0, then the element e is absorbed */
+/*          if w (e) .ge. wflg, then w (e) - wflg is the size of */
+/*               the set |Le \ Lme|, in terms of nonzeros (the */
+/*               sum of abs (nv (i)) for each principal variable i that */
+/*               is both in the pattern of element e and NOT in the */
+/*               pattern of the current pivot element, me). */
+/*          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has */
+/*               not yet been seen in the scan of the element lists in */
+/*               the computation of |Le\Lme| in loop 150 below. */
+
+/*       for variables: */
+/*          during supervariable detection, if w (j) .ne. wflg then j is 
+*/
+/*          not in the pattern of variable i */
+
+/*       The w array is initialized by setting w (i) = 1 for all i, */
+/*       and by setting wflg = 2.  It is reinitialized if wflg becomes */
+/*       too large (to ensure that wflg+n does not cause integer */
+/*       overflow). */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL INTEGERS: */
+/* -----------------------------------------------------------------------
+ */
+/* deg:          the degree of a variable or element */
+/* degme:        size, |Lme|, of the current element, me (= degree (me)) 
+*/
+/* dext:         external degree, |Le \ Lme|, of some element e */
+/* dmax:         largest |Le| seen so far */
+/* e:            an element */
+/* elenme:       the length, elen (me), of element list of pivotal var. */
+/* eln:          the length, elen (...), of an element list */
+/* hash:         the computed value of the hash function */
+/* hmod:         the hash function is computed modulo hmod = max (1,n-1) 
+*/
+/* i:            a supervariable */
+/* ilast:        the entry in a link list preceding i */
+/* inext:        the entry in a link list following i */
+/* j:            a supervariable */
+/* jlast:        the entry in a link list preceding j */
+/* jnext:        the entry in a link list, or path, following j */
+/* k:            the pivot order of an element or variable */
+/* knt1:         loop counter used during element construction */
+/* knt2:         loop counter used during element construction */
+/* knt3:         loop counter used during compression */
+/* lenj:         len (j) */
+/* ln:           length of a supervariable list */
+/* maxmem:       amount of memory needed for no compressions */
+/* me:           current supervariable being eliminated, and the */
+/*                       current element created by eliminating that */
+/*                       supervariable */
+/* mem:          memory in use assuming no compressions have occurred */
+/* mindeg:       current minimum degree */
+/* nel:          number of pivots selected so far */
+/* newmem:       amount of new memory needed for current pivot element */
+/* nleft:        n - nel, the number of nonpivotal rows/columns remaining 
+*/
+/* nvi:          the number of variables in a supervariable i (= nv (i)) 
+*/
+/* nvj:          the number of variables in a supervariable j (= nv (j)) 
+*/
+/* nvpiv:        number of pivots in current element */
+/* slenme:       number of variables in variable list of pivotal variable 
+*/
+/* wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig. */
+/* we:           w (e) */
+/* wflg:         used for flagging the w array.  See description of iw. */
+/* wnvi:         wflg - nv (i) */
+/* x:            either a supervariable or an element */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL POINTERS: */
+/* -----------------------------------------------------------------------
+ */
+/*               Any parameter (pe (...) or pfree) or local variable */
+/*               starting with "p" (for Pointer) is an index into iw, */
+/*               and all indices into iw use variables starting with */
+/*               "p."  The only exception to this rule is the iwlen */
+/*               input argument. */
+/* p:            pointer into lots of things */
+/* p1:           pe (i) for some variable i (start of element list) */
+/* p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list) 
+*/
+/* p3:           index of first supervariable in clean list */
+/* pdst:         destination pointer, for compression */
+/* pend:         end of memory to compress */
+/* pj:           pointer into an element or variable */
+/* pme:          pointer into the current element (pme1...pme2) */
+/* pme1:         the current element, me, is stored in iw (pme1...pme2) */
+/* pme2:         the end of the current element */
+/* pn:           pointer into a "clean" variable, also used to compress */
+/* psrc:         source pointer, for compression */
+/* -----------------------------------------------------------------------
+ */
+/*  FUNCTIONS CALLED: */
+/* -----------------------------------------------------------------------
+ */
+/* =======================================================================
+ */
+/*  INITIALIZATIONS */
+/* =======================================================================
+ */
+    /* Parameter adjustments */
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    wflg = 2;
+    mindeg = 1;
+    *ncmpa = 0;
+    nel = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - 1;
+    hmod = max(i__1,i__2);
+    dmax_ = 0;
+    wbig = *iovflo - *n;
+    mem = *pfree - 1;
+    maxmem = mem;
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	last[i] = 0;
+	head[i] = 0;
+	nv[i] = 1;
+	w[i] = 1;
+	elen[i] = 0;
+	degree[i] = len[i];
+/* L10: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       initialize degree lists and eliminate rows with no off-diag. nz. 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	deg = degree[i];
+	if (deg > 0) {
+/*             --------------------------------------------------
+-------- */
+/*             place i in the degree list corresponding to its deg
+ree */
+/*             --------------------------------------------------
+-------- */
+	    inext = head[deg];
+	    if (inext != 0) {
+		last[inext] = i;
+	    }
+	    next[i] = inext;
+	    head[deg] = i;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             we have a variable that can be eliminated at once b
+ecause */
+/*             there is no off-diagonal non-zero in its row. */
+/*             --------------------------------------------------
+-------- */
+	    ++nel;
+	    elen[i] = -nel;
+	    pe[i] = 0;
+	    w[i] = 0;
+	}
+/* L20: */
+    }
+/* =======================================================================
+ */
+/*  WHILE (selecting pivots) DO */
+/* =======================================================================
+ */
+L30:
+    if (nel < *n) {
+/* ==================================================================
+===== */
+/*  GET PIVOT OF MINIMUM DEGREE */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          find next supervariable for elimination */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = *n;
+	for (deg = mindeg; deg <= i__1; ++deg) {
+	    me = head[deg];
+	    if (me > 0) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	mindeg = deg;
+/*          ---------------------------------------------------------
+---- */
+/*          remove chosen variable from link list */
+/*          ---------------------------------------------------------
+---- */
+	inext = next[me];
+	if (inext != 0) {
+	    last[inext] = 0;
+	}
+	head[deg] = inext;
+/*          ---------------------------------------------------------
+---- */
+/*          me represents the elimination of pivots nel+1 to nel+nv(me
+). */
+/*          place me itself as the first in this set.  It will be move
+d */
+/*          to the nel+nv(me) position when the permutation vectors ar
+e */
+/*          computed. */
+/*          ---------------------------------------------------------
+---- */
+	elenme = elen[me];
+	elen[me] = -(nel + 1);
+	nvpiv = nv[me];
+	nel += nvpiv;
+/* ==================================================================
+===== */
+/*  CONSTRUCT NEW ELEMENT */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          At this point, me is the pivotal supervariable.  It will b
+e */
+/*          converted into the current element.  Scan list of the */
+/*          pivotal supervariable, me, setting tree pointers and */
+/*          constructing new list of supervariables for the new elemen
+t, */
+/*          me.  p is a pointer to the current position in the old lis
+t. */
+/*          ---------------------------------------------------------
+---- */
+/*          flag the variable "me" as being in Lme by negating nv (me)
+ */
+	nv[me] = -nvpiv;
+	degme = 0;
+	if (elenme == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in place */
+/*             --------------------------------------------------
+-------- */
+	    pme1 = pe[me];
+	    pme2 = pme1 - 1;
+	    i__1 = pme1 + len[me] - 1;
+	    for (p = pme1; p <= i__1; ++p) {
+		i = iw[p];
+		nvi = nv[i];
+		if (nvi > 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   i is a principal variable not yet pla
+ced in Lme. */
+/*                   store i in new list */
+/*                   ------------------------------------
+---------------- */
+		    degme += nvi;
+/*                   flag i as being in Lme by negating nv
+ (i) */
+		    nv[i] = -nvi;
+		    ++pme2;
+		    iw[pme2] = i;
+/*                   ------------------------------------
+---------------- */
+/*                   remove variable i from degree list. 
+*/
+/*                   ------------------------------------
+---------------- */
+		    ilast = last[i];
+		    inext = next[i];
+		    if (inext != 0) {
+			last[inext] = ilast;
+		    }
+		    if (ilast != 0) {
+			next[ilast] = inext;
+		    } else {
+/*                      i is at the head of the degree
+ list */
+			head[degree[i]] = inext;
+		    }
+		}
+/* L60: */
+	    }
+/*             this element takes no new memory in iw: */
+	    newmem = 0;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in empty space, iw (pfree
+ ...) */
+/*             --------------------------------------------------
+-------- */
+	    p = pe[me];
+	    pme1 = *pfree;
+	    slenme = len[me] - elenme;
+	    i__1 = elenme + 1;
+	    for (knt1 = 1; knt1 <= i__1; ++knt1) {
+		if (knt1 > elenme) {
+/*                   search the supervariables in me. */
+		    e = me;
+		    pj = p;
+		    ln = slenme;
+		} else {
+/*                   search the elements in me. */
+		    e = iw[p];
+		    ++p;
+		    pj = pe[e];
+		    ln = len[e];
+		}
+/*                -------------------------------------------
+------------ */
+/*                search for different supervariables and add 
+them to the */
+/*                new list, compressing when necessary. this l
+oop is */
+/*                executed once for each element in the list a
+nd once for */
+/*                all the supervariables in the list. */
+/*                -------------------------------------------
+------------ */
+		i__2 = ln;
+		for (knt2 = 1; knt2 <= i__2; ++knt2) {
+		    i = iw[pj];
+		    ++pj;
+		    nvi = nv[i];
+		    if (nvi > 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      compress iw, if necessary */
+/*                      -----------------------------
+-------------------- */
+			if (*pfree > *iwlen) {
+/*                         prepare for compressing
+ iw by adjusting */
+/*                         pointers and lengths so
+ that the lists being */
+/*                         searched in the inner a
+nd outer loops contain */
+/*                         only the remaining entr
+ies. */
+			    pe[me] = p;
+			    len[me] -= knt1;
+			    if (len[me] == 0) {
+/*                            nothing left of 
+supervariable me */
+				pe[me] = 0;
+			    }
+			    pe[e] = pj;
+			    len[e] = ln - knt2;
+			    if (len[e] == 0) {
+/*                            nothing left of 
+element e */
+				pe[e] = 0;
+			    }
+			    ++(*ncmpa);
+/*                         store first item in pe 
+*/
+/*                         set first entry to -ite
+m */
+			    i__3 = *n;
+			    for (j = 1; j <= i__3; ++j) {
+				pn = pe[j];
+				if (pn > 0) {
+				    pe[j] = iw[pn];
+				    iw[pn] = -j;
+				}
+/* L70: */
+			    }
+/*                         psrc/pdst point to sour
+ce/destination */
+			    pdst = 1;
+			    psrc = 1;
+			    pend = pme1 - 1;
+/*                         while loop: */
+L80:
+			    if (psrc <= pend) {
+/*                            search for next 
+negative entry */
+				j = -iw[psrc];
+				++psrc;
+				if (j > 0) {
+				    iw[pdst] = pe[j];
+				    pe[j] = pdst;
+				    ++pdst;
+/*                               copy from
+ source to destination */
+				    lenj = len[j];
+				    i__3 = lenj - 2;
+				    for (knt3 = 0; knt3 <= i__3; ++knt3) {
+					iw[pdst + knt3] = iw[psrc + knt3];
+/* L90: */
+				    }
+				    pdst = pdst + lenj - 1;
+				    psrc = psrc + lenj - 1;
+				}
+				goto L80;
+			    }
+/*                         move the new partially-
+constructed element */
+			    p1 = pdst;
+			    i__3 = *pfree - 1;
+			    for (psrc = pme1; psrc <= i__3; ++psrc) {
+				iw[pdst] = iw[psrc];
+				++pdst;
+/* L100: */
+			    }
+			    pme1 = p1;
+			    *pfree = pdst;
+			    pj = pe[e];
+			    p = pe[me];
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      i is a principal variable not 
+yet placed in Lme */
+/*                      store i in new list */
+/*                      -----------------------------
+-------------------- */
+			degme += nvi;
+/*                      flag i as being in Lme by nega
+ting nv (i) */
+			nv[i] = -nvi;
+			iw[*pfree] = i;
+			++(*pfree);
+/*                      -----------------------------
+-------------------- */
+/*                      remove variable i from degree 
+link list */
+/*                      -----------------------------
+-------------------- */
+			ilast = last[i];
+			inext = next[i];
+			if (inext != 0) {
+			    last[inext] = ilast;
+			}
+			if (ilast != 0) {
+			    next[ilast] = inext;
+			} else {
+/*                         i is at the head of the
+ degree list */
+			    head[degree[i]] = inext;
+			}
+		    }
+/* L110: */
+		}
+		if (e != me) {
+/*                   set tree pointer and flag to indicate
+ element e is */
+/*                   absorbed into new element me (the par
+ent of e is me) */
+		    pe[e] = -me;
+		    w[e] = 0;
+		}
+/* L120: */
+	    }
+	    pme2 = *pfree - 1;
+/*             this element takes newmem new memory in iw (possibl
+y zero) */
+	    newmem = *pfree - pme1;
+	    mem += newmem;
+	    maxmem = max(maxmem,mem);
+	}
+/*          ---------------------------------------------------------
+---- */
+/*          me has now been converted into an element in iw (pme1..pme
+2) */
+/*          ---------------------------------------------------------
+---- */
+/*          degme holds the external degree of new element */
+	degree[me] = degme;
+	pe[me] = pme1;
+	len[me] = pme2 - pme1 + 1;
+/*          ---------------------------------------------------------
+---- */
+/*          make sure that wflg is not too large.  With the current */
+/*          value of wflg, wflg+n must not cause integer overflow */
+/*          ---------------------------------------------------------
+---- */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L130: */
+	    }
+	    wflg = 2;
+	}
+/* ==================================================================
+===== */
+/*  DEGREE UPDATE AND ELEMENT ABSORPTION */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 2:  for each i in Lme, sum up the degree of Lme (whic
+h */
+/*          is degme), plus the sum of the external degrees of each Le
+ */
+/*          for the elements e appearing within i, plus the */
+/*          supervariables in i.  Place i in hash list. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    p1 = pe[i];
+	    p2 = p1 + elen[i] - 1;
+	    pn = p1;
+	    hash = 0;
+	    deg = 0;
+/*             --------------------------------------------------
+-------- */
+/*             scan the element list associated with supervariable
+ i */
+/*             --------------------------------------------------
+-------- */
+/*             exact external degree: */
+	    ++wflg;
+	    i__2 = p2;
+	    for (p = p1; p <= i__2; ++p) {
+		e = iw[p];
+		if (w[e] != 0) {
+/*                   e is an unabsorbed element */
+		    i__3 = pe[e] + len[e] - 1;
+		    for (pj = pe[e]; pj <= i__3; ++pj) {
+			j = iw[pj];
+			nvj = nv[j];
+			if (nvj > 0 && w[j] != wflg) {
+/*                         j is principal and not 
+in Lme if nv (j) .gt. 0 */
+/*                         and j is not yet seen i
+f w (j) .ne. wflg */
+			    w[j] = wflg;
+			    deg += nvj;
+			}
+/* L145: */
+		    }
+		    iw[pn] = e;
+		    ++pn;
+		    hash += e;
+		}
+/* L160: */
+	    }
+/*             count the number of elements in i (including me): 
+*/
+	    elen[i] = pn - p1 + 1;
+/*             --------------------------------------------------
+-------- */
+/*             scan the supervariables in the list associated with
+ i */
+/*             --------------------------------------------------
+-------- */
+	    p3 = pn;
+	    i__2 = p1 + len[i] - 1;
+	    for (p = p2 + 1; p <= i__2; ++p) {
+		j = iw[p];
+		nvj = nv[j];
+		if (nvj > 0) {
+/*                   j is unabsorbed, and not in Lme. */
+/*                   add to degree and add to new list */
+		    deg += nvj;
+		    iw[pn] = j;
+		    ++pn;
+		    hash += j;
+		}
+/* L170: */
+	    }
+/*             --------------------------------------------------
+-------- */
+/*             update the degree and check for mass elimination */
+/*             --------------------------------------------------
+-------- */
+	    if (elen[i] == 1 && p3 == pn) {
+/*                -------------------------------------------
+------------ */
+/*                mass elimination */
+/*                -------------------------------------------
+------------ */
+/*                There is nothing left of this node except fo
+r an */
+/*                edge to the current pivot element.  elen (i)
+ is 1, */
+/*                and there are no variables adjacent to node 
+i. */
+/*                Absorb i into the current pivot element, me.
+ */
+		pe[i] = -me;
+		nvi = -nv[i];
+		degme -= nvi;
+		nvpiv += nvi;
+		nel += nvi;
+		nv[i] = 0;
+		elen[i] = 0;
+	    } else {
+/*                -------------------------------------------
+------------ */
+/*                update the exact degree of i */
+/*                -------------------------------------------
+------------ */
+/*                the following degree does not yet include th
+e size */
+/*                of the current element, which is added later
+: */
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                add me to the list for i */
+/*                -------------------------------------------
+------------ */
+/*                move first supervariable to end of list */
+		iw[pn] = iw[p3];
+/*                move first element to end of element part of
+ list */
+		iw[p3] = iw[p1];
+/*                add new element to front of list. */
+		iw[p1] = me;
+/*                store the new length of the list in len (i) 
+*/
+		len[i] = pn - p1 + 1;
+/*                -------------------------------------------
+------------ */
+/*                place in hash bucket.  Save hash key of i in
+ last (i). */
+/*                -------------------------------------------
+------------ */
+		hash = hash % hmod + 1;
+		j = head[hash];
+		if (j <= 0) {
+/*                   the degree list is empty, hash head i
+s -j */
+		    next[i] = -j;
+		    head[hash] = -i;
+		} else {
+/*                   degree list is not empty */
+/*                   use last (head (hash)) as hash head 
+*/
+		    next[i] = last[j];
+		    last[j] = i;
+		}
+		last[i] = hash;
+	    }
+/* L180: */
+	}
+	degree[me] = degme;
+/*          ---------------------------------------------------------
+---- */
+/*          Clear the counter array, w (...), by incrementing wflg. */
+/*          ---------------------------------------------------------
+---- */
+	++wflg;
+/*          make sure that wflg+n does not cause integer overflow */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L190: */
+	    }
+	    wflg = 2;
+	}
+/*          at this point, w (1..n) .lt. wflg holds */
+/* ==================================================================
+===== */
+/*  SUPERVARIABLE DETECTION */
+/* ==================================================================
+===== */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    if (nv[i] < 0) {
+/*                i is a principal variable in Lme */
+/*                -------------------------------------------
+------------ */
+/*                examine all hash buckets with 2 or more vari
+ables.  We */
+/*                do this by examining all unique hash keys for 
+super- */
+/*                variables in the pattern Lme of the current 
+element, me */
+/*                -------------------------------------------
+------------ */
+		hash = last[i];
+/*                let i = head of hash bucket, and empty the h
+ash bucket */
+		j = head[hash];
+		if (j == 0) {
+		    goto L250;
+		}
+		if (j < 0) {
+/*                   degree list is empty */
+		    i = -j;
+		    head[hash] = 0;
+		} else {
+/*                   degree list is not empty, restore las
+t () of head */
+		    i = last[j];
+		    last[j] = 0;
+		}
+		if (i == 0) {
+		    goto L250;
+		}
+/*                while loop: */
+L200:
+		if (next[i] != 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   this bucket has one or more variables
+ following i. */
+/*                   scan all of them to see if i can abso
+rb any entries */
+/*                   that follow i in hash bucket.  Scatte
+r i into w. */
+/*                   ------------------------------------
+---------------- */
+		    ln = len[i];
+		    eln = elen[i];
+/*                   do not flag the first element in the 
+list (me) */
+		    i__2 = pe[i] + ln - 1;
+		    for (p = pe[i] + 1; p <= i__2; ++p) {
+			w[iw[p]] = wflg;
+/* L210: */
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   scan every other entry j following i 
+in bucket */
+/*                   ------------------------------------
+---------------- */
+		    jlast = i;
+		    j = next[i];
+/*                   while loop: */
+L220:
+		    if (j != 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      check if j and i have identica
+l nonzero pattern */
+/*                      -----------------------------
+-------------------- */
+			if (len[j] != ln) {
+/*                         i and j do not have sam
+e size data structure */
+			    goto L240;
+			}
+			if (elen[j] != eln) {
+/*                         i and j do not have sam
+e number of adjacent el */
+			    goto L240;
+			}
+/*                      do not flag the first element 
+in the list (me) */
+			i__2 = pe[j] + ln - 1;
+			for (p = pe[j] + 1; p <= i__2; ++p) {
+			    if (w[iw[p]] != wflg) {
+/*                            an entry (iw(p))
+ is in j but not in i */
+				goto L240;
+			    }
+/* L230: */
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      found it!  j can be absorbed i
+nto i */
+/*                      -----------------------------
+-------------------- */
+			pe[j] = -i;
+/*                      both nv (i) and nv (j) are neg
+ated since they */
+/*                      are in Lme, and the absolute v
+alues of each */
+/*                      are the number of variables in
+ i and j: */
+			nv[i] += nv[j];
+			nv[j] = 0;
+			elen[j] = 0;
+/*                      delete j from hash bucket */
+			j = next[j];
+			next[jlast] = j;
+			goto L220;
+/*                      -----------------------------
+-------------------- */
+L240:
+/*                      j cannot be absorbed into i */
+/*                      -----------------------------
+-------------------- */
+			jlast = j;
+			j = next[j];
+			goto L220;
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   no more variables can be absorbed int
+o i */
+/*                   go to next i in bucket and clear flag
+ array */
+/*                   ------------------------------------
+---------------- */
+		    ++wflg;
+		    i = next[i];
+		    if (i != 0) {
+			goto L200;
+		    }
+		}
+	    }
+L250:
+	    ;
+	}
+/* ==================================================================
+===== */
+/*  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMEN
+T */
+/* ==================================================================
+===== */
+	p = pme1;
+	nleft = *n - nel;
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    nvi = -nv[i];
+	    if (nvi > 0) {
+/*                i is a principal variable in Lme */
+/*                restore nv (i) to signify that i is principa
+l */
+		nv[i] = nvi;
+/*                -------------------------------------------
+------------ */
+/*                compute the external degree (add size of cur
+rent elem) */
+/*                -------------------------------------------
+------------ */
+/* Computing MAX */
+		i__2 = 1, i__3 = degree[i] + degme - nvi;
+		deg = max(i__2,i__3);
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable at the head of the d
+egree list */
+/*                -------------------------------------------
+------------ */
+		inext = head[deg];
+		if (inext != 0) {
+		    last[inext] = i;
+		}
+		next[i] = inext;
+		last[i] = 0;
+		head[deg] = i;
+/*                -------------------------------------------
+------------ */
+/*                save the new degree, and find the minimum de
+gree */
+/*                -------------------------------------------
+------------ */
+		mindeg = min(mindeg,deg);
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable in the element patte
+rn */
+/*                -------------------------------------------
+------------ */
+		iw[p] = i;
+		++p;
+	    }
+/* L260: */
+	}
+/* ==================================================================
+===== */
+/*  FINALIZE THE NEW ELEMENT */
+/* ==================================================================
+===== */
+	nv[me] = nvpiv + degme;
+/*          nv (me) is now the degree of pivot (including diagonal par
+t) */
+/*          save the length of the list for the new element me */
+	len[me] = p - pme1;
+	if (len[me] == 0) {
+/*             there is nothing left of the current pivot element 
+*/
+	    pe[me] = 0;
+	    w[me] = 0;
+	}
+	if (newmem != 0) {
+/*             element was not constructed in place: deallocate pa
+rt */
+/*             of it (final size is less than or equal to newmem, 
+*/
+/*             since newly nonprincipal variables have been remove
+d). */
+	    *pfree = p;
+	    mem = mem - newmem + len[me];
+	}
+/* ==================================================================
+===== */
+/*          END WHILE (selecting pivots) */
+	goto L30;
+    }
+/* =======================================================================
+ */
+/* =======================================================================
+ */
+/*  COMPUTE THE PERMUTATION VECTORS */
+/* =======================================================================
+ */
+/*       ---------------------------------------------------------------- 
+*/
+/*       The time taken by the following code is O(n).  At this */
+/*       point, elen (e) = -k has been done for all elements e, */
+/*       and elen (i) = 0 has been done for all nonprincipal */
+/*       variables i.  At this point, there are no principal */
+/*       supervariables left, and all elements are absorbed. */
+/*       ---------------------------------------------------------------- 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+/*       compute the ordering of unordered nonprincipal variables */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	if (elen[i] == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             i is an un-ordered row.  Traverse the tree from i u
+ntil */
+/*             reaching an element, e.  The element, e, was the */
+/*             principal supervariable of i and all nodes in the p
+ath */
+/*             from i to when e was selected as pivot. */
+/*             --------------------------------------------------
+-------- */
+	    j = -pe[i];
+/*             while (j is a variable) do: */
+L270:
+	    if (elen[j] >= 0) {
+		j = -pe[j];
+		goto L270;
+	    }
+	    e = j;
+/*             --------------------------------------------------
+-------- */
+/*             get the current pivot ordering of e */
+/*             --------------------------------------------------
+-------- */
+	    k = -elen[e];
+/*             --------------------------------------------------
+-------- */
+/*             traverse the path again from i to e, and compress t
+he */
+/*             path (all nodes point to e).  Path compression allo
+ws */
+/*             this code to compute in O(n) time.  Order the unord
+ered */
+/*             nodes in the path, and place the element e at the e
+nd. */
+/*             --------------------------------------------------
+-------- */
+	    j = i;
+/*             while (j is a variable) do: */
+L280:
+	    if (elen[j] >= 0) {
+		jnext = -pe[j];
+		pe[j] = -e;
+		if (elen[j] == 0) {
+/*                   j is an unordered row */
+		    elen[j] = k;
+		    ++k;
+		}
+		j = jnext;
+		goto L280;
+	    }
+/*             leave elen (e) negative, so we know it is an elemen
+t */
+	    elen[e] = -k;
+	}
+/* L290: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       reset the inverse permutation (elen (1..n)) to be positive, */
+/*       and compute the permutation (last (1..n)). */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	k = (i__2 = elen[i], abs(i__2));
+	last[k] = i;
+	elen[i] = k;
+/* L300: */
+    }
+/* =======================================================================
+ */
+/*  RETURN THE MEMORY USAGE IN IW */
+/* =======================================================================
+ */
+/*       If maxmem is less than or equal to iwlen, then no compressions */
+/*       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise */
+/*       compressions did occur, and iwlen would have had to have been */
+/*       greater than or equal to maxmem for no compressions to occur. */
+/*       Return the value of maxmem in the pfree argument. */
+    *pfree = maxmem;
+    return 0;
+} /* amdexa_ */
+
diff --git a/contrib/taucs/external/src/amdexa.f b/contrib/taucs/external/src/amdexa.f
new file mode 100644
index 0000000000000000000000000000000000000000..c2f2d122f61d650b5e4f6ba67a4f551ff22699bc
--- /dev/null
+++ b/contrib/taucs/external/src/amdexa.f
@@ -0,0 +1,1233 @@
+
+        SUBROUTINE AMDEXA
+     $          (N, PE, IW, LEN, IWLEN, PFREE, NV, NEXT,
+     $          LAST, HEAD, ELEN, DEGREE, NCMPA, W, IOVFLO)
+
+        INTEGER N, IWLEN, PFREE, NCMPA, IOVFLO, IW (IWLEN), PE (N),
+     $          DEGREE (N), NV (N), NEXT (N), LAST (N), HEAD (N),
+     $          ELEN (N), W (N), LEN (N)
+
+C-----------------------------------------------------------------------
+C  The MC47 / AMD suite of minimum degree ordering algorithms.
+C
+C  This code is one of seven variations of a single algorithm:
+C  the primary routine (MC47B/BD, only available in the Harwell
+C  Subroutine Library), and 6 variations that differ only in
+C  how they compute the degree (available in NETLIB).
+C
+C  For information on the Harwell Subroutine Library, contact
+C  John Harding, Harwell Subroutine Library, B 552, AEA Technology,
+C  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573,
+C  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will
+C  provide details of price and conditions of use.
+C-----------------------------------------------------------------------
+
+************************************************************************
+* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+* and AMDATR) may be used SOLELY for educational, research, and
+* benchmarking purposes by non-profit organizations and the U.S.
+* government.  Commercial and other organizations may make use of the
+* AMD routines SOLELY for benchmarking purposes only.  The AMD
+* routines may be modified by or on behalf of the User for such
+* use but at no time shall the AMD routines or any such modified
+* version of them become the property of the User.  The AMD routines
+* are provided without warranty of any kind, either expressed or
+* implied.  Neither the Authors nor their employers shall be liable
+* for any direct or consequential loss or damage whatsoever arising
+* out of the use or misuse of the AMD routines by the User.  The AMD
+* routines must not be sold.  You may make copies of the AMD routines,
+* but this NOTICE and the Copyright notice must appear in all copies.
+* Any other use of the AMD routines requires written permission.
+* Your use of the AMD routines is an implicit agreement to these
+* conditions."
+************************************************************************
+
+C-----------------------------------------------------------------------
+C AMDexa:  exact minimum (external) degree ordering algorithm
+C-----------------------------------------------------------------------
+
+C  Variation 1: exact external degree (as used in MMD, for example.
+C  See A. George and J. Liu, "The evolution of the minimum degree
+C  ordering algorithm," SIAM Review, vol. 31, no. 1, pp. 1-19, 1989).
+C  Note that some of the comments in the code below reflect the
+C  MC47-style degree approximation.  Also note that we do not use
+C  multiple elimination or incomplete update, which are used in MMD.
+C
+C  We recommend using MC47B/BD instead of this routine since MC47B/BD
+C  gives comparable results in much less time (this code has been
+C  observed to be up to 71 times slower than MC47B/BD).
+
+C-----------------------------------------------------------------------
+
+C Given a representation of the nonzero pattern of a symmetric matrix,
+C       A, (excluding the diagonal) perform an exact minimum
+C       (external) degree ordering to compute a pivot order such
+C       that the introduction of nonzeros (fill-in) in the Cholesky
+C       factors A = LL^T is kept low.  At each step, the pivot
+C       selected is the one with the minimum exact external degree.
+
+C **********************************************************************
+C ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
+C **********************************************************************
+C ** If you want error checking, a more versatile input format, and a **
+C ** simpler user interface, then use MC47A/AD in the Harwell         **
+C ** Subroutine Library, which checks for errors, transforms the      **
+C ** input, and calls MC47B/BD.                                       **
+C **********************************************************************
+
+C       References:  (UF Tech Reports are available via anonymous ftp
+C       to ftp.cis.ufl.edu:cis/tech-reports).
+C
+C       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern
+C               multifrontal method for sparse LU factorization",
+C               SIAM J. Matrix Analysis and Applications, to appear.
+C               also Univ. of Florida Technical Report TR-94-038.
+C               Discusses UMFPACK / MA38.
+C
+C       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff,
+C               "An approximate minimum degree ordering algorithm,"
+C               SIAM J. Matrix Analysis and Applications (to appear),
+C               also Univ. of Florida Technical Report TR-94-039.
+C               Discusses this routine.
+C
+C       [3] Alan George and Joseph Liu, "The evolution of the
+C               minimum degree ordering algorithm," SIAM Review, vol.
+C               31, no. 1, pp. 1-19, March 1989.  We list below the
+C               features mentioned in that paper that this code
+C               includes:
+C
+C       mass elimination:
+C               Yes.  MA27 relied on supervariable detection for mass
+C               elimination.
+C       indistinguishable nodes:
+C               Yes (we call these "supervariables").  This was also in
+C               the MA27 code - although we modified the method of
+C               detecting them (the previous hash was the true degree,
+C               which we no longer keep track of).  A supervariable is
+C               a set of rows with identical nonzero pattern.  All
+C               variables in a supervariable are eliminated together.
+C               Each supervariable has as its numerical name that of
+C               one of its variables (its principal variable).
+C       quotient graph representation:
+C               Yes.  We use the term "element" for the cliques formed
+C               during elimination.  This was also in the MA27 code.
+C               The algorithm can operate in place, but it will work
+C               more efficiently if given some "elbow room."
+C       element absorption:
+C               Yes.  This was also in the MA27 code.
+C       external degree:
+C               Yes.  The MA27 code was based on the true degree.
+C       incomplete degree update and multiple elimination:
+C               No.  This was not in MA27, either.  Our method of
+C               degree update within MC47B/BD is element-based, not
+C               variable-based.  It is thus not well-suited for use
+C               with incomplete degree update or multiple elimination.
+
+C-----------------------------------------------------------------------
+C Authors, and Copyright (C) 1995 by:
+C       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid.
+C
+C Acknowledgements:
+C       This work (and the UMFPACK package) was supported by the
+C       National Science Foundation (ASC-9111263 and DMS-9223088).
+C       The UMFPACK/MA38 approximate degree update algorithm, the
+C       unsymmetric analog which forms the basis of MC47B/BD, was
+C       developed while Tim Davis was supported by CERFACS (Toulouse,
+C       France) in a post-doctoral position.
+C
+C Date:  September, 1995
+C-----------------------------------------------------------------------
+
+C-----------------------------------------------------------------------
+C INPUT ARGUMENTS (unaltered):
+C-----------------------------------------------------------------------
+
+C n:    The matrix order.
+C
+C       Restriction:  1 .le. n .lt. (iovflo/2)-2
+
+C iwlen:        The length of iw (1..iwlen).  On input, the matrix is
+C       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+C       slightly larger than what is required to hold the matrix, at
+C       least iwlen .ge. pfree + n is recommended.  Otherwise,
+C       excessive compressions will take place.
+C       *** We do not recommend running this algorithm with ***
+C       ***      iwlen .lt. pfree + n.                      ***
+C       *** Better performance will be obtained if          ***
+C       ***      iwlen .ge. pfree + n                       ***
+C       *** or better yet                                   ***
+C       ***      iwlen .gt. 1.2 * pfree                     ***
+C       *** (where pfree is its value on input).            ***
+C       The algorithm will not run at all if iwlen .lt. pfree-1.
+C
+C       Restriction: iwlen .ge. pfree-1
+
+C iovflo:       The largest positive integer that your computer can
+C       represent (-iovflo should also be representable).  On a 32-bit
+C       computer with 2's-complement arithmetic,
+C       iovflo = (2^31)-1 = 2,147,483,647.
+
+C-----------------------------------------------------------------------
+C INPUT/OUTPUT ARGUMENTS:
+C-----------------------------------------------------------------------
+
+C pe:   On input, pe (i) is the index in iw of the start of row i, or
+C       zero if row i has no off-diagonal non-zeros.
+C
+C       During execution, it is used for both supervariables and
+C       elements:
+C
+C       * Principal supervariable i:  index into iw of the
+C               description of supervariable i.  A supervariable
+C               represents one or more rows of the matrix
+C               with identical nonzero pattern.
+C       * Non-principal supervariable i:  if i has been absorbed
+C               into another supervariable j, then pe (i) = -j.
+C               That is, j has the same pattern as i.
+C               Note that j might later be absorbed into another
+C               supervariable j2, in which case pe (i) is still -j,
+C               and pe (j) = -j2.
+C       * Unabsorbed element e:  the index into iw of the description
+C               of element e, if e has not yet been absorbed by a
+C               subsequent element.  Element e is created when
+C               the supervariable of the same name is selected as
+C               the pivot.
+C       * Absorbed element e:  if element e is absorbed into element
+C               e2, then pe (e) = -e2.  This occurs when the pattern of
+C               e (that is, Le) is found to be a subset of the pattern
+C               of e2 (that is, Le2).  If element e is "null" (it has
+C               no nonzeros outside its pivot block), then pe (e) = 0.
+C
+C       On output, pe holds the assembly tree/forest, which implicitly
+C       represents a pivot order with identical fill-in as the actual
+C       order (via a depth-first search of the tree).
+C
+C       On output:
+C       If nv (i) .gt. 0, then i represents a node in the assembly tree,
+C       and the parent of i is -pe (i), or zero if i is a root.
+C       If nv (i) = 0, then (i,-pe (i)) represents an edge in a
+C       subtree, the root of which is a node in the assembly tree.
+
+C pfree:        On input the tail end of the array, iw (pfree..iwlen),
+C       is empty, and the matrix is stored in iw (1..pfree-1).
+C       During execution, additional data is placed in iw, and pfree
+C       is modified so that iw (pfree..iwlen) is always the unused part
+C       of iw.  On output, pfree is set equal to the size of iw that
+C       would have been needed for no compressions to occur.  If
+C       ncmpa is zero, then pfree (on output) is less than or equal to
+C       iwlen, and the space iw (pfree+1 ... iwlen) was not used.
+C       Otherwise, pfree (on output) is greater than iwlen, and all the
+C       memory in iw was used.
+
+C-----------------------------------------------------------------------
+C INPUT/MODIFIED (undefined on output):
+C-----------------------------------------------------------------------
+
+C len:  On input, len (i) holds the number of entries in row i of the
+C       matrix, excluding the diagonal.  The contents of len (1..n)
+C       are undefined on output.
+
+C iw:   On input, iw (1..pfree-1) holds the description of each row i
+C       in the matrix.  The matrix must be symmetric, and both upper
+C       and lower triangular parts must be present.  The diagonal must
+C       not be present.  Row i is held as follows:
+C
+C               len (i):  the length of the row i data structure
+C               iw (pe (i) ... pe (i) + len (i) - 1):
+C                       the list of column indices for nonzeros
+C                       in row i (simple supervariables), excluding
+C                       the diagonal.  All supervariables start with
+C                       one row/column each (supervariable i is just
+C                       row i).
+C               if len (i) is zero on input, then pe (i) is ignored
+C               on input.
+C
+C               Note that the rows need not be in any particular order,
+C               and there may be empty space between the rows.
+C
+C       During execution, the supervariable i experiences fill-in.
+C       This is represented by placing in i a list of the elements
+C       that cause fill-in in supervariable i:
+C
+C               len (i):  the length of supervariable i
+C               iw (pe (i) ... pe (i) + elen (i) - 1):
+C                       the list of elements that contain i.  This list
+C                       is kept short by removing absorbed elements.
+C               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1):
+C                       the list of supervariables in i.  This list
+C                       is kept short by removing nonprincipal
+C                       variables, and any entry j that is also
+C                       contained in at least one of the elements
+C                       (j in Le) in the list for i (e in row i).
+C
+C       When supervariable i is selected as pivot, we create an
+C       element e of the same name (e=i):
+C
+C               len (e):  the length of element e
+C               iw (pe (e) ... pe (e) + len (e) - 1):
+C                       the list of supervariables in element e.
+C
+C       An element represents the fill-in that occurs when supervariable
+C       i is selected as pivot (which represents the selection of row i
+C       and all non-principal variables whose principal variable is i).
+C       We use the term Le to denote the set of all supervariables
+C       in element e.  Absorbed supervariables and elements are pruned
+C       from these lists when computationally convenient.
+C
+C       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
+C       The contents of iw are undefined on output.
+
+C-----------------------------------------------------------------------
+C OUTPUT (need not be set on input):
+C-----------------------------------------------------------------------
+
+C nv:   During execution, abs (nv (i)) is equal to the number of rows
+C       that are represented by the principal supervariable i.  If i is
+C       a nonprincipal variable, then nv (i) = 0.  Initially,
+C       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a
+C       principal variable in the pattern Lme of the current pivot
+C       element me.  On output, nv (e) holds the true degree of element
+C       e at the time it was created (including the diagonal part).
+
+C ncmpa:        The number of times iw was compressed.  If this is
+C       excessive, then the execution took longer than what could have
+C       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20%
+C       larger than the value of pfree on input (or at least
+C       iwlen .ge. pfree + n).  The fastest performance will be
+C       obtained when ncmpa is returned as zero.  If iwlen is set to
+C       the value returned by pfree on *output*, then no compressions
+C       will occur.
+
+C elen: See the description of iw above.  At the start of execution,
+C       elen (i) is set to zero.  During execution, elen (i) is the
+C       number of elements in the list for supervariable i.  When e
+C       becomes an element, elen (e) = -nel is set, where nel is the
+C       current step of factorization.  elen (i) = 0 is done when i
+C       becomes nonprincipal.
+C
+C       For variables, elen (i) .ge. 0 holds until just before the
+C       permutation vectors are computed.  For elements,
+C       elen (e) .lt. 0 holds.
+C
+C       On output elen (1..n) holds the inverse permutation (the same
+C       as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+C       then row i is the kth pivot row.  Row i of A appears as the
+C       (elen(i))-th row in the permuted matrix, PAP^T.
+
+C last: In a degree list, last (i) is the supervariable preceding i,
+C       or zero if i is the head of the list.  In a hash bucket,
+C       last (i) is the hash key for i.  last (head (hash)) is also
+C       used as the head of a hash bucket if head (hash) contains a
+C       degree list (see head, below).
+C
+C       On output, last (1..n) holds the permutation (the same as the
+C       'PERM' argument in Sparspak).  That is, if i = last (k), then
+C       row i is the kth pivot row.  Row last (k) of A is the k-th row
+C       in the permuted matrix, PAP^T.
+
+C-----------------------------------------------------------------------
+C LOCAL (not input or output - used only during execution):
+C-----------------------------------------------------------------------
+
+C degree:       If i is a supervariable, then degree (i) holds the
+C       current approximation of the external degree of row i (an upper
+C       bound).  The external degree is the number of nonzeros in row i,
+C       minus abs (nv (i)) (the diagonal part).  The bound is equal to
+C       the external degree if elen (i) is less than or equal to two.
+C
+C       We also use the term "external degree" for elements e to refer
+C       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|,
+C       which is the degree of the off-diagonal part of the element e
+C       (not including the diagonal part).
+
+C head: head is used for degree lists.  head (deg) is the first
+C       supervariable in a degree list (all supervariables i in a
+C       degree list deg have the same approximate degree, namely,
+C       deg = degree (i)).  If the list deg is empty then
+C       head (deg) = 0.
+C
+C       During supervariable detection head (hash) also serves as a
+C       pointer to a hash bucket.
+C       If head (hash) .gt. 0, there is a degree list of degree hash.
+C               The hash bucket head pointer is last (head (hash)).
+C       If head (hash) = 0, then the degree list and hash bucket are
+C               both empty.
+C       If head (hash) .lt. 0, then the degree list is empty, and
+C               -head (hash) is the head of the hash bucket.
+C       After supervariable detection is complete, all hash buckets
+C       are empty, and the (last (head (hash)) = 0) condition is
+C       restored for the non-empty degree lists.
+
+C next: next (i) is the supervariable following i in a link list, or
+C       zero if i is the last in the list.  Used for two kinds of
+C       lists:  degree lists and hash buckets (a supervariable can be
+C       in only one kind of list at a time).
+
+C w:    The flag array w determines the status of elements and
+C       variables, and the external degree of elements.
+C
+C       for elements:
+C          if w (e) = 0, then the element e is absorbed
+C          if w (e) .ge. wflg, then w (e) - wflg is the size of
+C               the set |Le \ Lme|, in terms of nonzeros (the
+C               sum of abs (nv (i)) for each principal variable i that
+C               is both in the pattern of element e and NOT in the
+C               pattern of the current pivot element, me).
+C          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has
+C               not yet been seen in the scan of the element lists in
+C               the computation of |Le\Lme| in loop 150 below.
+C
+C       for variables:
+C          during supervariable detection, if w (j) .ne. wflg then j is
+C          not in the pattern of variable i
+C
+C       The w array is initialized by setting w (i) = 1 for all i,
+C       and by setting wflg = 2.  It is reinitialized if wflg becomes
+C       too large (to ensure that wflg+n does not cause integer
+C       overflow).
+
+C-----------------------------------------------------------------------
+C LOCAL INTEGERS:
+C-----------------------------------------------------------------------
+
+        INTEGER DEG, DEGME, DEXT, DMAX, E, ELENME, ELN, HASH, HMOD, I,
+     $          ILAST, INEXT, J, JLAST, JNEXT, K, KNT1, KNT2, KNT3,
+     $          LENJ, LN, MAXMEM, ME, MEM, MINDEG, NEL, NEWMEM,
+     $          NLEFT, NVI, NVJ, NVPIV, SLENME, WBIG, WE, WFLG, WNVI, X
+
+C deg:          the degree of a variable or element
+C degme:        size, |Lme|, of the current element, me (= degree (me))
+C dext:         external degree, |Le \ Lme|, of some element e
+C dmax:         largest |Le| seen so far
+C e:            an element
+C elenme:       the length, elen (me), of element list of pivotal var.
+C eln:          the length, elen (...), of an element list
+C hash:         the computed value of the hash function
+C hmod:         the hash function is computed modulo hmod = max (1,n-1)
+C i:            a supervariable
+C ilast:        the entry in a link list preceding i
+C inext:        the entry in a link list following i
+C j:            a supervariable
+C jlast:        the entry in a link list preceding j
+C jnext:        the entry in a link list, or path, following j
+C k:            the pivot order of an element or variable
+C knt1:         loop counter used during element construction
+C knt2:         loop counter used during element construction
+C knt3:         loop counter used during compression
+C lenj:         len (j)
+C ln:           length of a supervariable list
+C maxmem:       amount of memory needed for no compressions
+C me:           current supervariable being eliminated, and the
+C                       current element created by eliminating that
+C                       supervariable
+C mem:          memory in use assuming no compressions have occurred
+C mindeg:       current minimum degree
+C nel:          number of pivots selected so far
+C newmem:       amount of new memory needed for current pivot element
+C nleft:        n - nel, the number of nonpivotal rows/columns remaining
+C nvi:          the number of variables in a supervariable i (= nv (i))
+C nvj:          the number of variables in a supervariable j (= nv (j))
+C nvpiv:        number of pivots in current element
+C slenme:       number of variables in variable list of pivotal variable
+C wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig.
+C we:           w (e)
+C wflg:         used for flagging the w array.  See description of w.
+C wnvi:         wflg - nv (i)
+C x:            either a supervariable or an element
+
+C-----------------------------------------------------------------------
+C LOCAL POINTERS:
+C-----------------------------------------------------------------------
+
+        INTEGER P, P1, P2, P3, PDST, PEND, PJ, PME, PME1, PME2, PN, PSRC
+
+C               Any parameter (pe (...) or pfree) or local variable
+C               starting with "p" (for Pointer) is an index into iw,
+C               and all indices into iw use variables starting with
+C               "p."  The only exception to this rule is the iwlen
+C               input argument.
+
+C p:            pointer into lots of things
+C p1:           pe (i) for some variable i (start of element list)
+C p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list)
+C p3:           index of first supervariable in clean list
+C pdst:         destination pointer, for compression
+C pend:         end of memory to compress
+C pj:           pointer into an element or variable
+C pme:          pointer into the current element (pme1...pme2)
+C pme1:         the current element, me, is stored in iw (pme1...pme2)
+C pme2:         the end of the current element
+C pn:           pointer into a "clean" variable, also used to compress
+C psrc:         source pointer, for compression
+
+C-----------------------------------------------------------------------
+C  FUNCTIONS CALLED:
+C-----------------------------------------------------------------------
+
+        INTRINSIC MAX, MIN, MOD
+
+C=======================================================================
+C  INITIALIZATIONS
+C=======================================================================
+
+        WFLG = 2
+        MINDEG = 1
+        NCMPA = 0
+        NEL = 0
+        HMOD = MAX (1, N-1)
+        DMAX = 0
+        WBIG = IOVFLO - N
+        MEM = PFREE - 1
+        MAXMEM = MEM
+
+        DO 10 I = 1, N
+           LAST (I) = 0
+           HEAD (I) = 0
+           NV (I) = 1
+           W (I) = 1
+           ELEN (I) = 0
+           DEGREE (I) = LEN (I)
+10         CONTINUE
+
+C       ----------------------------------------------------------------
+C       initialize degree lists and eliminate rows with no off-diag. nz.
+C       ----------------------------------------------------------------
+
+        DO 20 I = 1, N
+
+           DEG = DEGREE (I)
+
+           IF (DEG .GT. 0) THEN
+
+C             ----------------------------------------------------------
+C             place i in the degree list corresponding to its degree
+C             ----------------------------------------------------------
+
+              INEXT = HEAD (DEG)
+              IF (INEXT .NE. 0) LAST (INEXT) = I
+              NEXT (I) = INEXT
+              HEAD (DEG) = I
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             we have a variable that can be eliminated at once because
+C             there is no off-diagonal non-zero in its row.
+C             ----------------------------------------------------------
+
+              NEL = NEL + 1
+              ELEN (I) = -NEL
+              PE (I) = 0
+              W (I) = 0
+
+              ENDIF
+
+20         CONTINUE
+
+C=======================================================================
+C  WHILE (selecting pivots) DO
+C=======================================================================
+
+30      CONTINUE
+        IF (NEL .LT. N) THEN
+
+C=======================================================================
+C  GET PIVOT OF MINIMUM DEGREE
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          find next supervariable for elimination
+C          -------------------------------------------------------------
+
+           DO 40 DEG = MINDEG, N
+              ME = HEAD (DEG)
+              IF (ME .GT. 0) GOTO 50
+40            CONTINUE
+50         CONTINUE
+           MINDEG = DEG
+
+C          -------------------------------------------------------------
+C          remove chosen variable from link list
+C          -------------------------------------------------------------
+
+           INEXT = NEXT (ME)
+           IF (INEXT .NE. 0) LAST (INEXT) = 0
+           HEAD (DEG) = INEXT
+
+C          -------------------------------------------------------------
+C          me represents the elimination of pivots nel+1 to nel+nv(me).
+C          place me itself as the first in this set.  It will be moved
+C          to the nel+nv(me) position when the permutation vectors are
+C          computed.
+C          -------------------------------------------------------------
+
+           ELENME = ELEN (ME)
+           ELEN (ME) = - (NEL + 1)
+           NVPIV = NV (ME)
+           NEL = NEL + NVPIV
+
+C=======================================================================
+C  CONSTRUCT NEW ELEMENT
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          At this point, me is the pivotal supervariable.  It will be
+C          converted into the current element.  Scan list of the
+C          pivotal supervariable, me, setting tree pointers and
+C          constructing new list of supervariables for the new element,
+C          me.  p is a pointer to the current position in the old list.
+C          -------------------------------------------------------------
+
+C          flag the variable "me" as being in Lme by negating nv (me)
+           NV (ME) = -NVPIV
+           DEGME = 0
+
+           IF (ELENME .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             construct the new element in place
+C             ----------------------------------------------------------
+
+              PME1 = PE (ME)
+              PME2 = PME1 - 1
+
+              DO 60 P = PME1, PME1 + LEN (ME) - 1
+                 I = IW (P)
+                 NVI = NV (I)
+                 IF (NVI .GT. 0) THEN
+
+C                   ----------------------------------------------------
+C                   i is a principal variable not yet placed in Lme.
+C                   store i in new list
+C                   ----------------------------------------------------
+
+                    DEGME = DEGME + NVI
+C                   flag i as being in Lme by negating nv (i)
+                    NV (I) = -NVI
+                    PME2 = PME2 + 1
+                    IW (PME2) = I
+
+C                   ----------------------------------------------------
+C                   remove variable i from degree list.
+C                   ----------------------------------------------------
+
+                    ILAST = LAST (I)
+                    INEXT = NEXT (I)
+                    IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                    IF (ILAST .NE. 0) THEN
+                       NEXT (ILAST) = INEXT
+                    ELSE
+C                      i is at the head of the degree list
+                       HEAD (DEGREE (I)) = INEXT
+                       ENDIF
+
+                    ENDIF
+60               CONTINUE
+C             this element takes no new memory in iw:
+              NEWMEM = 0
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             construct the new element in empty space, iw (pfree ...)
+C             ----------------------------------------------------------
+
+              P = PE (ME)
+              PME1 = PFREE
+              SLENME = LEN (ME) - ELENME
+
+              DO 120 KNT1 = 1, ELENME + 1
+
+                 IF (KNT1 .GT. ELENME) THEN
+C                   search the supervariables in me.
+                    E = ME
+                    PJ = P
+                    LN = SLENME
+                 ELSE
+C                   search the elements in me.
+                    E = IW (P)
+                    P = P + 1
+                    PJ = PE (E)
+                    LN = LEN (E)
+                    ENDIF
+
+C                -------------------------------------------------------
+C                search for different supervariables and add them to the
+C                new list, compressing when necessary. this loop is
+C                executed once for each element in the list and once for
+C                all the supervariables in the list.
+C                -------------------------------------------------------
+
+                 DO 110 KNT2 = 1, LN
+                    I = IW (PJ)
+                    PJ = PJ + 1
+                    NVI = NV (I)
+                    IF (NVI .GT. 0) THEN
+
+C                      -------------------------------------------------
+C                      compress iw, if necessary
+C                      -------------------------------------------------
+
+                       IF (PFREE .GT. IWLEN) THEN
+C                         prepare for compressing iw by adjusting
+C                         pointers and lengths so that the lists being
+C                         searched in the inner and outer loops contain
+C                         only the remaining entries.
+
+                          PE (ME) = P
+                          LEN (ME) = LEN (ME) - KNT1
+                          IF (LEN (ME) .EQ. 0) THEN
+C                            nothing left of supervariable me
+                             PE (ME) = 0
+                             ENDIF
+                          PE (E) = PJ
+                          LEN (E) = LN - KNT2
+                          IF (LEN (E) .EQ. 0) THEN
+C                            nothing left of element e
+                             PE (E) = 0
+                             ENDIF
+
+                          NCMPA = NCMPA + 1
+C                         store first item in pe
+C                         set first entry to -item
+                          DO 70 J = 1, N
+                             PN = PE (J)
+                             IF (PN .GT. 0) THEN
+                                PE (J) = IW (PN)
+                                IW (PN) = -J
+                                ENDIF
+70                           CONTINUE
+
+C                         psrc/pdst point to source/destination
+                          PDST = 1
+                          PSRC = 1
+                          PEND = PME1 - 1
+
+C                         while loop:
+80                        CONTINUE
+                          IF (PSRC .LE. PEND) THEN
+C                            search for next negative entry
+                             J = -IW (PSRC)
+                             PSRC = PSRC + 1
+                             IF (J .GT. 0) THEN
+                                IW (PDST) = PE (J)
+                                PE (J) = PDST
+                                PDST = PDST + 1
+C                               copy from source to destination
+                                LENJ = LEN (J)
+                                DO 90 KNT3 = 0, LENJ - 2
+                                   IW (PDST + KNT3) = IW (PSRC + KNT3)
+90                                 CONTINUE
+                                PDST = PDST + LENJ - 1
+                                PSRC = PSRC + LENJ - 1
+                                ENDIF
+                             GOTO 80
+                             ENDIF
+
+C                         move the new partially-constructed element
+                          P1 = PDST
+                          DO 100 PSRC = PME1, PFREE - 1
+                             IW (PDST) = IW (PSRC)
+                             PDST = PDST + 1
+100                          CONTINUE
+                          PME1 = P1
+                          PFREE = PDST
+                          PJ = PE (E)
+                          P = PE (ME)
+                          ENDIF
+
+C                      -------------------------------------------------
+C                      i is a principal variable not yet placed in Lme
+C                      store i in new list
+C                      -------------------------------------------------
+
+                       DEGME = DEGME + NVI
+C                      flag i as being in Lme by negating nv (i)
+                       NV (I) = -NVI
+                       IW (PFREE) = I
+                       PFREE = PFREE + 1
+
+C                      -------------------------------------------------
+C                      remove variable i from degree link list
+C                      -------------------------------------------------
+
+                       ILAST = LAST (I)
+                       INEXT = NEXT (I)
+                       IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                       IF (ILAST .NE. 0) THEN
+                          NEXT (ILAST) = INEXT
+                       ELSE
+C                         i is at the head of the degree list
+                          HEAD (DEGREE (I)) = INEXT
+                          ENDIF
+
+                       ENDIF
+110                 CONTINUE
+
+                 IF (E .NE. ME) THEN
+C                   set tree pointer and flag to indicate element e is
+C                   absorbed into new element me (the parent of e is me)
+                    PE (E) = -ME
+                    W (E) = 0
+                    ENDIF
+120              CONTINUE
+
+              PME2 = PFREE - 1
+C             this element takes newmem new memory in iw (possibly zero)
+              NEWMEM = PFREE - PME1
+              MEM = MEM + NEWMEM
+              MAXMEM = MAX (MAXMEM, MEM)
+              ENDIF
+
+C          -------------------------------------------------------------
+C          me has now been converted into an element in iw (pme1..pme2)
+C          -------------------------------------------------------------
+
+C          degme holds the external degree of new element
+           DEGREE (ME) = DEGME
+           PE (ME) = PME1
+           LEN (ME) = PME2 - PME1 + 1
+
+C          -------------------------------------------------------------
+C          make sure that wflg is not too large.  With the current
+C          value of wflg, wflg+n must not cause integer overflow
+C          -------------------------------------------------------------
+
+           IF (WFLG .GE. WBIG) THEN
+              DO 130 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+130              CONTINUE
+              WFLG = 2
+              ENDIF
+
+C=======================================================================
+C  DEGREE UPDATE AND ELEMENT ABSORPTION
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 2:  for each i in Lme, sum up the degree of Lme (which
+C          is degme), plus the sum of the external degrees of each Le
+C          for the elements e appearing within i, plus the
+C          supervariables in i.  Place i in hash list.
+C          -------------------------------------------------------------
+
+           DO 180 PME = PME1, PME2
+              I = IW (PME)
+              P1 = PE (I)
+              P2 = P1 + ELEN (I) - 1
+              PN = P1
+              HASH = 0
+              DEG = 0
+
+C             ----------------------------------------------------------
+C             scan the element list associated with supervariable i
+C             ----------------------------------------------------------
+
+C             exact external degree:
+              WFLG = WFLG + 1
+              DO 160 P = P1, P2
+                 E = IW (P)
+                 IF (W (E) .NE. 0) THEN
+C                   e is an unabsorbed element
+                    DO 145 PJ = PE (E), PE (E) + LEN (E) - 1
+                       J = IW (PJ)
+                       NVJ = NV (J)
+                       IF (NVJ .GT. 0 .AND. W (J) .NE. WFLG) THEN
+C                         j is principal and not in Lme if nv (j) .gt. 0
+C                         and j is not yet seen if w (j) .ne. wflg
+                          W (J) = WFLG
+                          DEG = DEG + NVJ
+                          ENDIF
+145                    CONTINUE
+                    IW (PN) = E
+                    PN = PN + 1
+                    HASH = HASH + E
+                    ENDIF
+160              CONTINUE
+
+C             count the number of elements in i (including me):
+              ELEN (I) = PN - P1 + 1
+
+C             ----------------------------------------------------------
+C             scan the supervariables in the list associated with i
+C             ----------------------------------------------------------
+
+              P3 = PN
+              DO 170 P = P2 + 1, P1 + LEN (I) - 1
+                 J = IW (P)
+                 NVJ = NV (J)
+                 IF (NVJ .GT. 0) THEN
+C                   j is unabsorbed, and not in Lme.
+C                   add to degree and add to new list
+                    DEG = DEG + NVJ
+                    IW (PN) = J
+                    PN = PN + 1
+                    HASH = HASH + J
+                    ENDIF
+170              CONTINUE
+
+C             ----------------------------------------------------------
+C             update the degree and check for mass elimination
+C             ----------------------------------------------------------
+
+              IF (ELEN (I) .EQ. 1 .AND. P3 .EQ. PN) THEN
+
+C                -------------------------------------------------------
+C                mass elimination
+C                -------------------------------------------------------
+
+C                There is nothing left of this node except for an
+C                edge to the current pivot element.  elen (i) is 1,
+C                and there are no variables adjacent to node i.
+C                Absorb i into the current pivot element, me.
+
+                 PE (I) = -ME
+                 NVI = -NV (I)
+                 DEGME = DEGME - NVI
+                 NVPIV = NVPIV + NVI
+                 NEL = NEL + NVI
+                 NV (I) = 0
+                 ELEN (I) = 0
+
+              ELSE
+
+C                -------------------------------------------------------
+C                update the exact degree of i
+C                -------------------------------------------------------
+
+C                the following degree does not yet include the size
+C                of the current element, which is added later:
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                add me to the list for i
+C                -------------------------------------------------------
+
+C                move first supervariable to end of list
+                 IW (PN) = IW (P3)
+C                move first element to end of element part of list
+                 IW (P3) = IW (P1)
+C                add new element to front of list.
+                 IW (P1) = ME
+C                store the new length of the list in len (i)
+                 LEN (I) = PN - P1 + 1
+
+C                -------------------------------------------------------
+C                place in hash bucket.  Save hash key of i in last (i).
+C                -------------------------------------------------------
+
+                 HASH = MOD (HASH, HMOD) + 1
+                 J = HEAD (HASH)
+                 IF (J .LE. 0) THEN
+C                   the degree list is empty, hash head is -j
+                    NEXT (I) = -J
+                    HEAD (HASH) = -I
+                 ELSE
+C                   degree list is not empty
+C                   use last (head (hash)) as hash head
+                    NEXT (I) = LAST (J)
+                    LAST (J) = I
+                    ENDIF
+                 LAST (I) = HASH
+                 ENDIF
+180           CONTINUE
+
+           DEGREE (ME) = DEGME
+
+C          -------------------------------------------------------------
+C          Clear the counter array, w (...), by incrementing wflg.
+C          -------------------------------------------------------------
+
+           WFLG = WFLG + 1
+
+C          make sure that wflg+n does not cause integer overflow
+           IF (WFLG .GE. WBIG) THEN
+              DO 190 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+190              CONTINUE
+              WFLG = 2
+              ENDIF
+C          at this point, w (1..n) .lt. wflg holds
+
+C=======================================================================
+C  SUPERVARIABLE DETECTION
+C=======================================================================
+
+           DO 250 PME = PME1, PME2
+              I = IW (PME)
+              IF (NV (I) .LT. 0) THEN
+C                i is a principal variable in Lme
+
+C                -------------------------------------------------------
+C                examine all hash buckets with 2 or more variables.  We
+C                do this by examining all unique hash keys for super-
+C                variables in the pattern Lme of the current element, me
+C                -------------------------------------------------------
+
+                 HASH = LAST (I)
+C                let i = head of hash bucket, and empty the hash bucket
+                 J = HEAD (HASH)
+                 IF (J .EQ. 0) GOTO 250
+                 IF (J .LT. 0) THEN
+C                   degree list is empty
+                    I = -J
+                    HEAD (HASH) = 0
+                 ELSE
+C                   degree list is not empty, restore last () of head
+                    I = LAST (J)
+                    LAST (J) = 0
+                    ENDIF
+                 IF (I .EQ. 0) GOTO 250
+
+C                while loop:
+200              CONTINUE
+                 IF (NEXT (I) .NE. 0) THEN
+
+C                   ----------------------------------------------------
+C                   this bucket has one or more variables following i.
+C                   scan all of them to see if i can absorb any entries
+C                   that follow i in hash bucket.  Scatter i into w.
+C                   ----------------------------------------------------
+
+                    LN = LEN (I)
+                    ELN = ELEN (I)
+C                   do not flag the first element in the list (me)
+                    DO 210 P = PE (I) + 1, PE (I) + LN - 1
+                       W (IW (P)) = WFLG
+210                    CONTINUE
+
+C                   ----------------------------------------------------
+C                   scan every other entry j following i in bucket
+C                   ----------------------------------------------------
+
+                    JLAST = I
+                    J = NEXT (I)
+
+C                   while loop:
+220                 CONTINUE
+                    IF (J .NE. 0) THEN
+
+C                      -------------------------------------------------
+C                      check if j and i have identical nonzero pattern
+C                      -------------------------------------------------
+
+                       IF (LEN (J) .NE. LN) THEN
+C                         i and j do not have same size data structure
+                          GOTO 240
+                          ENDIF
+                       IF (ELEN (J) .NE. ELN) THEN
+C                         i and j differ in number of adjacent elements
+                          GOTO 240
+                          ENDIF
+C                      do not flag the first element in the list (me)
+                       DO 230 P = PE (J) + 1, PE (J) + LN - 1
+                          IF (W (IW (P)) .NE. WFLG) THEN
+C                            an entry (iw(p)) is in j but not in i
+                             GOTO 240
+                             ENDIF
+230                       CONTINUE
+
+C                      -------------------------------------------------
+C                      found it!  j can be absorbed into i
+C                      -------------------------------------------------
+
+                       PE (J) = -I
+C                      both nv (i) and nv (j) are negated since they
+C                      are in Lme, and the absolute values of each
+C                      are the number of variables in i and j:
+                       NV (I) = NV (I) + NV (J)
+                       NV (J) = 0
+                       ELEN (J) = 0
+C                      delete j from hash bucket
+                       J = NEXT (J)
+                       NEXT (JLAST) = J
+                       GOTO 220
+
+C                      -------------------------------------------------
+240                    CONTINUE
+C                      j cannot be absorbed into i
+C                      -------------------------------------------------
+
+                       JLAST = J
+                       J = NEXT (J)
+                       GOTO 220
+                       ENDIF
+
+C                   ----------------------------------------------------
+C                   no more variables can be absorbed into i
+C                   go to next i in bucket and clear flag array
+C                   ----------------------------------------------------
+
+                    WFLG = WFLG + 1
+                    I = NEXT (I)
+                    IF (I .NE. 0) GOTO 200
+                    ENDIF
+                 ENDIF
+250           CONTINUE
+
+C=======================================================================
+C  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMENT
+C=======================================================================
+
+           P = PME1
+           NLEFT = N - NEL
+           DO 260 PME = PME1, PME2
+              I = IW (PME)
+              NVI = -NV (I)
+              IF (NVI .GT. 0) THEN
+C                i is a principal variable in Lme
+C                restore nv (i) to signify that i is principal
+                 NV (I) = NVI
+
+C                -------------------------------------------------------
+C                compute the external degree (add size of current elem)
+C                -------------------------------------------------------
+
+                 DEG = MAX (1, DEGREE (I) + DEGME - NVI)
+
+C                -------------------------------------------------------
+C                place the supervariable at the head of the degree list
+C                -------------------------------------------------------
+
+                 INEXT = HEAD (DEG)
+                 IF (INEXT .NE. 0) LAST (INEXT) = I
+                 NEXT (I) = INEXT
+                 LAST (I) = 0
+                 HEAD (DEG) = I
+
+C                -------------------------------------------------------
+C                save the new degree, and find the minimum degree
+C                -------------------------------------------------------
+
+                 MINDEG = MIN (MINDEG, DEG)
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                place the supervariable in the element pattern
+C                -------------------------------------------------------
+
+                 IW (P) = I
+                 P = P + 1
+                 ENDIF
+260           CONTINUE
+
+C=======================================================================
+C  FINALIZE THE NEW ELEMENT
+C=======================================================================
+
+           NV (ME) = NVPIV + DEGME
+C          nv (me) is now the degree of pivot (including diagonal part)
+C          save the length of the list for the new element me
+           LEN (ME) = P - PME1
+           IF (LEN (ME) .EQ. 0) THEN
+C             there is nothing left of the current pivot element
+              PE (ME) = 0
+              W (ME) = 0
+              ENDIF
+           IF (NEWMEM .NE. 0) THEN
+C             element was not constructed in place: deallocate part
+C             of it (final size is less than or equal to newmem,
+C             since newly nonprincipal variables have been removed).
+              PFREE = P
+              MEM = MEM - NEWMEM + LEN (ME)
+              ENDIF
+
+C=======================================================================
+C          END WHILE (selecting pivots)
+           GOTO 30
+           ENDIF
+C=======================================================================
+
+C=======================================================================
+C  COMPUTE THE PERMUTATION VECTORS
+C=======================================================================
+
+C       ----------------------------------------------------------------
+C       The time taken by the following code is O(n).  At this
+C       point, elen (e) = -k has been done for all elements e,
+C       and elen (i) = 0 has been done for all nonprincipal
+C       variables i.  At this point, there are no principal
+C       supervariables left, and all elements are absorbed.
+C       ----------------------------------------------------------------
+
+C       ----------------------------------------------------------------
+C       compute the ordering of unordered nonprincipal variables
+C       ----------------------------------------------------------------
+
+        DO 290 I = 1, N
+           IF (ELEN (I) .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             i is an un-ordered row.  Traverse the tree from i until
+C             reaching an element, e.  The element, e, was the
+C             principal supervariable of i and all nodes in the path
+C             from i to when e was selected as pivot.
+C             ----------------------------------------------------------
+
+              J = -PE (I)
+C             while (j is a variable) do:
+270           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 J = -PE (J)
+                 GOTO 270
+                 ENDIF
+              E = J
+
+C             ----------------------------------------------------------
+C             get the current pivot ordering of e
+C             ----------------------------------------------------------
+
+              K = -ELEN (E)
+
+C             ----------------------------------------------------------
+C             traverse the path again from i to e, and compress the
+C             path (all nodes point to e).  Path compression allows
+C             this code to compute in O(n) time.  Order the unordered
+C             nodes in the path, and place the element e at the end.
+C             ----------------------------------------------------------
+
+              J = I
+C             while (j is a variable) do:
+280           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 JNEXT = -PE (J)
+                 PE (J) = -E
+                 IF (ELEN (J) .EQ. 0) THEN
+C                   j is an unordered row
+                    ELEN (J) = K
+                    K = K + 1
+                    ENDIF
+                 J = JNEXT
+                 GOTO 280
+                 ENDIF
+C             leave elen (e) negative, so we know it is an element
+              ELEN (E) = -K
+              ENDIF
+290        CONTINUE
+
+C       ----------------------------------------------------------------
+C       reset the inverse permutation (elen (1..n)) to be positive,
+C       and compute the permutation (last (1..n)).
+C       ----------------------------------------------------------------
+
+        DO 300 I = 1, N
+           K = ABS (ELEN (I))
+           LAST (K) = I
+           ELEN (I) = K
+300        CONTINUE
+
+C=======================================================================
+C  RETURN THE MEMORY USAGE IN IW
+C=======================================================================
+
+C       If maxmem is less than or equal to iwlen, then no compressions
+C       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise
+C       compressions did occur, and iwlen would have had to have been
+C       greater than or equal to maxmem for no compressions to occur.
+C       Return the value of maxmem in the pfree argument.
+
+        PFREE = MAXMEM
+
+        RETURN
+        END
+
diff --git a/contrib/taucs/external/src/amdhaf.c b/contrib/taucs/external/src/amdhaf.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dda5e50cd0b5e5abb168840f052a3160777dd05
--- /dev/null
+++ b/contrib/taucs/external/src/amdhaf.c
@@ -0,0 +1,1441 @@
+/* amdhaf.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Subroutine */ int amdhaf_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo;
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer hash, pend, hmod, lenj, dmax_, wbig, wflg, psrc, pdst, e, 
+	    i, j, k, p, degme, x, nleft, ilast, jlast, inext, jnext, p1, 
+	    nvpiv, p2, p3, me, ln, pj, pn, mindeg, elenme, slenme, maxmem, 
+	    newmem, deg, eln, mem, nel, pme, nvi, nvj, pme1, pme2, knt1, knt2,
+	     knt3;
+
+/* -----------------------------------------------------------------------
+ */
+/*  The MC47 / AMD suite of minimum degree ordering algorithms. */
+
+/*  This code is one of seven variations of a single algorithm: */
+/*  the primary routine (MC47B/BD, only available in the Harwell */
+/*  Subroutine Library), and 6 variations that differ only in */
+/*  how they compute the degree (available in NETLIB). */
+
+/*  For information on the Harwell Subroutine Library, contact */
+/*  John Harding, Harwell Subroutine Library, B 552, AEA Technology, */
+/*  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573, */
+/*  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will */
+/*  provide details of price and conditions of use. */
+/* -----------------------------------------------------------------------
+ */
+/* ***********************************************************************
+ */
+/* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU, */
+/* and AMDATR) may be used SOLELY for educational, research, and */
+/* benchmarking purposes by non-profit organizations and the U.S. */
+/* government.  Commercial and other organizations may make use of the */
+/* AMD routines SOLELY for benchmarking purposes only.  The AMD */
+/* routines may be modified by or on behalf of the User for such */
+/* use but at no time shall the AMD routines or any such modified */
+/* version of them become the property of the User.  The AMD routines */
+/* are provided without warranty of any kind, either expressed or */
+/* implied.  Neither the Authors nor their employers shall be liable */
+/* for any direct or consequential loss or damage whatsoever arising */
+/* out of the use or misuse of the AMD routines by the User.  The AMD */
+/* routines must not be sold.  You may make copies of the AMD routines, */
+/* but this NOTICE and the Copyright notice must appear in all copies. */
+/* Any other use of the AMD routines requires written permission. */
+/* Your use of the AMD routines is an implicit agreement to these */
+/* conditions." */
+/* ***********************************************************************
+ */
+/* -----------------------------------------------------------------------
+ */
+/* AMDhaf:  approximate minimum (half-and-half) degree ordering algorithm 
+*/
+/* -----------------------------------------------------------------------
+ */
+/*  Variation 3:  "Half-and-half" approximate external degree, */
+/*  combining the exact external degree for "2-adjacent" nodes and */
+/*  the Gilbert-Moler-Schreiber approximate external degree otherwise */
+/*  (see  J. R. Gilbert, C. Moler, and R.  Schreiber, Sparse matrices */
+/*  in MATLAB:  design and implementation, SIAM J. Matrix Analysis and */
+/*  Applications, vol. 13, 1992, pp. 333-356).  Note that some of the */
+/*  comments in the code below reflect the MC47-style degree */
+/*  approximation. */
+
+/*  This method is often almost as fast as MC47B/BD (although it has */
+/*  been observed to be nearly 3 times slower than MC47B/BD for some */
+/*  matrices), and often computes orderings comparable to MC47B/BD. */
+/*  It can compute poor orderings for some matrices, however. */
+
+/*  We recommend using MC47B/BD instead of this routine since MC47B/BD */
+/*  gives better or comparable results in slightly less time. */
+/* -----------------------------------------------------------------------
+ */
+/* Given a representation of the nonzero pattern of a symmetric matrix, */
+/*       A, (excluding the diagonal) perform an approximate minimum */
+/*       (half-and-half) degree ordering to compute a pivot order such */
+/*       that the introduction of nonzeros (fill-in) in the Cholesky */
+/*       factors A = LL^T are kept low.  At each step, the pivot */
+/*       selected is the one with the minimum "half-and-half" upper-bound 
+*/
+/*       on the external degree. */
+/* ********************************************************************** 
+*/
+/* ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ****** 
+*/
+/* ********************************************************************** 
+*/
+/* ** If you want error checking, a more versatile input format, and a ** 
+*/
+/* ** simpler user interface, then use MC47A/AD in the Harwell         ** 
+*/
+/* ** Subroutine Library, which checks for errors, transforms the      ** 
+*/
+/* ** input, and calls MC47B/BD.                                       ** 
+*/
+/* ********************************************************************** 
+*/
+/*       References:  (UF Tech Reports are available via anonymous ftp */
+/*       to ftp.cis.ufl.edu:cis/tech-reports). */
+
+/*       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern */
+/*               multifrontal method for sparse LU factorization", */
+/*               SIAM J. Matrix Analysis and Applications, to appear. */
+/*               also Univ. of Florida Technical Report TR-94-038. */
+/*               Discusses UMFPACK / MA38. */
+
+/*       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, */
+/*               "An approximate minimum degree ordering algorithm," */
+/*               SIAM J. Matrix Analysis and Applications (to appear), */
+/*               also Univ. of Florida Technical Report TR-94-039. */
+/*               Discusses this routine. */
+
+/*       [3] Alan George and Joseph Liu, "The evolution of the */
+/*               minimum degree ordering algorithm," SIAM Review, vol. */
+/*               31, no. 1, pp. 1-19, March 1989.  We list below the */
+/*               features mentioned in that paper that this code */
+/*               includes: */
+
+/*       mass elimination: */
+/*               Yes.  MA27 relied on supervariable detection for mass */
+/*               elimination. */
+/*       indistinguishable nodes: */
+/*               Yes (we call these "supervariables").  This was also in 
+*/
+/*               the MA27 code - although we modified the method of */
+/*               detecting them (the previous hash was the true degree, */
+/*               which we no longer keep track of).  A supervariable is */
+/*               a set of rows with identical nonzero pattern.  All */
+/*               variables in a supervariable are eliminated together. */
+/*               Each supervariable has as its numerical name that of */
+/*               one of its variables (its principal variable). */
+/*       quotient graph representation: */
+/*               Yes.  We use the term "element" for the cliques formed */
+/*               during elimination.  This was also in the MA27 code. */
+/*               The algorithm can operate in place, but it will work */
+/*               more efficiently if given some "elbow room." */
+/*       element absorption: */
+/*               Yes.  This was also in the MA27 code. */
+/*       external degree: */
+/*               Yes.  The MA27 code was based on the true degree. */
+/*       incomplete degree update and multiple elimination: */
+/*               No.  This was not in MA27, either.  Our method of */
+/*               degree update within MC47B/BD is element-based, not */
+/*               variable-based.  It is thus not well-suited for use */
+/*               with incomplete degree update or multiple elimination. */
+/* -----------------------------------------------------------------------
+ */
+/* Authors, and Copyright (C) 1995 by: */
+/*       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid. 
+*/
+
+/* Acknowledgements: */
+/*       This work (and the UMFPACK package) was supported by the */
+/*       National Science Foundation (ASC-9111263 and DMS-9223088). */
+/*       The UMFPACK/MA38 approximate degree update algorithm, the */
+/*       unsymmetric analog which forms the basis of MC47B/BD, was */
+/*       developed while Tim Davis was supported by CERFACS (Toulouse, */
+/*       France) in a post-doctoral position. */
+
+/* Date:  September, 1995 */
+/* -----------------------------------------------------------------------
+ */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT ARGUMENTS (unaltered): */
+/* -----------------------------------------------------------------------
+ */
+/* n:    The matrix order. */
+
+/*       Restriction:  1 .le. n .lt. (iovflo/2)-2 */
+/* iwlen:        The length of iw (1..iwlen).  On input, the matrix is */
+/*       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/*       slightly larger than what is required to hold the matrix, at */
+/*       least iwlen .ge. pfree + n is recommended.  Otherwise, */
+/*       excessive compressions will take place. */
+/*       *** We do not recommend running this algorithm with *** */
+/*       ***      iwlen .lt. pfree + n.                      *** */
+/*       *** Better performance will be obtained if          *** */
+/*       ***      iwlen .ge. pfree + n                       *** */
+/*       *** or better yet                                   *** */
+/*       ***      iwlen .gt. 1.2 * pfree                     *** */
+/*       *** (where pfree is its value on input).            *** */
+/*       The algorithm will not run at all if iwlen .lt. pfree-1. */
+
+/*       Restriction: iwlen .ge. pfree-1 */
+/* iovflo:       The largest positive integer that your computer can */
+/*       represent (-iovflo should also be representable).  On a 32-bit */
+/*       computer with 2's-complement arithmetic, */
+/*       iovflo = (2^31)-1 = 2,147,483,647. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/OUTPUT ARGUMENTS: */
+/* -----------------------------------------------------------------------
+ */
+/* pe:   On input, pe (i) is the index in iw of the start of row i, or */
+/*       zero if row i has no off-diagonal non-zeros. */
+
+/*       During execution, it is used for both supervariables and */
+/*       elements: */
+
+/*       * Principal supervariable i:  index into iw of the */
+/*               description of supervariable i.  A supervariable */
+/*               represents one or more rows of the matrix */
+/*               with identical nonzero pattern. */
+/*       * Non-principal supervariable i:  if i has been absorbed */
+/*               into another supervariable j, then pe (i) = -j. */
+/*               That is, j has the same pattern as i. */
+/*               Note that j might later be absorbed into another */
+/*               supervariable j2, in which case pe (i) is still -j, */
+/*               and pe (j) = -j2. */
+/*       * Unabsorbed element e:  the index into iw of the description */
+/*               of element e, if e has not yet been absorbed by a */
+/*               subsequent element.  Element e is created when */
+/*               the supervariable of the same name is selected as */
+/*               the pivot. */
+/*       * Absorbed element e:  if element e is absorbed into element */
+/*               e2, then pe (e) = -e2.  This occurs when the pattern of 
+*/
+/*               e (that is, Le) is found to be a subset of the pattern */
+/*               of e2 (that is, Le2).  If element e is "null" (it has */
+/*               no nonzeros outside its pivot block), then pe (e) = 0. */
+
+/*       On output, pe holds the assembly tree/forest, which implicitly */
+/*       represents a pivot order with identical fill-in as the actual */
+/*       order (via a depth-first search of the tree). */
+
+/*       On output: */
+/*       If nv (i) .gt. 0, then i represents a node in the assembly tree, 
+*/
+/*       and the parent of i is -pe (i), or zero if i is a root. */
+/*       If nv (i) = 0, then (i,-pe (i)) represents an edge in a */
+/*       subtree, the root of which is a node in the assembly tree. */
+/* pfree:        On input the tail end of the array, iw (pfree..iwlen), */
+/*       is empty, and the matrix is stored in iw (1..pfree-1). */
+/*       During execution, additional data is placed in iw, and pfree */
+/*       is modified so that iw (pfree..iwlen) is always the unused part 
+*/
+/*       of iw.  On output, pfree is set equal to the size of iw that */
+/*       would have been needed for no compressions to occur.  If */
+/*       ncmpa is zero, then pfree (on output) is less than or equal to */
+/*       iwlen, and the space iw (pfree+1 ... iwlen) was not used. */
+/*       Otherwise, pfree (on output) is greater than iwlen, and all the 
+*/
+/*       memory in iw was used. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/MODIFIED (undefined on output): */
+/* -----------------------------------------------------------------------
+ */
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/*       matrix, excluding the diagonal.  The contents of len (1..n) */
+/*       are undefined on output. */
+/* iw:   On input, iw (1..pfree-1) holds the description of each row i */
+/*       in the matrix.  The matrix must be symmetric, and both upper */
+/*       and lower triangular parts must be present.  The diagonal must */
+/*       not be present.  Row i is held as follows: */
+
+/*               len (i):  the length of the row i data structure */
+/*               iw (pe (i) ... pe (i) + len (i) - 1): */
+/*                       the list of column indices for nonzeros */
+/*                       in row i (simple supervariables), excluding */
+/*                       the diagonal.  All supervariables start with */
+/*                       one row/column each (supervariable i is just */
+/*                       row i). */
+/*               if len (i) is zero on input, then pe (i) is ignored */
+/*               on input. */
+
+/*               Note that the rows need not be in any particular order, 
+*/
+/*               and there may be empty space between the rows. */
+
+/*       During execution, the supervariable i experiences fill-in. */
+/*       This is represented by placing in i a list of the elements */
+/*       that cause fill-in in supervariable i: */
+
+/*               len (i):  the length of supervariable i */
+/*               iw (pe (i) ... pe (i) + elen (i) - 1): */
+/*                       the list of elements that contain i.  This list 
+*/
+/*                       is kept short by removing absorbed elements. */
+/*               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1): */
+/*                       the list of supervariables in i.  This list */
+/*                       is kept short by removing nonprincipal */
+/*                       variables, and any entry j that is also */
+/*                       contained in at least one of the elements */
+/*                       (j in Le) in the list for i (e in row i). */
+
+/*       When supervariable i is selected as pivot, we create an */
+/*       element e of the same name (e=i): */
+
+/*               len (e):  the length of element e */
+/*               iw (pe (e) ... pe (e) + len (e) - 1): */
+/*                       the list of supervariables in element e. */
+
+/*       An element represents the fill-in that occurs when supervariable 
+*/
+/*       i is selected as pivot (which represents the selection of row i 
+*/
+/*       and all non-principal variables whose principal variable is i). 
+*/
+/*       We use the term Le to denote the set of all supervariables */
+/*       in element e.  Absorbed supervariables and elements are pruned */
+/*       from these lists when computationally convenient. */
+
+/*       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. */
+/*       The contents of iw are undefined on output. */
+/* -----------------------------------------------------------------------
+ */
+/* OUTPUT (need not be set on input): */
+/* -----------------------------------------------------------------------
+ */
+/* nv:   During execution, abs (nv (i)) is equal to the number of rows */
+/*       that are represented by the principal supervariable i.  If i is 
+*/
+/*       a nonprincipal variable, then nv (i) = 0.  Initially, */
+/*       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a */
+/*       principal variable in the pattern Lme of the current pivot */
+/*       element me.  On output, nv (e) holds the true degree of element 
+*/
+/*       e at the time it was created (including the diagonal part). */
+/* ncmpa:        The number of times iw was compressed.  If this is */
+/*       excessive, then the execution took longer than what could have */
+/*       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20% */
+/*       larger than the value of pfree on input (or at least */
+/*       iwlen .ge. pfree + n).  The fastest performance will be */
+/*       obtained when ncmpa is returned as zero.  If iwlen is set to */
+/*       the value returned by pfree on *output*, then no compressions */
+/*       will occur. */
+/* elen: See the description of iw above.  At the start of execution, */
+/*       elen (i) is set to zero.  During execution, elen (i) is the */
+/*       number of elements in the list for supervariable i.  When e */
+/*       becomes an element, elen (e) = -nel is set, where nel is the */
+/*       current step of factorization.  elen (i) = 0 is done when i */
+/*       becomes nonprincipal. */
+
+/*       For variables, elen (i) .ge. 0 holds until just before the */
+/*       permutation vectors are computed.  For elements, */
+/*       elen (e) .lt. 0 holds. */
+
+/*       On output elen (1..n) holds the inverse permutation (the same */
+/*       as the 'INVP' argument in Sparspak).  That is, if k = elen (i), 
+*/
+/*       then row i is the kth pivot row.  Row i of A appears as the */
+/*       (elen(i))-th row in the permuted matrix, PAP^T. */
+/* last: In a degree list, last (i) is the supervariable preceding i, */
+/*       or zero if i is the head of the list.  In a hash bucket, */
+/*       last (i) is the hash key for i.  last (head (hash)) is also */
+/*       used as the head of a hash bucket if head (hash) contains a */
+/*       degree list (see head, below). */
+
+/*       On output, last (1..n) holds the permutation (the same as the */
+/*       'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/*       row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/*       in the permuted matrix, PAP^T. */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL (not input or output - used only during execution): */
+/* -----------------------------------------------------------------------
+ */
+/* degree:       If i is a supervariable, then degree (i) holds the */
+/*       current approximation of the external degree of row i (an upper 
+*/
+/*       bound).  The external degree is the number of nonzeros in row i, 
+*/
+/*       minus abs (nv (i)) (the diagonal part).  The bound is equal to */
+/*       the external degree if elen (i) is less than or equal to two. */
+
+/*       We also use the term "external degree" for elements e to refer */
+/*       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|, 
+*/
+/*       which is the degree of the off-diagonal part of the element e */
+/*       (not including the diagonal part). */
+/* head: head is used for degree lists.  head (deg) is the first */
+/*       supervariable in a degree list (all supervariables i in a */
+/*       degree list deg have the same approximate degree, namely, */
+/*       deg = degree (i)).  If the list deg is empty then */
+/*       head (deg) = 0. */
+
+/*       During supervariable detection head (hash) also serves as a */
+/*       pointer to a hash bucket. */
+/*       If head (hash) .gt. 0, there is a degree list of degree hash. */
+/*               The hash bucket head pointer is last (head (hash)). */
+/*       If head (hash) = 0, then the degree list and hash bucket are */
+/*               both empty. */
+/*       If head (hash) .lt. 0, then the degree list is empty, and */
+/*               -head (hash) is the head of the hash bucket. */
+/*       After supervariable detection is complete, all hash buckets */
+/*       are empty, and the (last (head (hash)) = 0) condition is */
+/*       restored for the non-empty degree lists. */
+/* next: next (i) is the supervariable following i in a link list, or */
+/*       zero if i is the last in the list.  Used for two kinds of */
+/*       lists:  degree lists and hash buckets (a supervariable can be */
+/*       in only one kind of list at a time). */
+/* w:    The flag array w determines the status of elements and */
+/*       variables, and the external degree of elements. */
+
+/*       for elements: */
+/*          if w (e) = 0, then the element e is absorbed */
+/*          if w (e) .ge. wflg, then w (e) - wflg is the size of */
+/*               the set |Le \ Lme|, in terms of nonzeros (the */
+/*               sum of abs (nv (i)) for each principal variable i that */
+/*               is both in the pattern of element e and NOT in the */
+/*               pattern of the current pivot element, me). */
+/*          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has */
+/*               not yet been seen in the scan of the element lists in */
+/*               the computation of |Le\Lme| in loop 150 below. */
+
+/*       for variables: */
+/*          during supervariable detection, if w (j) .ne. wflg then j is 
+*/
+/*          not in the pattern of variable i */
+
+/*       The w array is initialized by setting w (i) = 1 for all i, */
+/*       and by setting wflg = 2.  It is reinitialized if wflg becomes */
+/*       too large (to ensure that wflg+n does not cause integer */
+/*       overflow). */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL INTEGERS: */
+/* -----------------------------------------------------------------------
+ */
+/* deg:          the degree of a variable or element */
+/* degme:        size, |Lme|, of the current element, me (= degree (me)) 
+*/
+/* dext:         external degree, |Le \ Lme|, of some element e */
+/* dmax:         largest |Le| seen so far */
+/* e:            an element */
+/* elenme:       the length, elen (me), of element list of pivotal var. */
+/* eln:          the length, elen (...), of an element list */
+/* hash:         the computed value of the hash function */
+/* hmod:         the hash function is computed modulo hmod = max (1,n-1) 
+*/
+/* i:            a supervariable */
+/* ilast:        the entry in a link list preceding i */
+/* inext:        the entry in a link list following i */
+/* j:            a supervariable */
+/* jlast:        the entry in a link list preceding j */
+/* jnext:        the entry in a link list, or path, following j */
+/* k:            the pivot order of an element or variable */
+/* knt1:         loop counter used during element construction */
+/* knt2:         loop counter used during element construction */
+/* knt3:         loop counter used during compression */
+/* lenj:         len (j) */
+/* ln:           length of a supervariable list */
+/* maxmem:       amount of memory needed for no compressions */
+/* me:           current supervariable being eliminated, and the */
+/*                       current element created by eliminating that */
+/*                       supervariable */
+/* mem:          memory in use assuming no compressions have occurred */
+/* mindeg:       current minimum degree */
+/* nel:          number of pivots selected so far */
+/* newmem:       amount of new memory needed for current pivot element */
+/* nleft:        n - nel, the number of nonpivotal rows/columns remaining 
+*/
+/* nvi:          the number of variables in a supervariable i (= nv (i)) 
+*/
+/* nvj:          the number of variables in a supervariable j (= nv (j)) 
+*/
+/* nvpiv:        number of pivots in current element */
+/* slenme:       number of variables in variable list of pivotal variable 
+*/
+/* wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig. */
+/* we:           w (e) */
+/* wflg:         used for flagging the w array.  See description of w. */
+/* wnvi:         wflg - nv (i) */
+/* x:            either a supervariable or an element */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL POINTERS: */
+/* -----------------------------------------------------------------------
+ */
+/*               Any parameter (pe (...) or pfree) or local variable */
+/*               starting with "p" (for Pointer) is an index into iw, */
+/*               and all indices into iw use variables starting with */
+/*               "p."  The only exception to this rule is the iwlen */
+/*               input argument. */
+/* p:            pointer into lots of things */
+/* p1:           pe (i) for some variable i (start of element list) */
+/* p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list) 
+*/
+/* p3:           index of first supervariable in clean list */
+/* pdst:         destination pointer, for compression */
+/* pend:         end of memory to compress */
+/* pj:           pointer into an element or variable */
+/* pme:          pointer into the current element (pme1...pme2) */
+/* pme1:         the current element, me, is stored in iw (pme1...pme2) */
+/* pme2:         the end of the current element */
+/* pn:           pointer into a "clean" variable, also used to compress */
+/* psrc:         source pointer, for compression */
+/* -----------------------------------------------------------------------
+ */
+/*  FUNCTIONS CALLED: */
+/* -----------------------------------------------------------------------
+ */
+/* =======================================================================
+ */
+/*  INITIALIZATIONS */
+/* =======================================================================
+ */
+    /* Parameter adjustments */
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    wflg = 2;
+    mindeg = 1;
+    *ncmpa = 0;
+    nel = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - 1;
+    hmod = max(i__1,i__2);
+    dmax_ = 0;
+    wbig = *iovflo - *n;
+    mem = *pfree - 1;
+    maxmem = mem;
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	last[i] = 0;
+	head[i] = 0;
+	nv[i] = 1;
+	w[i] = 1;
+	elen[i] = 0;
+	degree[i] = len[i];
+/* L10: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       initialize degree lists and eliminate rows with no off-diag. nz. 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	deg = degree[i];
+	if (deg > 0) {
+/*             --------------------------------------------------
+-------- */
+/*             place i in the degree list corresponding to its */
+/*             degree */
+/*             --------------------------------------------------
+-------- */
+	    inext = head[deg];
+	    if (inext != 0) {
+		last[inext] = i;
+	    }
+	    next[i] = inext;
+	    head[deg] = i;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             we have a variable that can be eliminated at once */
+/*             because there is no off-diagonal non-zero in its */
+/*             row. */
+/*             --------------------------------------------------
+-------- */
+	    ++nel;
+	    elen[i] = -nel;
+	    pe[i] = 0;
+	    w[i] = 0;
+	}
+/* L20: */
+    }
+/* =======================================================================
+ */
+/*  WHILE (selecting pivots) DO */
+/* =======================================================================
+ */
+L30:
+    if (nel < *n) {
+/* ==================================================================
+===== */
+/*  GET PIVOT OF MINIMUM DEGREE */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          find next supervariable for elimination */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = *n;
+	for (deg = mindeg; deg <= i__1; ++deg) {
+	    me = head[deg];
+	    if (me > 0) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	mindeg = deg;
+/*          ---------------------------------------------------------
+---- */
+/*          remove chosen variable from link list */
+/*          ---------------------------------------------------------
+---- */
+	inext = next[me];
+	if (inext != 0) {
+	    last[inext] = 0;
+	}
+	head[deg] = inext;
+/*          ---------------------------------------------------------
+---- */
+/*          me represents the elimination of pivots nel+1 to nel+nv(me
+). */
+/*          place me itself as the first in this set.  It will be move
+d */
+/*          to the nel+nv(me) position when the permutation vectors ar
+e */
+/*          computed. */
+/*          ---------------------------------------------------------
+---- */
+	elenme = elen[me];
+	elen[me] = -(nel + 1);
+	nvpiv = nv[me];
+	nel += nvpiv;
+/* ==================================================================
+===== */
+/*  CONSTRUCT NEW ELEMENT */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          At this point, me is the pivotal supervariable.  It will b
+e */
+/*          converted into the current element.  Scan list of the */
+/*          pivotal supervariable, me, setting tree pointers and */
+/*          constructing new list of supervariables for the new elemen
+t, */
+/*          me.  p is a pointer to the current position in the old lis
+t. */
+/*          ---------------------------------------------------------
+---- */
+/*          flag the variable "me" as being in Lme by negating nv (me)
+ */
+	nv[me] = -nvpiv;
+	degme = 0;
+	if (elenme == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in place */
+/*             --------------------------------------------------
+-------- */
+	    pme1 = pe[me];
+	    pme2 = pme1 - 1;
+	    i__1 = pme1 + len[me] - 1;
+	    for (p = pme1; p <= i__1; ++p) {
+		i = iw[p];
+		nvi = nv[i];
+		if (nvi > 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   i is a principal variable not yet pla
+ced in Lme. */
+/*                   store i in new list */
+/*                   ------------------------------------
+---------------- */
+		    degme += nvi;
+/*                   flag i as being in Lme by negating nv
+ (i) */
+		    nv[i] = -nvi;
+		    ++pme2;
+		    iw[pme2] = i;
+/*                   ------------------------------------
+---------------- */
+/*                   remove variable i from degree list. 
+*/
+/*                   ------------------------------------
+---------------- */
+		    ilast = last[i];
+		    inext = next[i];
+		    if (inext != 0) {
+			last[inext] = ilast;
+		    }
+		    if (ilast != 0) {
+			next[ilast] = inext;
+		    } else {
+/*                      i is at the head of the degree
+ list */
+			head[degree[i]] = inext;
+		    }
+		}
+/* L60: */
+	    }
+/*             this element takes no new memory in iw: */
+	    newmem = 0;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in empty space, iw (pfree
+ ...) */
+/*             --------------------------------------------------
+-------- */
+	    p = pe[me];
+	    pme1 = *pfree;
+	    slenme = len[me] - elenme;
+	    i__1 = elenme + 1;
+	    for (knt1 = 1; knt1 <= i__1; ++knt1) {
+		if (knt1 > elenme) {
+/*                   search the supervariables in me. */
+		    e = me;
+		    pj = p;
+		    ln = slenme;
+		} else {
+/*                   search the elements in me. */
+		    e = iw[p];
+		    ++p;
+		    pj = pe[e];
+		    ln = len[e];
+		}
+/*                -------------------------------------------
+------------ */
+/*                search for different supervariables and add 
+them to the */
+/*                new list, compressing when necessary. this l
+oop is */
+/*                executed once for each element in the list a
+nd once for */
+/*                all the supervariables in the list. */
+/*                -------------------------------------------
+------------ */
+		i__2 = ln;
+		for (knt2 = 1; knt2 <= i__2; ++knt2) {
+		    i = iw[pj];
+		    ++pj;
+		    nvi = nv[i];
+		    if (nvi > 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      compress iw, if necessary */
+/*                      -----------------------------
+-------------------- */
+			if (*pfree > *iwlen) {
+/*                         prepare for compressing
+ iw by adjusting */
+/*                         pointers and lengths so
+ that the lists being */
+/*                         searched in the inner a
+nd outer loops contain */
+/*                         only the remaining entr
+ies. */
+			    pe[me] = p;
+			    len[me] -= knt1;
+			    if (len[me] == 0) {
+/*                            nothing left of 
+supervariable me */
+				pe[me] = 0;
+			    }
+			    pe[e] = pj;
+			    len[e] = ln - knt2;
+			    if (len[e] == 0) {
+/*                            nothing left of 
+element e */
+				pe[e] = 0;
+			    }
+			    ++(*ncmpa);
+/*                         store first item in pe 
+*/
+/*                         set first entry to -ite
+m */
+			    i__3 = *n;
+			    for (j = 1; j <= i__3; ++j) {
+				pn = pe[j];
+				if (pn > 0) {
+				    pe[j] = iw[pn];
+				    iw[pn] = -j;
+				}
+/* L70: */
+			    }
+/*                         psrc/pdst point to sour
+ce/destination */
+			    pdst = 1;
+			    psrc = 1;
+			    pend = pme1 - 1;
+/*                         while loop: */
+L80:
+			    if (psrc <= pend) {
+/*                            search for next 
+negative entry */
+				j = -iw[psrc];
+				++psrc;
+				if (j > 0) {
+				    iw[pdst] = pe[j];
+				    pe[j] = pdst;
+				    ++pdst;
+/*                               copy from
+ source to destination */
+				    lenj = len[j];
+				    i__3 = lenj - 2;
+				    for (knt3 = 0; knt3 <= i__3; ++knt3) {
+					iw[pdst + knt3] = iw[psrc + knt3];
+/* L90: */
+				    }
+				    pdst = pdst + lenj - 1;
+				    psrc = psrc + lenj - 1;
+				}
+				goto L80;
+			    }
+/*                         move the new partially-
+constructed element */
+			    p1 = pdst;
+			    i__3 = *pfree - 1;
+			    for (psrc = pme1; psrc <= i__3; ++psrc) {
+				iw[pdst] = iw[psrc];
+				++pdst;
+/* L100: */
+			    }
+			    pme1 = p1;
+			    *pfree = pdst;
+			    pj = pe[e];
+			    p = pe[me];
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      i is a principal variable not 
+yet placed in Lme */
+/*                      store i in new list */
+/*                      -----------------------------
+-------------------- */
+			degme += nvi;
+/*                      flag i as being in Lme by nega
+ting nv (i) */
+			nv[i] = -nvi;
+			iw[*pfree] = i;
+			++(*pfree);
+/*                      -----------------------------
+-------------------- */
+/*                      remove variable i from degree 
+link list */
+/*                      -----------------------------
+-------------------- */
+			ilast = last[i];
+			inext = next[i];
+			if (inext != 0) {
+			    last[inext] = ilast;
+			}
+			if (ilast != 0) {
+			    next[ilast] = inext;
+			} else {
+/*                         i is at the head of the
+ degree list */
+			    head[degree[i]] = inext;
+			}
+		    }
+/* L110: */
+		}
+		if (e != me) {
+/*                   set tree pointer and flag to indicate
+ element e is */
+/*                   absorbed into new element me (the par
+ent of e is me) */
+		    pe[e] = -me;
+		    w[e] = 0;
+		}
+/* L120: */
+	    }
+	    pme2 = *pfree - 1;
+/*             this element takes newmem new memory in iw (possibl
+y zero) */
+	    newmem = *pfree - pme1;
+	    mem += newmem;
+	    maxmem = max(maxmem,mem);
+	}
+/*          ---------------------------------------------------------
+---- */
+/*          me has now been converted into an element in iw (pme1..pme
+2) */
+/*          ---------------------------------------------------------
+---- */
+/*          degme holds the external degree of new element */
+	degree[me] = degme;
+	pe[me] = pme1;
+	len[me] = pme2 - pme1 + 1;
+/*          ---------------------------------------------------------
+---- */
+/*          make sure that wflg is not too large.  With the current */
+/*          value of wflg, wflg+n must not cause integer overflow */
+/*          ---------------------------------------------------------
+---- */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L130: */
+	    }
+	    wflg = 2;
+	}
+/* ==================================================================
+===== */
+/*  DEGREE UPDATE AND ELEMENT ABSORPTION */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 2:  for each i in Lme, sum up the degree of Lme (whic
+h */
+/*          is degme), plus the sum of the external degrees of each Le
+ */
+/*          for the elements e appearing within i, plus the */
+/*          supervariables in i.  Place i in hash list. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    p1 = pe[i];
+	    p2 = p1 + elen[i] - 1;
+	    pn = p1;
+	    hash = 0;
+	    deg = 0;
+/*             --------------------------------------------------
+-------- */
+/*             scan the element list associated with supervariable
+ i */
+/*             --------------------------------------------------
+-------- */
+/*             half-and-half approximate degree: */
+	    ++wflg;
+/*             Gilbert-Moler-Schreiber approximate external degree
+ */
+/*             on first pass: */
+	    nvi = -nv[i];
+	    i__2 = p2;
+	    for (p = p1; p <= i__2; ++p) {
+		e = iw[p];
+		if (w[e] != 0) {
+/*                   e is an unabsorbed element */
+		    deg = deg + degree[e] - nvi;
+		    iw[pn] = e;
+		    ++pn;
+		    hash += e;
+		}
+/* L160: */
+	    }
+	    if (pn == p1 + 1) {
+/*                exact external degree for elen = 2: */
+/*                (elen (i) is currently 1, but will become 2 
+when */
+/*                me is added to the element list) */
+		deg = 0;
+		e = iw[p1];
+		i__2 = pe[e] + len[e] - 1;
+		for (p = pe[e]; p <= i__2; ++p) {
+		    j = iw[p];
+		    nvj = nv[j];
+		    if (nvj > 0 && w[j] != wflg) {
+/*                      j is principal and not in Lme 
+if nv (j) .gt. 0 */
+/*                      and j is not yet seen if w (j)
+ .ne. wflg */
+			w[j] = wflg;
+			deg += nvj;
+		    }
+/* L145: */
+		}
+	    }
+/*             count the number of elements in i (including me): 
+*/
+	    elen[i] = pn - p1 + 1;
+/*             --------------------------------------------------
+-------- */
+/*             scan the supervariables in the list associated with
+ i */
+/*             --------------------------------------------------
+-------- */
+	    p3 = pn;
+	    i__2 = p1 + len[i] - 1;
+	    for (p = p2 + 1; p <= i__2; ++p) {
+		j = iw[p];
+		nvj = nv[j];
+		if (nvj > 0) {
+/*                   j is unabsorbed, and not in Lme. */
+/*                   add to degree and add to new list */
+		    deg += nvj;
+		    iw[pn] = j;
+		    ++pn;
+		    hash += j;
+		}
+/* L170: */
+	    }
+/*             --------------------------------------------------
+-------- */
+/*             update the degree and check for mass elimination */
+/*             --------------------------------------------------
+-------- */
+	    if (elen[i] == 1 && p3 == pn) {
+/*                -------------------------------------------
+------------ */
+/*                mass elimination */
+/*                -------------------------------------------
+------------ */
+/*                There is nothing left of this node except fo
+r an */
+/*                edge to the current pivot element.  elen (i)
+ is 1, */
+/*                and there are no variables adjacent to node 
+i. */
+/*                Absorb i into the current pivot element, me.
+ */
+		pe[i] = -me;
+		nvi = -nv[i];
+		degme -= nvi;
+		nvpiv += nvi;
+		nel += nvi;
+		nv[i] = 0;
+		elen[i] = 0;
+	    } else {
+/*                -------------------------------------------
+------------ */
+/*                update the upper-bound degree of i */
+/*                -------------------------------------------
+------------ */
+/*                the following degree does not yet include th
+e size */
+/*                of the current element, which is added later
+: */
+/* Computing MIN */
+		i__2 = degree[i];
+		degree[i] = min(i__2,deg);
+/*                -------------------------------------------
+------------ */
+/*                add me to the list for i */
+/*                -------------------------------------------
+------------ */
+/*                move first supervariable to end of list */
+		iw[pn] = iw[p3];
+/*                move first element to end of element part of
+ list */
+		iw[p3] = iw[p1];
+/*                add new element to front of list. */
+		iw[p1] = me;
+/*                store the new length of the list in len (i) 
+*/
+		len[i] = pn - p1 + 1;
+/*                -------------------------------------------
+------------ */
+/*                place in hash bucket.  Save hash key of i in
+ last (i). */
+/*                -------------------------------------------
+------------ */
+		hash = hash % hmod + 1;
+		j = head[hash];
+		if (j <= 0) {
+/*                   the degree list is empty, hash head i
+s -j */
+		    next[i] = -j;
+		    head[hash] = -i;
+		} else {
+/*                   degree list is not empty */
+/*                   use last (head (hash)) as hash head 
+*/
+		    next[i] = last[j];
+		    last[j] = i;
+		}
+		last[i] = hash;
+	    }
+/* L180: */
+	}
+	degree[me] = degme;
+/*          ---------------------------------------------------------
+---- */
+/*          Clear the counter array, w (...), by incrementing wflg. */
+/*          ---------------------------------------------------------
+---- */
+	++wflg;
+/*          make sure that wflg+n does not cause integer overflow */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L190: */
+	    }
+	    wflg = 2;
+	}
+/*          at this point, w (1..n) .lt. wflg holds */
+/* ==================================================================
+===== */
+/*  SUPERVARIABLE DETECTION */
+/* ==================================================================
+===== */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    if (nv[i] < 0) {
+/*                i is a principal variable in Lme */
+/*                -------------------------------------------
+------------ */
+/*                examine all hash buckets with 2 or more vari
+ables.  We */
+/*                do this by examining all unique hash keys for 
+super- */
+/*                variables in the pattern Lme of the current 
+element, me */
+/*                -------------------------------------------
+------------ */
+		hash = last[i];
+/*                let i = head of hash bucket, and empty the h
+ash bucket */
+		j = head[hash];
+		if (j == 0) {
+		    goto L250;
+		}
+		if (j < 0) {
+/*                   degree list is empty */
+		    i = -j;
+		    head[hash] = 0;
+		} else {
+/*                   degree list is not empty, restore las
+t () of head */
+		    i = last[j];
+		    last[j] = 0;
+		}
+		if (i == 0) {
+		    goto L250;
+		}
+/*                while loop: */
+L200:
+		if (next[i] != 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   this bucket has one or more variables
+ following i. */
+/*                   scan all of them to see if i can abso
+rb any entries */
+/*                   that follow i in hash bucket.  Scatte
+r i into w. */
+/*                   ------------------------------------
+---------------- */
+		    ln = len[i];
+		    eln = elen[i];
+/*                   do not flag the first element in the 
+list (me) */
+		    i__2 = pe[i] + ln - 1;
+		    for (p = pe[i] + 1; p <= i__2; ++p) {
+			w[iw[p]] = wflg;
+/* L210: */
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   scan every other entry j following i 
+in bucket */
+/*                   ------------------------------------
+---------------- */
+		    jlast = i;
+		    j = next[i];
+/*                   while loop: */
+L220:
+		    if (j != 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      check if j and i have identica
+l nonzero pattern */
+/*                      -----------------------------
+-------------------- */
+			if (len[j] != ln) {
+/*                         i and j do not have sam
+e size data structure */
+			    goto L240;
+			}
+			if (elen[j] != eln) {
+/*                         i and j do not have sam
+e number of adjacent el */
+			    goto L240;
+			}
+/*                      do not flag the first element 
+in the list (me) */
+			i__2 = pe[j] + ln - 1;
+			for (p = pe[j] + 1; p <= i__2; ++p) {
+			    if (w[iw[p]] != wflg) {
+/*                            an entry (iw(p))
+ is in j but not in i */
+				goto L240;
+			    }
+/* L230: */
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      found it!  j can be absorbed i
+nto i */
+/*                      -----------------------------
+-------------------- */
+			pe[j] = -i;
+/*                      both nv (i) and nv (j) are neg
+ated since they */
+/*                      are in Lme, and the absolute v
+alues of each */
+/*                      are the number of variables in
+ i and j: */
+			nv[i] += nv[j];
+			nv[j] = 0;
+			elen[j] = 0;
+/*                      delete j from hash bucket */
+			j = next[j];
+			next[jlast] = j;
+			goto L220;
+/*                      -----------------------------
+-------------------- */
+L240:
+/*                      j cannot be absorbed into i */
+/*                      -----------------------------
+-------------------- */
+			jlast = j;
+			j = next[j];
+			goto L220;
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   no more variables can be absorbed int
+o i */
+/*                   go to next i in bucket and clear flag
+ array */
+/*                   ------------------------------------
+---------------- */
+		    ++wflg;
+		    i = next[i];
+		    if (i != 0) {
+			goto L200;
+		    }
+		}
+	    }
+L250:
+	    ;
+	}
+/* ==================================================================
+===== */
+/*  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMEN
+T */
+/* ==================================================================
+===== */
+	p = pme1;
+	nleft = *n - nel;
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    nvi = -nv[i];
+	    if (nvi > 0) {
+/*                i is a principal variable in Lme */
+/*                restore nv (i) to signify that i is principa
+l */
+		nv[i] = nvi;
+/*                -------------------------------------------
+------------ */
+/*                compute the external degree (add size of cur
+rent elem) */
+/*                -------------------------------------------
+------------ */
+/* Computing MAX */
+/* Computing MIN */
+		i__4 = degree[i] + degme - nvi, i__5 = nleft - nvi;
+		i__2 = 1, i__3 = min(i__4,i__5);
+		deg = max(i__2,i__3);
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable at the head of the d
+egree list */
+/*                -------------------------------------------
+------------ */
+		inext = head[deg];
+		if (inext != 0) {
+		    last[inext] = i;
+		}
+		next[i] = inext;
+		last[i] = 0;
+		head[deg] = i;
+/*                -------------------------------------------
+------------ */
+/*                save the new degree, and find the minimum de
+gree */
+/*                -------------------------------------------
+------------ */
+		mindeg = min(mindeg,deg);
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable in the element patte
+rn */
+/*                -------------------------------------------
+------------ */
+		iw[p] = i;
+		++p;
+	    }
+/* L260: */
+	}
+/* ==================================================================
+===== */
+/*  FINALIZE THE NEW ELEMENT */
+/* ==================================================================
+===== */
+	nv[me] = nvpiv + degme;
+/*          nv (me) is now the degree of pivot (including diagonal par
+t) */
+/*          save the length of the list for the new element me */
+	len[me] = p - pme1;
+	if (len[me] == 0) {
+/*             there is nothing left of the current pivot element 
+*/
+	    pe[me] = 0;
+	    w[me] = 0;
+	}
+	if (newmem != 0) {
+/*             element was not constructed in place: deallocate pa
+rt */
+/*             of it (final size is less than or equal to newmem, 
+*/
+/*             since newly nonprincipal variables have been remove
+d). */
+	    *pfree = p;
+	    mem = mem - newmem + len[me];
+	}
+/* ==================================================================
+===== */
+/*          END WHILE (selecting pivots) */
+	goto L30;
+    }
+/* =======================================================================
+ */
+/* =======================================================================
+ */
+/*  COMPUTE THE PERMUTATION VECTORS */
+/* =======================================================================
+ */
+/*       ---------------------------------------------------------------- 
+*/
+/*       The time taken by the following code is O(n).  At this */
+/*       point, elen (e) = -k has been done for all elements e, */
+/*       and elen (i) = 0 has been done for all nonprincipal */
+/*       variables i.  At this point, there are no principal */
+/*       supervariables left, and all elements are absorbed. */
+/*       ---------------------------------------------------------------- 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+/*       compute the ordering of unordered nonprincipal variables */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	if (elen[i] == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             i is an un-ordered row.  Traverse the tree from i u
+ntil */
+/*             reaching an element, e.  The element, e, was the */
+/*             principal supervariable of i and all nodes in the p
+ath */
+/*             from i to when e was selected as pivot. */
+/*             --------------------------------------------------
+-------- */
+	    j = -pe[i];
+/*             while (j is a variable) do: */
+L270:
+	    if (elen[j] >= 0) {
+		j = -pe[j];
+		goto L270;
+	    }
+	    e = j;
+/*             --------------------------------------------------
+-------- */
+/*             get the current pivot ordering of e */
+/*             --------------------------------------------------
+-------- */
+	    k = -elen[e];
+/*             --------------------------------------------------
+-------- */
+/*             traverse the path again from i to e, and compress t
+he */
+/*             path (all nodes point to e).  Path compression allo
+ws */
+/*             this code to compute in O(n) time.  Order the unord
+ered */
+/*             nodes in the path, and place the element e at the e
+nd. */
+/*             --------------------------------------------------
+-------- */
+	    j = i;
+/*             while (j is a variable) do: */
+L280:
+	    if (elen[j] >= 0) {
+		jnext = -pe[j];
+		pe[j] = -e;
+		if (elen[j] == 0) {
+/*                   j is an unordered row */
+		    elen[j] = k;
+		    ++k;
+		}
+		j = jnext;
+		goto L280;
+	    }
+/*             leave elen (e) negative, so we know it is an elemen
+t */
+	    elen[e] = -k;
+	}
+/* L290: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       reset the inverse permutation (elen (1..n)) to be positive, */
+/*       and compute the permutation (last (1..n)). */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	k = (i__2 = elen[i], abs(i__2));
+	last[k] = i;
+	elen[i] = k;
+/* L300: */
+    }
+/* =======================================================================
+ */
+/*  RETURN THE MEMORY USAGE IN IW */
+/* =======================================================================
+ */
+/*       If maxmem is less than or equal to iwlen, then no compressions */
+/*       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise */
+/*       compressions did occur, and iwlen would have had to have been */
+/*       greater than or equal to maxmem for no compressions to occur. */
+/*       Return the value of maxmem in the pfree argument. */
+    *pfree = maxmem;
+    return 0;
+} /* amdhaf_ */
+
diff --git a/contrib/taucs/external/src/amdhaf.f b/contrib/taucs/external/src/amdhaf.f
new file mode 100644
index 0000000000000000000000000000000000000000..6612a9582d066f5759eb8cdb518e1eeaff3ca00d
--- /dev/null
+++ b/contrib/taucs/external/src/amdhaf.f
@@ -0,0 +1,1251 @@
+
+        SUBROUTINE AMDHAF
+     $          (N, PE, IW, LEN, IWLEN, PFREE, NV, NEXT,
+     $          LAST, HEAD, ELEN, DEGREE, NCMPA, W, IOVFLO)
+
+        INTEGER N, IWLEN, PFREE, NCMPA, IOVFLO, IW (IWLEN), PE (N),
+     $          DEGREE (N), NV (N), NEXT (N), LAST (N), HEAD (N),
+     $          ELEN (N), W (N), LEN (N)
+
+C-----------------------------------------------------------------------
+C  The MC47 / AMD suite of minimum degree ordering algorithms.
+C
+C  This code is one of seven variations of a single algorithm:
+C  the primary routine (MC47B/BD, only available in the Harwell
+C  Subroutine Library), and 6 variations that differ only in
+C  how they compute the degree (available in NETLIB).
+C
+C  For information on the Harwell Subroutine Library, contact
+C  John Harding, Harwell Subroutine Library, B 552, AEA Technology,
+C  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573,
+C  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will
+C  provide details of price and conditions of use.
+C-----------------------------------------------------------------------
+
+************************************************************************
+* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+* and AMDATR) may be used SOLELY for educational, research, and
+* benchmarking purposes by non-profit organizations and the U.S.
+* government.  Commercial and other organizations may make use of the
+* AMD routines SOLELY for benchmarking purposes only.  The AMD
+* routines may be modified by or on behalf of the User for such
+* use but at no time shall the AMD routines or any such modified
+* version of them become the property of the User.  The AMD routines
+* are provided without warranty of any kind, either expressed or
+* implied.  Neither the Authors nor their employers shall be liable
+* for any direct or consequential loss or damage whatsoever arising
+* out of the use or misuse of the AMD routines by the User.  The AMD
+* routines must not be sold.  You may make copies of the AMD routines,
+* but this NOTICE and the Copyright notice must appear in all copies.
+* Any other use of the AMD routines requires written permission.
+* Your use of the AMD routines is an implicit agreement to these
+* conditions."
+************************************************************************
+
+C-----------------------------------------------------------------------
+C AMDhaf:  approximate minimum (half-and-half) degree ordering algorithm
+C-----------------------------------------------------------------------
+
+C  Variation 3:  "Half-and-half" approximate external degree,
+C  combining the exact external degree for "2-adjacent" nodes and
+C  the Gilbert-Moler-Schreiber approximate external degree otherwise
+C  (see  J. R. Gilbert, C. Moler, and R.  Schreiber, Sparse matrices
+C  in MATLAB:  design and implementation, SIAM J. Matrix Analysis and
+C  Applications, vol. 13, 1992, pp. 333-356).  Note that some of the
+C  comments in the code below reflect the MC47-style degree
+C  approximation.
+C
+C  This method is often almost as fast as MC47B/BD (although it has
+C  been observed to be nearly 3 times slower than MC47B/BD for some
+C  matrices), and often computes orderings comparable to MC47B/BD.
+C  It can compute poor orderings for some matrices, however.
+C
+C  We recommend using MC47B/BD instead of this routine since MC47B/BD
+C  gives better or comparable results in slightly less time.
+
+C-----------------------------------------------------------------------
+
+C Given a representation of the nonzero pattern of a symmetric matrix,
+C       A, (excluding the diagonal) perform an approximate minimum
+C       (half-and-half) degree ordering to compute a pivot order such
+C       that the introduction of nonzeros (fill-in) in the Cholesky
+C       factors A = LL^T is kept low.  At each step, the pivot
+C       selected is the one with the minimum "half-and-half" upper-bound
+C       on the external degree.
+
+C **********************************************************************
+C ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
+C **********************************************************************
+C ** If you want error checking, a more versatile input format, and a **
+C ** simpler user interface, then use MC47A/AD in the Harwell         **
+C ** Subroutine Library, which checks for errors, transforms the      **
+C ** input, and calls MC47B/BD.                                       **
+C **********************************************************************
+
+C       References:  (UF Tech Reports are available via anonymous ftp
+C       to ftp.cis.ufl.edu:cis/tech-reports).
+C
+C       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern
+C               multifrontal method for sparse LU factorization",
+C               SIAM J. Matrix Analysis and Applications, to appear.
+C               also Univ. of Florida Technical Report TR-94-038.
+C               Discusses UMFPACK / MA38.
+C
+C       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff,
+C               "An approximate minimum degree ordering algorithm,"
+C               SIAM J. Matrix Analysis and Applications (to appear),
+C               also Univ. of Florida Technical Report TR-94-039.
+C               Discusses this routine.
+C
+C       [3] Alan George and Joseph Liu, "The evolution of the
+C               minimum degree ordering algorithm," SIAM Review, vol.
+C               31, no. 1, pp. 1-19, March 1989.  We list below the
+C               features mentioned in that paper that this code
+C               includes:
+C
+C       mass elimination:
+C               Yes.  MA27 relied on supervariable detection for mass
+C               elimination.
+C       indistinguishable nodes:
+C               Yes (we call these "supervariables").  This was also in
+C               the MA27 code - although we modified the method of
+C               detecting them (the previous hash was the true degree,
+C               which we no longer keep track of).  A supervariable is
+C               a set of rows with identical nonzero pattern.  All
+C               variables in a supervariable are eliminated together.
+C               Each supervariable has as its numerical name that of
+C               one of its variables (its principal variable).
+C       quotient graph representation:
+C               Yes.  We use the term "element" for the cliques formed
+C               during elimination.  This was also in the MA27 code.
+C               The algorithm can operate in place, but it will work
+C               more efficiently if given some "elbow room."
+C       element absorption:
+C               Yes.  This was also in the MA27 code.
+C       external degree:
+C               Yes.  The MA27 code was based on the true degree.
+C       incomplete degree update and multiple elimination:
+C               No.  This was not in MA27, either.  Our method of
+C               degree update within MC47B/BD is element-based, not
+C               variable-based.  It is thus not well-suited for use
+C               with incomplete degree update or multiple elimination.
+
+C-----------------------------------------------------------------------
+C Authors, and Copyright (C) 1995 by:
+C       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid.
+C
+C Acknowledgements:
+C       This work (and the UMFPACK package) was supported by the
+C       National Science Foundation (ASC-9111263 and DMS-9223088).
+C       The UMFPACK/MA38 approximate degree update algorithm, the
+C       unsymmetric analog which forms the basis of MC47B/BD, was
+C       developed while Tim Davis was supported by CERFACS (Toulouse,
+C       France) in a post-doctoral position.
+C
+C Date:  September, 1995
+C-----------------------------------------------------------------------
+
+C-----------------------------------------------------------------------
+C INPUT ARGUMENTS (unaltered):
+C-----------------------------------------------------------------------
+
+C n:    The matrix order.
+C
+C       Restriction:  1 .le. n .lt. (iovflo/2)-2
+
+C iwlen:        The length of iw (1..iwlen).  On input, the matrix is
+C       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+C       slightly larger than what is required to hold the matrix, at
+C       least iwlen .ge. pfree + n is recommended.  Otherwise,
+C       excessive compressions will take place.
+C       *** We do not recommend running this algorithm with ***
+C       ***      iwlen .lt. pfree + n.                      ***
+C       *** Better performance will be obtained if          ***
+C       ***      iwlen .ge. pfree + n                       ***
+C       *** or better yet                                   ***
+C       ***      iwlen .gt. 1.2 * pfree                     ***
+C       *** (where pfree is its value on input).            ***
+C       The algorithm will not run at all if iwlen .lt. pfree-1.
+C
+C       Restriction: iwlen .ge. pfree-1
+
+C iovflo:       The largest positive integer that your computer can
+C       represent (-iovflo should also be representable).  On a 32-bit
+C       computer with 2's-complement arithmetic,
+C       iovflo = (2^31)-1 = 2,147,483,647.
+
+C-----------------------------------------------------------------------
+C INPUT/OUTPUT ARGUMENTS:
+C-----------------------------------------------------------------------
+
+C pe:   On input, pe (i) is the index in iw of the start of row i, or
+C       zero if row i has no off-diagonal non-zeros.
+C
+C       During execution, it is used for both supervariables and
+C       elements:
+C
+C       * Principal supervariable i:  index into iw of the
+C               description of supervariable i.  A supervariable
+C               represents one or more rows of the matrix
+C               with identical nonzero pattern.
+C       * Non-principal supervariable i:  if i has been absorbed
+C               into another supervariable j, then pe (i) = -j.
+C               That is, j has the same pattern as i.
+C               Note that j might later be absorbed into another
+C               supervariable j2, in which case pe (i) is still -j,
+C               and pe (j) = -j2.
+C       * Unabsorbed element e:  the index into iw of the description
+C               of element e, if e has not yet been absorbed by a
+C               subsequent element.  Element e is created when
+C               the supervariable of the same name is selected as
+C               the pivot.
+C       * Absorbed element e:  if element e is absorbed into element
+C               e2, then pe (e) = -e2.  This occurs when the pattern of
+C               e (that is, Le) is found to be a subset of the pattern
+C               of e2 (that is, Le2).  If element e is "null" (it has
+C               no nonzeros outside its pivot block), then pe (e) = 0.
+C
+C       On output, pe holds the assembly tree/forest, which implicitly
+C       represents a pivot order with identical fill-in as the actual
+C       order (via a depth-first search of the tree).
+C
+C       On output:
+C       If nv (i) .gt. 0, then i represents a node in the assembly tree,
+C       and the parent of i is -pe (i), or zero if i is a root.
+C       If nv (i) = 0, then (i,-pe (i)) represents an edge in a
+C       subtree, the root of which is a node in the assembly tree.
+
+C pfree:        On input the tail end of the array, iw (pfree..iwlen),
+C       is empty, and the matrix is stored in iw (1..pfree-1).
+C       During execution, additional data is placed in iw, and pfree
+C       is modified so that iw (pfree..iwlen) is always the unused part
+C       of iw.  On output, pfree is set equal to the size of iw that
+C       would have been needed for no compressions to occur.  If
+C       ncmpa is zero, then pfree (on output) is less than or equal to
+C       iwlen, and the space iw (pfree+1 ... iwlen) was not used.
+C       Otherwise, pfree (on output) is greater than iwlen, and all the
+C       memory in iw was used.
+
+C-----------------------------------------------------------------------
+C INPUT/MODIFIED (undefined on output):
+C-----------------------------------------------------------------------
+
+C len:  On input, len (i) holds the number of entries in row i of the
+C       matrix, excluding the diagonal.  The contents of len (1..n)
+C       are undefined on output.
+
+C iw:   On input, iw (1..pfree-1) holds the description of each row i
+C       in the matrix.  The matrix must be symmetric, and both upper
+C       and lower triangular parts must be present.  The diagonal must
+C       not be present.  Row i is held as follows:
+C
+C               len (i):  the length of the row i data structure
+C               iw (pe (i) ... pe (i) + len (i) - 1):
+C                       the list of column indices for nonzeros
+C                       in row i (simple supervariables), excluding
+C                       the diagonal.  All supervariables start with
+C                       one row/column each (supervariable i is just
+C                       row i).
+C               if len (i) is zero on input, then pe (i) is ignored
+C               on input.
+C
+C               Note that the rows need not be in any particular order,
+C               and there may be empty space between the rows.
+C
+C       During execution, the supervariable i experiences fill-in.
+C       This is represented by placing in i a list of the elements
+C       that cause fill-in in supervariable i:
+C
+C               len (i):  the length of supervariable i
+C               iw (pe (i) ... pe (i) + elen (i) - 1):
+C                       the list of elements that contain i.  This list
+C                       is kept short by removing absorbed elements.
+C               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1):
+C                       the list of supervariables in i.  This list
+C                       is kept short by removing nonprincipal
+C                       variables, and any entry j that is also
+C                       contained in at least one of the elements
+C                       (j in Le) in the list for i (e in row i).
+C
+C       When supervariable i is selected as pivot, we create an
+C       element e of the same name (e=i):
+C
+C               len (e):  the length of element e
+C               iw (pe (e) ... pe (e) + len (e) - 1):
+C                       the list of supervariables in element e.
+C
+C       An element represents the fill-in that occurs when supervariable
+C       i is selected as pivot (which represents the selection of row i
+C       and all non-principal variables whose principal variable is i).
+C       We use the term Le to denote the set of all supervariables
+C       in element e.  Absorbed supervariables and elements are pruned
+C       from these lists when computationally convenient.
+C
+C       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
+C       The contents of iw are undefined on output.
+
+C-----------------------------------------------------------------------
+C OUTPUT (need not be set on input):
+C-----------------------------------------------------------------------
+
+C nv:   During execution, abs (nv (i)) is equal to the number of rows
+C       that are represented by the principal supervariable i.  If i is
+C       a nonprincipal variable, then nv (i) = 0.  Initially,
+C       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a
+C       principal variable in the pattern Lme of the current pivot
+C       element me.  On output, nv (e) holds the true degree of element
+C       e at the time it was created (including the diagonal part).
+
+C ncmpa:        The number of times iw was compressed.  If this is
+C       excessive, then the execution took longer than what could have
+C       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20%
+C       larger than the value of pfree on input (or at least
+C       iwlen .ge. pfree + n).  The fastest performance will be
+C       obtained when ncmpa is returned as zero.  If iwlen is set to
+C       the value returned by pfree on *output*, then no compressions
+C       will occur.
+
+C elen: See the description of iw above.  At the start of execution,
+C       elen (i) is set to zero.  During execution, elen (i) is the
+C       number of elements in the list for supervariable i.  When e
+C       becomes an element, elen (e) = -nel is set, where nel is the
+C       current step of factorization.  elen (i) = 0 is done when i
+C       becomes nonprincipal.
+C
+C       For variables, elen (i) .ge. 0 holds until just before the
+C       permutation vectors are computed.  For elements,
+C       elen (e) .lt. 0 holds.
+C
+C       On output elen (1..n) holds the inverse permutation (the same
+C       as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+C       then row i is the kth pivot row.  Row i of A appears as the
+C       (elen(i))-th row in the permuted matrix, PAP^T.
+
+C last: In a degree list, last (i) is the supervariable preceding i,
+C       or zero if i is the head of the list.  In a hash bucket,
+C       last (i) is the hash key for i.  last (head (hash)) is also
+C       used as the head of a hash bucket if head (hash) contains a
+C       degree list (see head, below).
+C
+C       On output, last (1..n) holds the permutation (the same as the
+C       'PERM' argument in Sparspak).  That is, if i = last (k), then
+C       row i is the kth pivot row.  Row last (k) of A is the k-th row
+C       in the permuted matrix, PAP^T.
+
+C-----------------------------------------------------------------------
+C LOCAL (not input or output - used only during execution):
+C-----------------------------------------------------------------------
+
+C degree:       If i is a supervariable, then degree (i) holds the
+C       current approximation of the external degree of row i (an upper
+C       bound).  The external degree is the number of nonzeros in row i,
+C       minus abs (nv (i)) (the diagonal part).  The bound is equal to
+C       the external degree if elen (i) is less than or equal to two.
+C
+C       We also use the term "external degree" for elements e to refer
+C       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|,
+C       which is the degree of the off-diagonal part of the element e
+C       (not including the diagonal part).
+
+C head: head is used for degree lists.  head (deg) is the first
+C       supervariable in a degree list (all supervariables i in a
+C       degree list deg have the same approximate degree, namely,
+C       deg = degree (i)).  If the list deg is empty then
+C       head (deg) = 0.
+C
+C       During supervariable detection head (hash) also serves as a
+C       pointer to a hash bucket.
+C       If head (hash) .gt. 0, there is a degree list of degree hash.
+C               The hash bucket head pointer is last (head (hash)).
+C       If head (hash) = 0, then the degree list and hash bucket are
+C               both empty.
+C       If head (hash) .lt. 0, then the degree list is empty, and
+C               -head (hash) is the head of the hash bucket.
+C       After supervariable detection is complete, all hash buckets
+C       are empty, and the (last (head (hash)) = 0) condition is
+C       restored for the non-empty degree lists.
+
+C next: next (i) is the supervariable following i in a link list, or
+C       zero if i is the last in the list.  Used for two kinds of
+C       lists:  degree lists and hash buckets (a supervariable can be
+C       in only one kind of list at a time).
+
+C w:    The flag array w determines the status of elements and
+C       variables, and the external degree of elements.
+C
+C       for elements:
+C          if w (e) = 0, then the element e is absorbed
+C          if w (e) .ge. wflg, then w (e) - wflg is the size of
+C               the set |Le \ Lme|, in terms of nonzeros (the
+C               sum of abs (nv (i)) for each principal variable i that
+C               is both in the pattern of element e and NOT in the
+C               pattern of the current pivot element, me).
+C          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has
+C               not yet been seen in the scan of the element lists in
+C               the computation of |Le\Lme| in loop 150 below.
+C
+C       for variables:
+C          during supervariable detection, if w (j) .ne. wflg then j is
+C          not in the pattern of variable i
+C
+C       The w array is initialized by setting w (i) = 1 for all i,
+C       and by setting wflg = 2.  It is reinitialized if wflg becomes
+C       too large (to ensure that wflg+n does not cause integer
+C       overflow).
+
+C-----------------------------------------------------------------------
+C LOCAL INTEGERS:
+C-----------------------------------------------------------------------
+
+        INTEGER DEG, DEGME, DEXT, DMAX, E, ELENME, ELN, HASH, HMOD, I,
+     $          ILAST, INEXT, J, JLAST, JNEXT, K, KNT1, KNT2, KNT3,
+     $          LENJ, LN, MAXMEM, ME, MEM, MINDEG, NEL, NEWMEM,
+     $          NLEFT, NVI, NVJ, NVPIV, SLENME, WBIG, WE, WFLG, WNVI, X
+
+C deg:          the degree of a variable or element
+C degme:        size, |Lme|, of the current element, me (= degree (me))
+C dext:         external degree, |Le \ Lme|, of some element e
+C dmax:         largest |Le| seen so far
+C e:            an element
+C elenme:       the length, elen (me), of element list of pivotal var.
+C eln:          the length, elen (...), of an element list
+C hash:         the computed value of the hash function
+C hmod:         the hash function is computed modulo hmod = max (1,n-1)
+C i:            a supervariable
+C ilast:        the entry in a link list preceding i
+C inext:        the entry in a link list following i
+C j:            a supervariable
+C jlast:        the entry in a link list preceding j
+C jnext:        the entry in a link list, or path, following j
+C k:            the pivot order of an element or variable
+C knt1:         loop counter used during element construction
+C knt2:         loop counter used during element construction
+C knt3:         loop counter used during compression
+C lenj:         len (j)
+C ln:           length of a supervariable list
+C maxmem:       amount of memory needed for no compressions
+C me:           current supervariable being eliminated, and the
+C                       current element created by eliminating that
+C                       supervariable
+C mem:          memory in use assuming no compressions have occurred
+C mindeg:       current minimum degree
+C nel:          number of pivots selected so far
+C newmem:       amount of new memory needed for current pivot element
+C nleft:        n - nel, the number of nonpivotal rows/columns remaining
+C nvi:          the number of variables in a supervariable i (= nv (i))
+C nvj:          the number of variables in a supervariable j (= nv (j))
+C nvpiv:        number of pivots in current element
+C slenme:       number of variables in variable list of pivotal variable
+C wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig.
+C we:           w (e)
+C wflg:         used for flagging the w array.  See description of w.
+C wnvi:         wflg - nv (i)
+C x:            either a supervariable or an element
+
+C-----------------------------------------------------------------------
+C LOCAL POINTERS:
+C-----------------------------------------------------------------------
+
+        INTEGER P, P1, P2, P3, PDST, PEND, PJ, PME, PME1, PME2, PN, PSRC
+
+C               Any parameter (pe (...) or pfree) or local variable
+C               starting with "p" (for Pointer) is an index into iw,
+C               and all indices into iw use variables starting with
+C               "p."  The only exception to this rule is the iwlen
+C               input argument.
+
+C p:            pointer into lots of things
+C p1:           pe (i) for some variable i (start of element list)
+C p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list)
+C p3:           index of first supervariable in clean list
+C pdst:         destination pointer, for compression
+C pend:         end of memory to compress
+C pj:           pointer into an element or variable
+C pme:          pointer into the current element (pme1...pme2)
+C pme1:         the current element, me, is stored in iw (pme1...pme2)
+C pme2:         the end of the current element
+C pn:           pointer into a "clean" variable, also used to compress
+C psrc:         source pointer, for compression
+
+C-----------------------------------------------------------------------
+C  FUNCTIONS CALLED:
+C-----------------------------------------------------------------------
+
+        INTRINSIC MAX, MIN, MOD
+
+C=======================================================================
+C  INITIALIZATIONS
+C=======================================================================
+
+        WFLG = 2
+        MINDEG = 1
+        NCMPA = 0
+        NEL = 0
+        HMOD = MAX (1, N-1)
+        DMAX = 0
+        WBIG = IOVFLO - N
+        MEM = PFREE - 1
+        MAXMEM = MEM
+
+        DO 10 I = 1, N
+           LAST (I) = 0
+           HEAD (I) = 0
+           NV (I) = 1
+           W (I) = 1
+           ELEN (I) = 0
+           DEGREE (I) = LEN (I)
+10         CONTINUE
+
+C       ----------------------------------------------------------------
+C       initialize degree lists and eliminate rows with no off-diag. nz.
+C       ----------------------------------------------------------------
+
+        DO 20 I = 1, N
+
+           DEG = DEGREE (I)
+
+           IF (DEG .GT. 0) THEN
+
+C             ----------------------------------------------------------
+C             place i in the degree list corresponding to its degree
+C             ----------------------------------------------------------
+
+              INEXT = HEAD (DEG)
+              IF (INEXT .NE. 0) LAST (INEXT) = I
+              NEXT (I) = INEXT
+              HEAD (DEG) = I
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             we have a variable that can be eliminated at once because
+C             there is no off-diagonal non-zero in its row.
+C             ----------------------------------------------------------
+
+              NEL = NEL + 1
+              ELEN (I) = -NEL
+              PE (I) = 0
+              W (I) = 0
+
+              ENDIF
+
+20         CONTINUE
+
+C=======================================================================
+C  WHILE (selecting pivots) DO
+C=======================================================================
+
+30      CONTINUE
+        IF (NEL .LT. N) THEN
+
+C=======================================================================
+C  GET PIVOT OF MINIMUM DEGREE
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          find next supervariable for elimination
+C          -------------------------------------------------------------
+
+           DO 40 DEG = MINDEG, N
+              ME = HEAD (DEG)
+              IF (ME .GT. 0) GOTO 50
+40            CONTINUE
+50         CONTINUE
+           MINDEG = DEG
+
+C          -------------------------------------------------------------
+C          remove chosen variable from link list
+C          -------------------------------------------------------------
+
+           INEXT = NEXT (ME)
+           IF (INEXT .NE. 0) LAST (INEXT) = 0
+           HEAD (DEG) = INEXT
+
+C          -------------------------------------------------------------
+C          me represents the elimination of pivots nel+1 to nel+nv(me).
+C          place me itself as the first in this set.  It will be moved
+C          to the nel+nv(me) position when the permutation vectors are
+C          computed.
+C          -------------------------------------------------------------
+
+           ELENME = ELEN (ME)
+           ELEN (ME) = - (NEL + 1)
+           NVPIV = NV (ME)
+           NEL = NEL + NVPIV
+
+C=======================================================================
+C  CONSTRUCT NEW ELEMENT
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          At this point, me is the pivotal supervariable.  It will be
+C          converted into the current element.  Scan list of the
+C          pivotal supervariable, me, setting tree pointers and
+C          constructing new list of supervariables for the new element,
+C          me.  p is a pointer to the current position in the old list.
+C          -------------------------------------------------------------
+
+C          flag the variable "me" as being in Lme by negating nv (me)
+           NV (ME) = -NVPIV
+           DEGME = 0
+
+           IF (ELENME .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             construct the new element in place
+C             ----------------------------------------------------------
+
+              PME1 = PE (ME)
+              PME2 = PME1 - 1
+
+              DO 60 P = PME1, PME1 + LEN (ME) - 1
+                 I = IW (P)
+                 NVI = NV (I)
+                 IF (NVI .GT. 0) THEN
+
+C                   ----------------------------------------------------
+C                   i is a principal variable not yet placed in Lme.
+C                   store i in new list
+C                   ----------------------------------------------------
+
+                    DEGME = DEGME + NVI
+C                   flag i as being in Lme by negating nv (i)
+                    NV (I) = -NVI
+                    PME2 = PME2 + 1
+                    IW (PME2) = I
+
+C                   ----------------------------------------------------
+C                   remove variable i from degree list.
+C                   ----------------------------------------------------
+
+                    ILAST = LAST (I)
+                    INEXT = NEXT (I)
+                    IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                    IF (ILAST .NE. 0) THEN
+                       NEXT (ILAST) = INEXT
+                    ELSE
+C                      i is at the head of the degree list
+                       HEAD (DEGREE (I)) = INEXT
+                       ENDIF
+
+                    ENDIF
+60               CONTINUE
+C             this element takes no new memory in iw:
+              NEWMEM = 0
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             construct the new element in empty space, iw (pfree ...)
+C             ----------------------------------------------------------
+
+              P = PE (ME)
+              PME1 = PFREE
+              SLENME = LEN (ME) - ELENME
+
+              DO 120 KNT1 = 1, ELENME + 1
+
+                 IF (KNT1 .GT. ELENME) THEN
+C                   search the supervariables in me.
+                    E = ME
+                    PJ = P
+                    LN = SLENME
+                 ELSE
+C                   search the elements in me.
+                    E = IW (P)
+                    P = P + 1
+                    PJ = PE (E)
+                    LN = LEN (E)
+                    ENDIF
+
+C                -------------------------------------------------------
+C                search for different supervariables and add them to the
+C                new list, compressing when necessary. this loop is
+C                executed once for each element in the list and once for
+C                all the supervariables in the list.
+C                -------------------------------------------------------
+
+                 DO 110 KNT2 = 1, LN
+                    I = IW (PJ)
+                    PJ = PJ + 1
+                    NVI = NV (I)
+                    IF (NVI .GT. 0) THEN
+
+C                      -------------------------------------------------
+C                      compress iw, if necessary
+C                      -------------------------------------------------
+
+                       IF (PFREE .GT. IWLEN) THEN
+C                         prepare for compressing iw by adjusting
+C                         pointers and lengths so that the lists being
+C                         searched in the inner and outer loops contain
+C                         only the remaining entries.
+
+                          PE (ME) = P
+                          LEN (ME) = LEN (ME) - KNT1
+                          IF (LEN (ME) .EQ. 0) THEN
+C                            nothing left of supervariable me
+                             PE (ME) = 0
+                             ENDIF
+                          PE (E) = PJ
+                          LEN (E) = LN - KNT2
+                          IF (LEN (E) .EQ. 0) THEN
+C                            nothing left of element e
+                             PE (E) = 0
+                             ENDIF
+
+                          NCMPA = NCMPA + 1
+C                         store first item in pe
+C                         set first entry to -item
+                          DO 70 J = 1, N
+                             PN = PE (J)
+                             IF (PN .GT. 0) THEN
+                                PE (J) = IW (PN)
+                                IW (PN) = -J
+                                ENDIF
+70                           CONTINUE
+
+C                         psrc/pdst point to source/destination
+                          PDST = 1
+                          PSRC = 1
+                          PEND = PME1 - 1
+
+C                         while loop:
+80                        CONTINUE
+                          IF (PSRC .LE. PEND) THEN
+C                            search for next negative entry
+                             J = -IW (PSRC)
+                             PSRC = PSRC + 1
+                             IF (J .GT. 0) THEN
+                                IW (PDST) = PE (J)
+                                PE (J) = PDST
+                                PDST = PDST + 1
+C                               copy from source to destination
+                                LENJ = LEN (J)
+                                DO 90 KNT3 = 0, LENJ - 2
+                                   IW (PDST + KNT3) = IW (PSRC + KNT3)
+90                                 CONTINUE
+                                PDST = PDST + LENJ - 1
+                                PSRC = PSRC + LENJ - 1
+                                ENDIF
+                             GOTO 80
+                             ENDIF
+
+C                         move the new partially-constructed element
+                          P1 = PDST
+                          DO 100 PSRC = PME1, PFREE - 1
+                             IW (PDST) = IW (PSRC)
+                             PDST = PDST + 1
+100                          CONTINUE
+                          PME1 = P1
+                          PFREE = PDST
+                          PJ = PE (E)
+                          P = PE (ME)
+                          ENDIF
+
+C                      -------------------------------------------------
+C                      i is a principal variable not yet placed in Lme
+C                      store i in new list
+C                      -------------------------------------------------
+
+                       DEGME = DEGME + NVI
+C                      flag i as being in Lme by negating nv (i)
+                       NV (I) = -NVI
+                       IW (PFREE) = I
+                       PFREE = PFREE + 1
+
+C                      -------------------------------------------------
+C                      remove variable i from degree link list
+C                      -------------------------------------------------
+
+                       ILAST = LAST (I)
+                       INEXT = NEXT (I)
+                       IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                       IF (ILAST .NE. 0) THEN
+                          NEXT (ILAST) = INEXT
+                       ELSE
+C                         i is at the head of the degree list
+                          HEAD (DEGREE (I)) = INEXT
+                          ENDIF
+
+                       ENDIF
+110                 CONTINUE
+
+                 IF (E .NE. ME) THEN
+C                   set tree pointer and flag to indicate element e is
+C                   absorbed into new element me (the parent of e is me)
+                    PE (E) = -ME
+                    W (E) = 0
+                    ENDIF
+120              CONTINUE
+
+              PME2 = PFREE - 1
+C             this element takes newmem new memory in iw (possibly zero)
+              NEWMEM = PFREE - PME1
+              MEM = MEM + NEWMEM
+              MAXMEM = MAX (MAXMEM, MEM)
+              ENDIF
+
+C          -------------------------------------------------------------
+C          me has now been converted into an element in iw (pme1..pme2)
+C          -------------------------------------------------------------
+
+C          degme holds the external degree of new element
+           DEGREE (ME) = DEGME
+           PE (ME) = PME1
+           LEN (ME) = PME2 - PME1 + 1
+
+C          -------------------------------------------------------------
+C          make sure that wflg is not too large.  With the current
+C          value of wflg, wflg+n must not cause integer overflow
+C          -------------------------------------------------------------
+
+           IF (WFLG .GE. WBIG) THEN
+              DO 130 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+130              CONTINUE
+              WFLG = 2
+              ENDIF
+
+C=======================================================================
+C  DEGREE UPDATE AND ELEMENT ABSORPTION
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 2:  for each i in Lme, sum up the degree of Lme (which
+C          is degme), plus the sum of the external degrees of each Le
+C          for the elements e appearing within i, plus the
+C          supervariables in i.  Place i in hash list.
+C          -------------------------------------------------------------
+
+           DO 180 PME = PME1, PME2
+              I = IW (PME)
+              P1 = PE (I)
+              P2 = P1 + ELEN (I) - 1
+              PN = P1
+              HASH = 0
+              DEG = 0
+
+C             ----------------------------------------------------------
+C             scan the element list associated with supervariable i
+C             ----------------------------------------------------------
+
+C             half-and-half approximate degree:
+              WFLG = WFLG + 1
+C             Gilbert-Moler-Schreiber approximate external degree
+C             on first pass:
+              NVI = -NV (I)
+              DO 160 P = P1, P2
+                 E = IW (P)
+                 IF (W (E) .NE. 0) THEN
+C                   e is an unabsorbed element
+                    DEG = DEG + DEGREE (E) - NVI
+                    IW (PN) = E
+                    PN = PN + 1
+                    HASH = HASH + E
+                    ENDIF
+160              CONTINUE
+              IF (PN .EQ. P1 + 1) THEN
+C                exact external degree for elen = 2:
+C                (elen (i) is currently 1, but will become 2 when
+C                me is added to the element list)
+                 DEG = 0
+                 E = IW (P1)
+                 DO 145 P = PE (E), PE (E) + LEN (E) - 1
+                    J = IW (P)
+                    NVJ = NV (J)
+                    IF (NVJ .GT. 0 .AND. W (J) .NE. WFLG) THEN
+C                      j is principal and not in Lme if nv (j) .gt. 0
+C                      and j is not yet seen if w (j) .ne. wflg
+                       W (J) = WFLG
+                       DEG = DEG + NVJ
+                       ENDIF
+145                 CONTINUE
+                 ENDIF
+
+C             count the number of elements in i (including me):
+              ELEN (I) = PN - P1 + 1
+
+C             ----------------------------------------------------------
+C             scan the supervariables in the list associated with i
+C             ----------------------------------------------------------
+
+              P3 = PN
+              DO 170 P = P2 + 1, P1 + LEN (I) - 1
+                 J = IW (P)
+                 NVJ = NV (J)
+                 IF (NVJ .GT. 0) THEN
+C                   j is unabsorbed, and not in Lme.
+C                   add to degree and add to new list
+                    DEG = DEG + NVJ
+                    IW (PN) = J
+                    PN = PN + 1
+                    HASH = HASH + J
+                    ENDIF
+170              CONTINUE
+
+C             ----------------------------------------------------------
+C             update the degree and check for mass elimination
+C             ----------------------------------------------------------
+
+              IF (ELEN (I) .EQ. 1 .AND. P3 .EQ. PN) THEN
+
+C                -------------------------------------------------------
+C                mass elimination
+C                -------------------------------------------------------
+
+C                There is nothing left of this node except for an
+C                edge to the current pivot element.  elen (i) is 1,
+C                and there are no variables adjacent to node i.
+C                Absorb i into the current pivot element, me.
+
+                 PE (I) = -ME
+                 NVI = -NV (I)
+                 DEGME = DEGME - NVI
+                 NVPIV = NVPIV + NVI
+                 NEL = NEL + NVI
+                 NV (I) = 0
+                 ELEN (I) = 0
+
+              ELSE
+
+C                -------------------------------------------------------
+C                update the upper-bound degree of i
+C                -------------------------------------------------------
+
+C                the following degree does not yet include the size
+C                of the current element, which is added later:
+                 DEGREE (I) = MIN (DEGREE (I), DEG)
+
+C                -------------------------------------------------------
+C                add me to the list for i
+C                -------------------------------------------------------
+
+C                move first supervariable to end of list
+                 IW (PN) = IW (P3)
+C                move first element to end of element part of list
+                 IW (P3) = IW (P1)
+C                add new element to front of list.
+                 IW (P1) = ME
+C                store the new length of the list in len (i)
+                 LEN (I) = PN - P1 + 1
+
+C                -------------------------------------------------------
+C                place in hash bucket.  Save hash key of i in last (i).
+C                -------------------------------------------------------
+
+                 HASH = MOD (HASH, HMOD) + 1
+                 J = HEAD (HASH)
+                 IF (J .LE. 0) THEN
+C                   the degree list is empty, hash head is -j
+                    NEXT (I) = -J
+                    HEAD (HASH) = -I
+                 ELSE
+C                   degree list is not empty
+C                   use last (head (hash)) as hash head
+                    NEXT (I) = LAST (J)
+                    LAST (J) = I
+                    ENDIF
+                 LAST (I) = HASH
+                 ENDIF
+180           CONTINUE
+
+           DEGREE (ME) = DEGME
+
+C          -------------------------------------------------------------
+C          Clear the counter array, w (...), by incrementing wflg.
+C          -------------------------------------------------------------
+
+           WFLG = WFLG + 1
+
+C          make sure that wflg+n does not cause integer overflow
+           IF (WFLG .GE. WBIG) THEN
+              DO 190 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+190              CONTINUE
+              WFLG = 2
+              ENDIF
+C          at this point, w (1..n) .lt. wflg holds
+
+C=======================================================================
+C  SUPERVARIABLE DETECTION
+C=======================================================================
+
+           DO 250 PME = PME1, PME2
+              I = IW (PME)
+              IF (NV (I) .LT. 0) THEN
+C                i is a principal variable in Lme
+
+C                -------------------------------------------------------
+C                examine all hash buckets with 2 or more variables.  We
+C                do this by examining all unique hash keys for super-
+C                variables in the pattern Lme of the current element, me
+C                -------------------------------------------------------
+
+                 HASH = LAST (I)
+C                let i = head of hash bucket, and empty the hash bucket
+                 J = HEAD (HASH)
+                 IF (J .EQ. 0) GOTO 250
+                 IF (J .LT. 0) THEN
+C                   degree list is empty
+                    I = -J
+                    HEAD (HASH) = 0
+                 ELSE
+C                   degree list is not empty, restore last () of head
+                    I = LAST (J)
+                    LAST (J) = 0
+                    ENDIF
+                 IF (I .EQ. 0) GOTO 250
+
+C                while loop:
+200              CONTINUE
+                 IF (NEXT (I) .NE. 0) THEN
+
+C                   ----------------------------------------------------
+C                   this bucket has one or more variables following i.
+C                   scan all of them to see if i can absorb any entries
+C                   that follow i in hash bucket.  Scatter i into w.
+C                   ----------------------------------------------------
+
+                    LN = LEN (I)
+                    ELN = ELEN (I)
+C                   do not flag the first element in the list (me)
+                    DO 210 P = PE (I) + 1, PE (I) + LN - 1
+                       W (IW (P)) = WFLG
+210                    CONTINUE
+
+C                   ----------------------------------------------------
+C                   scan every other entry j following i in bucket
+C                   ----------------------------------------------------
+
+                    JLAST = I
+                    J = NEXT (I)
+
+C                   while loop:
+220                 CONTINUE
+                    IF (J .NE. 0) THEN
+
+C                      -------------------------------------------------
+C                      check if j and i have identical nonzero pattern
+C                      -------------------------------------------------
+
+                       IF (LEN (J) .NE. LN) THEN
+C                         i and j do not have same size data structure
+                          GOTO 240
+                          ENDIF
+                       IF (ELEN (J) .NE. ELN) THEN
+C                         i and j do not have same number of adjacent
+C                         elements
+                          GOTO 240
+                          ENDIF
+C                      do not flag the first element in the list (me)
+                       DO 230 P = PE (J) + 1, PE (J) + LN - 1
+                          IF (W (IW (P)) .NE. WFLG) THEN
+C                            an entry (iw(p)) is in j but not in i
+                             GOTO 240
+                             ENDIF
+230                       CONTINUE
+
+C                      -------------------------------------------------
+C                      found it!  j can be absorbed into i
+C                      -------------------------------------------------
+
+                       PE (J) = -I
+C                      both nv (i) and nv (j) are negated since they
+C                      are in Lme, and the absolute values of each
+C                      are the number of variables in i and j:
+                       NV (I) = NV (I) + NV (J)
+                       NV (J) = 0
+                       ELEN (J) = 0
+C                      delete j from hash bucket
+                       J = NEXT (J)
+                       NEXT (JLAST) = J
+                       GOTO 220
+
+C                      -------------------------------------------------
+240                    CONTINUE
+C                      j cannot be absorbed into i
+C                      -------------------------------------------------
+
+                       JLAST = J
+                       J = NEXT (J)
+                       GOTO 220
+                       ENDIF
+
+C                   ----------------------------------------------------
+C                   no more variables can be absorbed into i
+C                   go to next i in bucket and clear flag array
+C                   ----------------------------------------------------
+
+                    WFLG = WFLG + 1
+                    I = NEXT (I)
+                    IF (I .NE. 0) GOTO 200
+                    ENDIF
+                 ENDIF
+250           CONTINUE
+
+C=======================================================================
+C  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMENT
+C=======================================================================
+
+           P = PME1
+           NLEFT = N - NEL
+           DO 260 PME = PME1, PME2
+              I = IW (PME)
+              NVI = -NV (I)
+              IF (NVI .GT. 0) THEN
+C                i is a principal variable in Lme
+C                restore nv (i) to signify that i is principal
+                 NV (I) = NVI
+
+C                -------------------------------------------------------
+C                compute the external degree (add size of current elem)
+C                -------------------------------------------------------
+
+                 DEG = MAX (1, MIN (DEGREE (I) + DEGME-NVI, NLEFT-NVI))
+
+C                -------------------------------------------------------
+C                place the supervariable at the head of the degree list
+C                -------------------------------------------------------
+
+                 INEXT = HEAD (DEG)
+                 IF (INEXT .NE. 0) LAST (INEXT) = I
+                 NEXT (I) = INEXT
+                 LAST (I) = 0
+                 HEAD (DEG) = I
+
+C                -------------------------------------------------------
+C                save the new degree, and find the minimum degree
+C                -------------------------------------------------------
+
+                 MINDEG = MIN (MINDEG, DEG)
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                place the supervariable in the element pattern
+C                -------------------------------------------------------
+
+                 IW (P) = I
+                 P = P + 1
+                 ENDIF
+260           CONTINUE
+
+C=======================================================================
+C  FINALIZE THE NEW ELEMENT
+C=======================================================================
+
+           NV (ME) = NVPIV + DEGME
+C          nv (me) is now the degree of pivot (including diagonal part)
+C          save the length of the list for the new element me
+           LEN (ME) = P - PME1
+           IF (LEN (ME) .EQ. 0) THEN
+C             there is nothing left of the current pivot element
+              PE (ME) = 0
+              W (ME) = 0
+              ENDIF
+           IF (NEWMEM .NE. 0) THEN
+C             element was not constructed in place: deallocate part
+C             of it (final size is less than or equal to newmem,
+C             since newly nonprincipal variables have been removed).
+              PFREE = P
+              MEM = MEM - NEWMEM + LEN (ME)
+              ENDIF
+
+C=======================================================================
+C          END WHILE (selecting pivots)
+           GOTO 30
+           ENDIF
+C=======================================================================
+
+C=======================================================================
+C  COMPUTE THE PERMUTATION VECTORS
+C=======================================================================
+
+C       ----------------------------------------------------------------
+C       The time taken by the following code is O(n).  At this
+C       point, elen (e) = -k has been done for all elements e,
+C       and elen (i) = 0 has been done for all nonprincipal
+C       variables i.  At this point, there are no principal
+C       supervariables left, and all elements are absorbed.
+C       ----------------------------------------------------------------
+
+C       ----------------------------------------------------------------
+C       compute the ordering of unordered nonprincipal variables
+C       ----------------------------------------------------------------
+
+        DO 290 I = 1, N
+           IF (ELEN (I) .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             i is an un-ordered row.  Traverse the tree from i until
+C             reaching an element, e.  The element, e, was the
+C             principal supervariable of i and all nodes in the path
+C             from i to when e was selected as pivot.
+C             ----------------------------------------------------------
+
+              J = -PE (I)
+C             while (j is a variable) do:
+270           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 J = -PE (J)
+                 GOTO 270
+                 ENDIF
+              E = J
+
+C             ----------------------------------------------------------
+C             get the current pivot ordering of e
+C             ----------------------------------------------------------
+
+              K = -ELEN (E)
+
+C             ----------------------------------------------------------
+C             traverse the path again from i to e, and compress the
+C             path (all nodes point to e).  Path compression allows
+C             this code to compute in O(n) time.  Order the unordered
+C             nodes in the path, and place the element e at the end.
+C             ----------------------------------------------------------
+
+              J = I
+C             while (j is a variable) do:
+280           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 JNEXT = -PE (J)
+                 PE (J) = -E
+                 IF (ELEN (J) .EQ. 0) THEN
+C                   j is an unordered row
+                    ELEN (J) = K
+                    K = K + 1
+                    ENDIF
+                 J = JNEXT
+                 GOTO 280
+                 ENDIF
+C             leave elen (e) negative, so we know it is an element
+              ELEN (E) = -K
+              ENDIF
+290        CONTINUE
+
+C       ----------------------------------------------------------------
+C       reset the inverse permutation (elen (1..n)) to be positive,
+C       and compute the permutation (last (1..n)).
+C       ----------------------------------------------------------------
+
+        DO 300 I = 1, N
+           K = ABS (ELEN (I))
+           LAST (K) = I
+           ELEN (I) = K
+300        CONTINUE
+
+C=======================================================================
+C  RETURN THE MEMORY USAGE IN IW
+C=======================================================================
+
+C       If maxmem is less than or equal to iwlen, then no compressions
+C       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise
+C       compressions did occur, and iwlen would have had to have been
+C       greater than or equal to maxmem for no compressions to occur.
+C       Return the value of maxmem in the pfree argument.
+
+        PFREE = MAXMEM
+
+        RETURN
+        END
+
diff --git a/contrib/taucs/external/src/amdhat.c b/contrib/taucs/external/src/amdhat.c
new file mode 100644
index 0000000000000000000000000000000000000000..21887560cacfa05ea760e9a305f1fe0327078744
--- /dev/null
+++ b/contrib/taucs/external/src/amdhat.c
@@ -0,0 +1,1396 @@
+/* amdhat.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Subroutine */ int amdhat_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo;
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer hash, pend, hmod, lenj, dmax_, wbig, wflg, psrc, pdst, e, 
+	    i, j, k, p, degme, x, nleft, ilast, jlast, inext, jnext, p1, 
+	    nvpiv, p2, p3, me, ln, pj, pn, mindeg, elenme, slenme, maxmem, 
+	    newmem, deg, eln, mem, nel, pme, nvi, nvj, pme1, pme2, knt1, knt2,
+	     knt3;
+
+/* -----------------------------------------------------------------------
+ */
+/*  The MC47 / AMD suite of minimum degree ordering algorithms. */
+
+/*  This code is one of seven variations of a single algorithm: */
+/*  the primary routine (MC47B/BD, only available in the Harwell */
+/*  Subroutine Library), and 6 variations that differ only in */
+/*  how they compute the degree (available in NETLIB). */
+
+/*  For information on the Harwell Subroutine Library, contact */
+/*  John Harding, Harwell Subroutine Library, B 552, AEA Technology, */
+/*  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573, */
+/*  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will */
+/*  provide details of price and conditions of use. */
+/* -----------------------------------------------------------------------
+ */
+/* ***********************************************************************
+ */
+/* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU, */
+/* and AMDATR) may be used SOLELY for educational, research, and */
+/* benchmarking purposes by non-profit organizations and the U.S. */
+/* government.  Commercial and other organizations may make use of the */
+/* AMD routines SOLELY for benchmarking purposes only.  The AMD */
+/* routines may be modified by or on behalf of the User for such */
+/* use but at no time shall the AMD routines or any such modified */
+/* version of them become the property of the User.  The AMD routines */
+/* are provided without warranty of any kind, either expressed or */
+/* implied.  Neither the Authors nor their employers shall be liable */
+/* for any direct or consequential loss or damage whatsoever arising */
+/* out of the use or misuse of the AMD routines by the User.  The AMD */
+/* routines must not be sold.  You may make copies of the AMD routines, */
+/* but this NOTICE and the Copyright notice must appear in all copies. */
+/* Any other use of the AMD routines requires written permission. */
+/* Your use of the AMD routines is an implicit agreement to these */
+/* conditions." */
+/* ***********************************************************************
+ */
+/* -----------------------------------------------------------------------
+ */
+/* AMDhat:  approximate minimum (Gilbert-Moler-Schreiber) degree ordering 
+*/
+/*          algorithm */
+/* -----------------------------------------------------------------------
+ */
+/*  Variation 4:  Gilbert-Moler-Schreiber approximate external */
+/*  degree (as used in MATLAB, for example.  See J. R. Gilbert, C. */
+/*  Moler, and R. Schreiber, Sparse matrices in MATLAB:  design and */
+/*  implementation, SIAM J. Matrix Analysis and Applications, vol. 13, */
+/*  1992, pp. 333-356).  Note that some of the comments in the code */
+/*  below reflect the MC47-style degree approximation. */
+
+/*  This method is sometimes slightly faster than MC47B/BD, but nearly */
+/*  always computes an ordering with worse fill-in than MC47B/BD.  It */
+/*  has been observed to be nearly 3 times slower than MC47B/BD for some 
+*/
+/*  matrices because of the increase in fill-in. */
+
+/*  We recommend using MC47B/BD instead of this routine since MC47B/BD */
+/*  gives better results in about the same time. */
+/* -----------------------------------------------------------------------
+ */
+/* Given a representation of the nonzero pattern of a symmetric matrix, */
+/*       A, (excluding the diagonal) perform an approximate minimum */
+/*       (GMS-style) degree ordering to compute a pivot order such */
+/*       that the introduction of nonzeros (fill-in) in the Cholesky */
+/*       factors A = LL^T is kept low.  At each step, the pivot */
+/*       selected is the one with the minimum Gilbert-Moler-Schreiber- */
+/*       style upper-bound on the external degree. */
+/* ********************************************************************** 
+*/
+/* ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ****** 
+*/
+/* ********************************************************************** 
+*/
+/* ** If you want error checking, a more versatile input format, and a ** 
+*/
+/* ** simpler user interface, then use MC47A/AD in the Harwell         ** 
+*/
+/* ** Subroutine Library, which checks for errors, transforms the      ** 
+*/
+/* ** input, and calls MC47B/BD.                                       ** 
+*/
+/* ********************************************************************** 
+*/
+/*       References:  (UF Tech Reports are available via anonymous ftp */
+/*       to ftp.cis.ufl.edu:cis/tech-reports). */
+
+/*       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern */
+/*               multifrontal method for sparse LU factorization", */
+/*               SIAM J. Matrix Analysis and Applications, to appear. */
+/*               also Univ. of Florida Technical Report TR-94-038. */
+/*               Discusses UMFPACK / MA38. */
+
+/*       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, */
+/*               "An approximate minimum degree ordering algorithm," */
+/*               SIAM J. Matrix Analysis and Applications (to appear), */
+/*               also Univ. of Florida Technical Report TR-94-039. */
+/*               Discusses this routine. */
+
+/*       [3] Alan George and Joseph Liu, "The evolution of the */
+/*               minimum degree ordering algorithm," SIAM Review, vol. */
+/*               31, no. 1, pp. 1-19, March 1989.  We list below the */
+/*               features mentioned in that paper that this code */
+/*               includes: */
+
+/*       mass elimination: */
+/*               Yes.  MA27 relied on supervariable detection for mass */
+/*               elimination. */
+/*       indistinguishable nodes: */
+/*               Yes (we call these "supervariables").  This was also in 
+*/
+/*               the MA27 code - although we modified the method of */
+/*               detecting them (the previous hash was the true degree, */
+/*               which we no longer keep track of).  A supervariable is */
+/*               a set of rows with identical nonzero pattern.  All */
+/*               variables in a supervariable are eliminated together. */
+/*               Each supervariable has as its numerical name that of */
+/*               one of its variables (its principal variable). */
+/*       quotient graph representation: */
+/*               Yes.  We use the term "element" for the cliques formed */
+/*               during elimination.  This was also in the MA27 code. */
+/*               The algorithm can operate in place, but it will work */
+/*               more efficiently if given some "elbow room." */
+/*       element absorption: */
+/*               Yes.  This was also in the MA27 code. */
+/*       external degree: */
+/*               Yes.  The MA27 code was based on the true degree. */
+/*       incomplete degree update and multiple elimination: */
+/*               No.  This was not in MA27, either.  Our method of */
+/*               degree update within MC47B/BD is element-based, not */
+/*               variable-based.  It is thus not well-suited for use */
+/*               with incomplete degree update or multiple elimination. */
+/* -----------------------------------------------------------------------
+ */
+/* Authors, and Copyright (C) 1995 by: */
+/*       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid. 
+*/
+
+/* Acknowledgements: */
+/*       This work (and the UMFPACK package) was supported by the */
+/*       National Science Foundation (ASC-9111263 and DMS-9223088). */
+/*       The UMFPACK/MA38 approximate degree update algorithm, the */
+/*       unsymmetric analog which forms the basis of MC47B/BD, was */
+/*       developed while Tim Davis was supported by CERFACS (Toulouse, */
+/*       France) in a post-doctoral position. */
+
+/* Date:  September, 1995 */
+/* -----------------------------------------------------------------------
+ */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT ARGUMENTS (unaltered): */
+/* -----------------------------------------------------------------------
+ */
+/* n:    The matrix order. */
+
+/*       Restriction:  1 .le. n .lt. (iovflo/2)-2 */
+/* iwlen:        The length of iw (1..iwlen).  On input, the matrix is */
+/*       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/*       slightly larger than what is required to hold the matrix, at */
+/*       least iwlen .ge. pfree + n is recommended.  Otherwise, */
+/*       excessive compressions will take place. */
+/*       *** We do not recommend running this algorithm with *** */
+/*       ***      iwlen .lt. pfree + n.                      *** */
+/*       *** Better performance will be obtained if          *** */
+/*       ***      iwlen .ge. pfree + n                       *** */
+/*       *** or better yet                                   *** */
+/*       ***      iwlen .gt. 1.2 * pfree                     *** */
+/*       *** (where pfree is its value on input).            *** */
+/*       The algorithm will not run at all if iwlen .lt. pfree-1. */
+
+/*       Restriction: iwlen .ge. pfree-1 */
+/* iovflo:       The largest positive integer that your computer can */
+/*       represent (-iovflo should also be representable).  On a 32-bit */
+/*       computer with 2's-complement arithmetic, */
+/*       iovflo = (2^31)-1 = 2,147,483,647. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/OUTPUT ARGUMENTS: */
+/* -----------------------------------------------------------------------
+ */
+/* pe:   On input, pe (i) is the index in iw of the start of row i, or */
+/*       zero if row i has no off-diagonal non-zeros. */
+
+/*       During execution, it is used for both supervariables and */
+/*       elements: */
+
+/*       * Principal supervariable i:  index into iw of the */
+/*               description of supervariable i.  A supervariable */
+/*               represents one or more rows of the matrix */
+/*               with identical nonzero pattern. */
+/*       * Non-principal supervariable i:  if i has been absorbed */
+/*               into another supervariable j, then pe (i) = -j. */
+/*               That is, j has the same pattern as i. */
+/*               Note that j might later be absorbed into another */
+/*               supervariable j2, in which case pe (i) is still -j, */
+/*               and pe (j) = -j2. */
+/*       * Unabsorbed element e:  the index into iw of the description */
+/*               of element e, if e has not yet been absorbed by a */
+/*               subsequent element.  Element e is created when */
+/*               the supervariable of the same name is selected as */
+/*               the pivot. */
+/*       * Absorbed element e:  if element e is absorbed into element */
+/*               e2, then pe (e) = -e2.  This occurs when the pattern of 
+*/
+/*               e (that is, Le) is found to be a subset of the pattern */
+/*               of e2 (that is, Le2).  If element e is "null" (it has */
+/*               no nonzeros outside its pivot block), then pe (e) = 0. */
+
+/*       On output, pe holds the assembly tree/forest, which implicitly */
+/*       represents a pivot order with identical fill-in as the actual */
+/*       order (via a depth-first search of the tree). */
+
+/*       On output: */
+/*       If nv (i) .gt. 0, then i represents a node in the assembly tree, 
+*/
+/*       and the parent of i is -pe (i), or zero if i is a root. */
+/*       If nv (i) = 0, then (i,-pe (i)) represents an edge in a */
+/*       subtree, the root of which is a node in the assembly tree. */
+/* pfree:        On input the tail end of the array, iw (pfree..iwlen), */
+/*       is empty, and the matrix is stored in iw (1..pfree-1). */
+/*       During execution, additional data is placed in iw, and pfree */
+/*       is modified so that iw (pfree..iwlen) is always the unused part 
+*/
+/*       of iw.  On output, pfree is set equal to the size of iw that */
+/*       would have been needed for no compressions to occur.  If */
+/*       ncmpa is zero, then pfree (on output) is less than or equal to */
+/*       iwlen, and the space iw (pfree+1 ... iwlen) was not used. */
+/*       Otherwise, pfree (on output) is greater than iwlen, and all the 
+*/
+/*       memory in iw was used. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/MODIFIED (undefined on output): */
+/* -----------------------------------------------------------------------
+ */
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/*       matrix, excluding the diagonal.  The contents of len (1..n) */
+/*       are undefined on output. */
+/* iw:   On input, iw (1..pfree-1) holds the description of each row i */
+/*       in the matrix.  The matrix must be symmetric, and both upper */
+/*       and lower triangular parts must be present.  The diagonal must */
+/*       not be present.  Row i is held as follows: */
+
+/*               len (i):  the length of the row i data structure */
+/*               iw (pe (i) ... pe (i) + len (i) - 1): */
+/*                       the list of column indices for nonzeros */
+/*                       in row i (simple supervariables), excluding */
+/*                       the diagonal.  All supervariables start with */
+/*                       one row/column each (supervariable i is just */
+/*                       row i). */
+/*               if len (i) is zero on input, then pe (i) is ignored */
+/*               on input. */
+
+/*               Note that the rows need not be in any particular order, 
+*/
+/*               and there may be empty space between the rows. */
+
+/*       During execution, the supervariable i experiences fill-in. */
+/*       This is represented by placing in i a list of the elements */
+/*       that cause fill-in in supervariable i: */
+
+/*               len (i):  the length of supervariable i */
+/*               iw (pe (i) ... pe (i) + elen (i) - 1): */
+/*                       the list of elements that contain i.  This list 
+*/
+/*                       is kept short by removing absorbed elements. */
+/*               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1): */
+/*                       the list of supervariables in i.  This list */
+/*                       is kept short by removing nonprincipal */
+/*                       variables, and any entry j that is also */
+/*                       contained in at least one of the elements */
+/*                       (j in Le) in the list for i (e in row i). */
+
+/*       When supervariable i is selected as pivot, we create an */
+/*       element e of the same name (e=i): */
+
+/*               len (e):  the length of element e */
+/*               iw (pe (e) ... pe (e) + len (e) - 1): */
+/*                       the list of supervariables in element e. */
+
+/*       An element represents the fill-in that occurs when supervariable 
+*/
+/*       i is selected as pivot (which represents the selection of row i 
+*/
+/*       and all non-principal variables whose principal variable is i). 
+*/
+/*       We use the term Le to denote the set of all supervariables */
+/*       in element e.  Absorbed supervariables and elements are pruned */
+/*       from these lists when computationally convenient. */
+
+/*       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. */
+/*       The contents of iw are undefined on output. */
+/* -----------------------------------------------------------------------
+ */
+/* OUTPUT (need not be set on input): */
+/* -----------------------------------------------------------------------
+ */
+/* nv:   During execution, abs (nv (i)) is equal to the number of rows */
+/*       that are represented by the principal supervariable i.  If i is 
+*/
+/*       a nonprincipal variable, then nv (i) = 0.  Initially, */
+/*       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a */
+/*       principal variable in the pattern Lme of the current pivot */
+/*       element me.  On output, nv (e) holds the true degree of element 
+*/
+/*       e at the time it was created (including the diagonal part). */
+/* ncmpa:        The number of times iw was compressed.  If this is */
+/*       excessive, then the execution took longer than what could have */
+/*       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20% */
+/*       larger than the value of pfree on input (or at least */
+/*       iwlen .ge. pfree + n).  The fastest performance will be */
+/*       obtained when ncmpa is returned as zero.  If iwlen is set to */
+/*       the value returned by pfree on *output*, then no compressions */
+/*       will occur. */
+/* elen: See the description of iw above.  At the start of execution, */
+/*       elen (i) is set to zero.  During execution, elen (i) is the */
+/*       number of elements in the list for supervariable i.  When e */
+/*       becomes an element, elen (e) = -nel is set, where nel is the */
+/*       current step of factorization.  elen (i) = 0 is done when i */
+/*       becomes nonprincipal. */
+
+/*       For variables, elen (i) .ge. 0 holds until just before the */
+/*       permutation vectors are computed.  For elements, */
+/*       elen (e) .lt. 0 holds. */
+
+/*       On output elen (1..n) holds the inverse permutation (the same */
+/*       as the 'INVP' argument in Sparspak).  That is, if k = elen (i), 
+*/
+/*       then row i is the kth pivot row.  Row i of A appears as the */
+/*       (elen(i))-th row in the permuted matrix, PAP^T. */
+/* last: In a degree list, last (i) is the supervariable preceding i, */
+/*       or zero if i is the head of the list.  In a hash bucket, */
+/*       last (i) is the hash key for i.  last (head (hash)) is also */
+/*       used as the head of a hash bucket if head (hash) contains a */
+/*       degree list (see head, below). */
+
+/*       On output, last (1..n) holds the permutation (the same as the */
+/*       'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/*       row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/*       in the permuted matrix, PAP^T. */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL (not input or output - used only during execution): */
+/* -----------------------------------------------------------------------
+ */
+/* degree:       If i is a supervariable, then degree (i) holds the */
+/*       current approximation of the external degree of row i (an upper 
+*/
+/*       bound).  The external degree is the number of nonzeros in row i, 
+*/
+/*       minus abs (nv (i)) (the diagonal part).  The bound is equal to */
+/*       the external degree if elen (i) is less than or equal to two. */
+
+/*       We also use the term "external degree" for elements e to refer */
+/*       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|, 
+*/
+/*       which is the degree of the off-diagonal part of the element e */
+/*       (not including the diagonal part). */
+/* head: head is used for degree lists.  head (deg) is the first */
+/*       supervariable in a degree list (all supervariables i in a */
+/*       degree list deg have the same approximate degree, namely, */
+/*       deg = degree (i)).  If the list deg is empty then */
+/*       head (deg) = 0. */
+
+/*       During supervariable detection head (hash) also serves as a */
+/*       pointer to a hash bucket. */
+/*       If head (hash) .gt. 0, there is a degree list of degree hash. */
+/*               The hash bucket head pointer is last (head (hash)). */
+/*       If head (hash) = 0, then the degree list and hash bucket are */
+/*               both empty. */
+/*       If head (hash) .lt. 0, then the degree list is empty, and */
+/*               -head (hash) is the head of the hash bucket. */
+/*       After supervariable detection is complete, all hash buckets */
+/*       are empty, and the (last (head (hash)) = 0) condition is */
+/*       restored for the non-empty degree lists. */
+/* next: next (i) is the supervariable following i in a link list, or */
+/*       zero if i is the last in the list.  Used for two kinds of */
+/*       lists:  degree lists and hash buckets (a supervariable can be */
+/*       in only one kind of list at a time). */
+/* w:    The flag array w determines the status of elements and */
+/*       variables, and the external degree of elements. */
+
+/*       for elements: */
+/*          if w (e) = 0, then the element e is absorbed */
+/*          if w (e) .ge. wflg, then w (e) - wflg is the size of */
+/*               the set |Le \ Lme|, in terms of nonzeros (the */
+/*               sum of abs (nv (i)) for each principal variable i that */
+/*               is both in the pattern of element e and NOT in the */
+/*               pattern of the current pivot element, me). */
+/*          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has */
+/*               not yet been seen in the scan of the element lists in */
+/*               the computation of |Le\Lme| in loop 150 below. */
+
+/*       for variables: */
+/*          during supervariable detection, if w (j) .ne. wflg then j is 
+*/
+/*          not in the pattern of variable i */
+
+/*       The w array is initialized by setting w (i) = 1 for all i, */
+/*       and by setting wflg = 2.  It is reinitialized if wflg becomes */
+/*       too large (to ensure that wflg+n does not cause integer */
+/*       overflow). */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL INTEGERS: */
+/* -----------------------------------------------------------------------
+ */
+/* deg:          the degree of a variable or element */
+/* degme:        size, |Lme|, of the current element, me (= degree (me)) 
+*/
+/* dext:         external degree, |Le \ Lme|, of some element e */
+/* dmax:         largest |Le| seen so far */
+/* e:            an element */
+/* elenme:       the length, elen (me), of element list of pivotal var. */
+/* eln:          the length, elen (...), of an element list */
+/* hash:         the computed value of the hash function */
+/* hmod:         the hash function is computed modulo hmod = max (1,n-1) 
+*/
+/* i:            a supervariable */
+/* ilast:        the entry in a link list preceding i */
+/* inext:        the entry in a link list following i */
+/* j:            a supervariable */
+/* jlast:        the entry in a link list preceding j */
+/* jnext:        the entry in a link list, or path, following j */
+/* k:            the pivot order of an element or variable */
+/* knt1:         loop counter used during element construction */
+/* knt2:         loop counter used during element construction */
+/* knt3:         loop counter used during compression */
+/* lenj:         len (j) */
+/* ln:           length of a supervariable list */
+/* maxmem:       amount of memory needed for no compressions */
+/* me:           current supervariable being eliminated, and the */
+/*                       current element created by eliminating that */
+/*                       supervariable */
+/* mem:          memory in use assuming no compressions have occurred */
+/* mindeg:       current minimum degree */
+/* nel:          number of pivots selected so far */
+/* newmem:       amount of new memory needed for current pivot element */
+/* nleft:        n - nel, the number of nonpivotal rows/columns remaining 
+*/
+/* nvi:          the number of variables in a supervariable i (= nv (i)) 
+*/
+/* nvj:          the number of variables in a supervariable j (= nv (j)) 
+*/
+/* nvpiv:        number of pivots in current element */
+/* slenme:       number of variables in variable list of pivotal variable 
+*/
+/* wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig. */
+/* we:           w (e) */
+/* wflg:         used for flagging the w array.  See description of w. */
+/* wnvi:         wflg - nv (i) */
+/* x:            either a supervariable or an element */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL POINTERS: */
+/* -----------------------------------------------------------------------
+ */
+/*               Any parameter (pe (...) or pfree) or local variable */
+/*               starting with "p" (for Pointer) is an index into iw, */
+/*               and all indices into iw use variables starting with */
+/*               "p."  The only exception to this rule is the iwlen */
+/*               input argument. */
+/* p:            pointer into lots of things */
+/* p1:           pe (i) for some variable i (start of element list) */
+/* p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list) 
+*/
+/* p3:           index of first supervariable in clean list */
+/* pdst:         destination pointer, for compression */
+/* pend:         end of memory to compress */
+/* pj:           pointer into an element or variable */
+/* pme:          pointer into the current element (pme1...pme2) */
+/* pme1:         the current element, me, is stored in iw (pme1...pme2) */
+/* pme2:         the end of the current element */
+/* pn:           pointer into a "clean" variable, also used to compress */
+/* psrc:         source pointer, for compression */
+/* -----------------------------------------------------------------------
+ */
+/*  FUNCTIONS CALLED: */
+/* -----------------------------------------------------------------------
+ */
+/* =======================================================================
+ */
+/*  INITIALIZATIONS */
+/* =======================================================================
+ */
+    /* Parameter adjustments */
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    wflg = 2;
+    mindeg = 1;
+    *ncmpa = 0;
+    nel = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - 1;
+    hmod = max(i__1,i__2);
+    dmax_ = 0;
+    wbig = *iovflo - *n;
+    mem = *pfree - 1;
+    maxmem = mem;
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	last[i] = 0;
+	head[i] = 0;
+	nv[i] = 1;
+	w[i] = 1;
+	elen[i] = 0;
+	degree[i] = len[i];
+/* L10: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       initialize degree lists and eliminate rows with no off-diag. nz. 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	deg = degree[i];
+	if (deg > 0) {
+/*             --------------------------------------------------
+-------- */
+/*             place i in the degree list corresponding to its deg
+ree */
+/*             --------------------------------------------------
+-------- */
+	    inext = head[deg];
+	    if (inext != 0) {
+		last[inext] = i;
+	    }
+	    next[i] = inext;
+	    head[deg] = i;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             we have a variable that can be eliminated at once b
+ecause */
+/*             there is no off-diagonal non-zero in its row. */
+/*             --------------------------------------------------
+-------- */
+	    ++nel;
+	    elen[i] = -nel;
+	    pe[i] = 0;
+	    w[i] = 0;
+	}
+/* L20: */
+    }
+/* =======================================================================
+ */
+/*  WHILE (selecting pivots) DO */
+/* =======================================================================
+ */
+L30:
+    if (nel < *n) {
+/* ==================================================================
+===== */
+/*  GET PIVOT OF MINIMUM DEGREE */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          find next supervariable for elimination */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = *n;
+	for (deg = mindeg; deg <= i__1; ++deg) {
+	    me = head[deg];
+	    if (me > 0) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	mindeg = deg;
+/*          ---------------------------------------------------------
+---- */
+/*          remove chosen variable from link list */
+/*          ---------------------------------------------------------
+---- */
+	inext = next[me];
+	if (inext != 0) {
+	    last[inext] = 0;
+	}
+	head[deg] = inext;
+/*          ---------------------------------------------------------
+---- */
+/*          me represents the elimination of pivots nel+1 to nel+nv(me
+). */
+/*          place me itself as the first in this set.  It will be move
+d */
+/*          to the nel+nv(me) position when the permutation vectors ar
+e */
+/*          computed. */
+/*          ---------------------------------------------------------
+---- */
+	elenme = elen[me];
+	elen[me] = -(nel + 1);
+	nvpiv = nv[me];
+	nel += nvpiv;
+/* ==================================================================
+===== */
+/*  CONSTRUCT NEW ELEMENT */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          At this point, me is the pivotal supervariable.  It will b
+e */
+/*          converted into the current element.  Scan list of the */
+/*          pivotal supervariable, me, setting tree pointers and */
+/*          constructing new list of supervariables for the new elemen
+t, */
+/*          me.  p is a pointer to the current position in the old lis
+t. */
+/*          ---------------------------------------------------------
+---- */
+/*          flag the variable "me" as being in Lme by negating nv (me)
+ */
+	nv[me] = -nvpiv;
+	degme = 0;
+	if (elenme == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in place */
+/*             --------------------------------------------------
+-------- */
+	    pme1 = pe[me];
+	    pme2 = pme1 - 1;
+	    i__1 = pme1 + len[me] - 1;
+	    for (p = pme1; p <= i__1; ++p) {
+		i = iw[p];
+		nvi = nv[i];
+		if (nvi > 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   i is a principal variable not yet pla
+ced in Lme. */
+/*                   store i in new list */
+/*                   ------------------------------------
+---------------- */
+		    degme += nvi;
+/*                   flag i as being in Lme by negating nv
+ (i) */
+		    nv[i] = -nvi;
+		    ++pme2;
+		    iw[pme2] = i;
+/*                   ------------------------------------
+---------------- */
+/*                   remove variable i from degree list. 
+*/
+/*                   ------------------------------------
+---------------- */
+		    ilast = last[i];
+		    inext = next[i];
+		    if (inext != 0) {
+			last[inext] = ilast;
+		    }
+		    if (ilast != 0) {
+			next[ilast] = inext;
+		    } else {
+/*                      i is at the head of the degree
+ list */
+			head[degree[i]] = inext;
+		    }
+		}
+/* L60: */
+	    }
+/*             this element takes no new memory in iw: */
+	    newmem = 0;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in empty space, iw (pfree
+ ...) */
+/*             --------------------------------------------------
+-------- */
+	    p = pe[me];
+	    pme1 = *pfree;
+	    slenme = len[me] - elenme;
+	    i__1 = elenme + 1;
+	    for (knt1 = 1; knt1 <= i__1; ++knt1) {
+		if (knt1 > elenme) {
+/*                   search the supervariables in me. */
+		    e = me;
+		    pj = p;
+		    ln = slenme;
+		} else {
+/*                   search the elements in me. */
+		    e = iw[p];
+		    ++p;
+		    pj = pe[e];
+		    ln = len[e];
+		}
+/*                -------------------------------------------
+------------ */
+/*                search for different supervariables and add 
+them to the */
+/*                new list, compressing when necessary. this l
+oop is */
+/*                executed once for each element in the list a
+nd once for */
+/*                all the supervariables in the list. */
+/*                -------------------------------------------
+------------ */
+		i__2 = ln;
+		for (knt2 = 1; knt2 <= i__2; ++knt2) {
+		    i = iw[pj];
+		    ++pj;
+		    nvi = nv[i];
+		    if (nvi > 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      compress iw, if necessary */
+/*                      -----------------------------
+-------------------- */
+			if (*pfree > *iwlen) {
+/*                         prepare for compressing
+ iw by adjusting */
+/*                         pointers and lengths so
+ that the lists being */
+/*                         searched in the inner a
+nd outer loops contain */
+/*                         only the remaining entr
+ies. */
+			    pe[me] = p;
+			    len[me] -= knt1;
+			    if (len[me] == 0) {
+/*                            nothing left of 
+supervariable me */
+				pe[me] = 0;
+			    }
+			    pe[e] = pj;
+			    len[e] = ln - knt2;
+			    if (len[e] == 0) {
+/*                            nothing left of 
+element e */
+				pe[e] = 0;
+			    }
+			    ++(*ncmpa);
+/*                         store first item in pe 
+*/
+/*                         set first entry to -ite
+m */
+			    i__3 = *n;
+			    for (j = 1; j <= i__3; ++j) {
+				pn = pe[j];
+				if (pn > 0) {
+				    pe[j] = iw[pn];
+				    iw[pn] = -j;
+				}
+/* L70: */
+			    }
+/*                         psrc/pdst point to sour
+ce/destination */
+			    pdst = 1;
+			    psrc = 1;
+			    pend = pme1 - 1;
+/*                         while loop: */
+L80:
+			    if (psrc <= pend) {
+/*                            search for next 
+negative entry */
+				j = -iw[psrc];
+				++psrc;
+				if (j > 0) {
+				    iw[pdst] = pe[j];
+				    pe[j] = pdst;
+				    ++pdst;
+/*                               copy from
+ source to destination */
+				    lenj = len[j];
+				    i__3 = lenj - 2;
+				    for (knt3 = 0; knt3 <= i__3; ++knt3) {
+					iw[pdst + knt3] = iw[psrc + knt3];
+/* L90: */
+				    }
+				    pdst = pdst + lenj - 1;
+				    psrc = psrc + lenj - 1;
+				}
+				goto L80;
+			    }
+/*                         move the new partially-
+constructed element */
+			    p1 = pdst;
+			    i__3 = *pfree - 1;
+			    for (psrc = pme1; psrc <= i__3; ++psrc) {
+				iw[pdst] = iw[psrc];
+				++pdst;
+/* L100: */
+			    }
+			    pme1 = p1;
+			    *pfree = pdst;
+			    pj = pe[e];
+			    p = pe[me];
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      i is a principal variable not 
+yet placed in Lme */
+/*                      store i in new list */
+/*                      -----------------------------
+-------------------- */
+			degme += nvi;
+/*                      flag i as being in Lme by nega
+ting nv (i) */
+			nv[i] = -nvi;
+			iw[*pfree] = i;
+			++(*pfree);
+/*                      -----------------------------
+-------------------- */
+/*                      remove variable i from degree 
+link list */
+/*                      -----------------------------
+-------------------- */
+			ilast = last[i];
+			inext = next[i];
+			if (inext != 0) {
+			    last[inext] = ilast;
+			}
+			if (ilast != 0) {
+			    next[ilast] = inext;
+			} else {
+/*                         i is at the head of the
+ degree list */
+			    head[degree[i]] = inext;
+			}
+		    }
+/* L110: */
+		}
+		if (e != me) {
+/*                   set tree pointer and flag to indicate
+ element e is */
+/*                   absorbed into new element me (the par
+ent of e is me) */
+		    pe[e] = -me;
+		    w[e] = 0;
+		}
+/* L120: */
+	    }
+	    pme2 = *pfree - 1;
+/*             this element takes newmem new memory in iw (possibl
+y zero) */
+	    newmem = *pfree - pme1;
+	    mem += newmem;
+	    maxmem = max(maxmem,mem);
+	}
+/*          ---------------------------------------------------------
+---- */
+/*          me has now been converted into an element in iw (pme1..pme
+2) */
+/*          ---------------------------------------------------------
+---- */
+/*          degme holds the external degree of new element */
+	degree[me] = degme;
+	pe[me] = pme1;
+	len[me] = pme2 - pme1 + 1;
+/*          ---------------------------------------------------------
+---- */
+/*          make sure that wflg is not too large.  With the current */
+/*          value of wflg, wflg+n must not cause integer overflow */
+/*          ---------------------------------------------------------
+---- */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L130: */
+	    }
+	    wflg = 2;
+	}
+/* ==================================================================
+===== */
+/*  DEGREE UPDATE AND ELEMENT ABSORPTION */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 2:  for each i in Lme, sum up the degree of Lme (whic
+h */
+/*          is degme), plus the sum of the external degrees of each Le
+ */
+/*          for the elements e appearing within i, plus the */
+/*          supervariables in i.  Place i in hash list. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    p1 = pe[i];
+	    p2 = p1 + elen[i] - 1;
+	    pn = p1;
+	    hash = 0;
+	    deg = 0;
+/*             --------------------------------------------------
+-------- */
+/*             scan the element list associated with supervariable
+ i */
+/*             --------------------------------------------------
+-------- */
+/*             Gilbert-Moler-Schreiber approximate degree: */
+	    nvi = -nv[i];
+	    i__2 = p2;
+	    for (p = p1; p <= i__2; ++p) {
+		e = iw[p];
+		if (w[e] != 0) {
+/*                   e is an unabsorbed element */
+		    deg = deg + degree[e] - nvi;
+		    iw[pn] = e;
+		    ++pn;
+		    hash += e;
+		}
+/* L160: */
+	    }
+/*             count the number of elements in i (including me): 
+*/
+	    elen[i] = pn - p1 + 1;
+/*             --------------------------------------------------
+-------- */
+/*             scan the supervariables in the list associated with
+ i */
+/*             --------------------------------------------------
+-------- */
+	    p3 = pn;
+	    i__2 = p1 + len[i] - 1;
+	    for (p = p2 + 1; p <= i__2; ++p) {
+		j = iw[p];
+		nvj = nv[j];
+		if (nvj > 0) {
+/*                   j is unabsorbed, and not in Lme. */
+/*                   add to degree and add to new list */
+		    deg += nvj;
+		    iw[pn] = j;
+		    ++pn;
+		    hash += j;
+		}
+/* L170: */
+	    }
+/*             --------------------------------------------------
+-------- */
+/*             update the degree and check for mass elimination */
+/*             --------------------------------------------------
+-------- */
+	    if (elen[i] == 1 && p3 == pn) {
+/*                -------------------------------------------
+------------ */
+/*                mass elimination */
+/*                -------------------------------------------
+------------ */
+/*                There is nothing left of this node except fo
+r an */
+/*                edge to the current pivot element.  elen (i)
+ is 1, */
+/*                and there are no variables adjacent to node 
+i. */
+/*                Absorb i into the current pivot element, me.
+ */
+		pe[i] = -me;
+		nvi = -nv[i];
+		degme -= nvi;
+		nvpiv += nvi;
+		nel += nvi;
+		nv[i] = 0;
+		elen[i] = 0;
+	    } else {
+/*                -------------------------------------------
+------------ */
+/*                update the upper-bound degree of i */
+/*                -------------------------------------------
+------------ */
+/*                the following degree does not yet include th
+e size */
+/*                of the current element, which is added later
+: */
+/* Computing MIN */
+		i__2 = degree[i];
+		degree[i] = min(i__2,deg);
+/*                -------------------------------------------
+------------ */
+/*                add me to the list for i */
+/*                -------------------------------------------
+------------ */
+/*                move first supervariable to end of list */
+		iw[pn] = iw[p3];
+/*                move first element to end of element part of
+ list */
+		iw[p3] = iw[p1];
+/*                add new element to front of list. */
+		iw[p1] = me;
+/*                store the new length of the list in len (i) 
+*/
+		len[i] = pn - p1 + 1;
+/*                -------------------------------------------
+------------ */
+/*                place in hash bucket.  Save hash key of i in
+ last (i). */
+/*                -------------------------------------------
+------------ */
+		hash = hash % hmod + 1;
+		j = head[hash];
+		if (j <= 0) {
+/*                   the degree list is empty, hash head i
+s -j */
+		    next[i] = -j;
+		    head[hash] = -i;
+		} else {
+/*                   degree list is not empty */
+/*                   use last (head (hash)) as hash head 
+*/
+		    next[i] = last[j];
+		    last[j] = i;
+		}
+		last[i] = hash;
+	    }
+/* L180: */
+	}
+	degree[me] = degme;
+/* ==================================================================
+===== */
+/*  SUPERVARIABLE DETECTION */
+/* ==================================================================
+===== */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    if (nv[i] < 0) {
+/*                i is a principal variable in Lme */
+/*                -------------------------------------------
+------------ */
+/*                examine all hash buckets with 2 or more vari
+ables.  We */
+/*                do this by examining all unique hash keys for 
+super- */
+/*                variables in the pattern Lme of the current 
+element, me */
+/*                -------------------------------------------
+------------ */
+		hash = last[i];
+/*                let i = head of hash bucket, and empty the h
+ash bucket */
+		j = head[hash];
+		if (j == 0) {
+		    goto L250;
+		}
+		if (j < 0) {
+/*                   degree list is empty */
+		    i = -j;
+		    head[hash] = 0;
+		} else {
+/*                   degree list is not empty, restore las
+t () of head */
+		    i = last[j];
+		    last[j] = 0;
+		}
+		if (i == 0) {
+		    goto L250;
+		}
+/*                while loop: */
+L200:
+		if (next[i] != 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   this bucket has one or more variables
+ following i. */
+/*                   scan all of them to see if i can abso
+rb any entries */
+/*                   that follow i in hash bucket.  Scatte
+r i into w. */
+/*                   ------------------------------------
+---------------- */
+		    ln = len[i];
+		    eln = elen[i];
+/*                   do not flag the first element in the 
+list (me) */
+		    i__2 = pe[i] + ln - 1;
+		    for (p = pe[i] + 1; p <= i__2; ++p) {
+			w[iw[p]] = wflg;
+/* L210: */
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   scan every other entry j following i 
+in bucket */
+/*                   ------------------------------------
+---------------- */
+		    jlast = i;
+		    j = next[i];
+/*                   while loop: */
+L220:
+		    if (j != 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      check if j and i have identica
+l nonzero pattern */
+/*                      -----------------------------
+-------------------- */
+			if (len[j] != ln) {
+/*                         i and j do not have sam
+e size data structure */
+			    goto L240;
+			}
+			if (elen[j] != eln) {
+/*                         i and j do not have sam
+e number of adjacent el */
+			    goto L240;
+			}
+/*                      do not flag the first element 
+in the list (me) */
+			i__2 = pe[j] + ln - 1;
+			for (p = pe[j] + 1; p <= i__2; ++p) {
+			    if (w[iw[p]] != wflg) {
+/*                            an entry (iw(p))
+ is in j but not in i */
+				goto L240;
+			    }
+/* L230: */
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      found it!  j can be absorbed i
+nto i */
+/*                      -----------------------------
+-------------------- */
+			pe[j] = -i;
+/*                      both nv (i) and nv (j) are neg
+ated since they */
+/*                      are in Lme, and the absolute v
+alues of each */
+/*                      are the number of variables in
+ i and j: */
+			nv[i] += nv[j];
+			nv[j] = 0;
+			elen[j] = 0;
+/*                      delete j from hash bucket */
+			j = next[j];
+			next[jlast] = j;
+			goto L220;
+/*                      -----------------------------
+-------------------- */
+L240:
+/*                      j cannot be absorbed into i */
+/*                      -----------------------------
+-------------------- */
+			jlast = j;
+			j = next[j];
+			goto L220;
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   no more variables can be absorbed int
+o i */
+/*                   go to next i in bucket and clear flag
+ array */
+/*                   ------------------------------------
+---------------- */
+		    ++wflg;
+		    i = next[i];
+		    if (i != 0) {
+			goto L200;
+		    }
+		}
+	    }
+L250:
+	    ;
+	}
+/* ==================================================================
+===== */
+/*  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMEN
+T */
+/* ==================================================================
+===== */
+	p = pme1;
+	nleft = *n - nel;
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    nvi = -nv[i];
+	    if (nvi > 0) {
+/*                i is a principal variable in Lme */
+/*                restore nv (i) to signify that i is principa
+l */
+		nv[i] = nvi;
+/*                -------------------------------------------
+------------ */
+/*                compute the external degree (add size of cur
+rent elem) */
+/*                -------------------------------------------
+------------ */
+/* Computing MAX */
+/* Computing MIN */
+		i__4 = degree[i] + degme - nvi, i__5 = nleft - nvi;
+		i__2 = 1, i__3 = min(i__4,i__5);
+		deg = max(i__2,i__3);
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable at the head of the d
+egree list */
+/*                -------------------------------------------
+------------ */
+		inext = head[deg];
+		if (inext != 0) {
+		    last[inext] = i;
+		}
+		next[i] = inext;
+		last[i] = 0;
+		head[deg] = i;
+/*                -------------------------------------------
+------------ */
+/*                save the new degree, and find the minimum de
+gree */
+/*                -------------------------------------------
+------------ */
+		mindeg = min(mindeg,deg);
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable in the element patte
+rn */
+/*                -------------------------------------------
+------------ */
+		iw[p] = i;
+		++p;
+	    }
+/* L260: */
+	}
+/* ==================================================================
+===== */
+/*  FINALIZE THE NEW ELEMENT */
+/* ==================================================================
+===== */
+	nv[me] = nvpiv + degme;
+/*          nv (me) is now the degree of pivot (including diagonal par
+t) */
+/*          save the length of the list for the new element me */
+	len[me] = p - pme1;
+	if (len[me] == 0) {
+/*             there is nothing left of the current pivot element 
+*/
+	    pe[me] = 0;
+	    w[me] = 0;
+	}
+	if (newmem != 0) {
+/*             element was not constructed in place: deallocate pa
+rt */
+/*             of it (final size is less than or equal to newmem, 
+*/
+/*             since newly nonprincipal variables have been remove
+d). */
+	    *pfree = p;
+	    mem = mem - newmem + len[me];
+	}
+/* ==================================================================
+===== */
+/*          END WHILE (selecting pivots) */
+	goto L30;
+    }
+/* =======================================================================
+ */
+/* =======================================================================
+ */
+/*  COMPUTE THE PERMUTATION VECTORS */
+/* =======================================================================
+ */
+/*       ---------------------------------------------------------------- 
+*/
+/*       The time taken by the following code is O(n).  At this */
+/*       point, elen (e) = -k has been done for all elements e, */
+/*       and elen (i) = 0 has been done for all nonprincipal */
+/*       variables i.  At this point, there are no principal */
+/*       supervariables left, and all elements are absorbed. */
+/*       ---------------------------------------------------------------- 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+/*       compute the ordering of unordered nonprincipal variables */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	if (elen[i] == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             i is an un-ordered row.  Traverse the tree from i u
+ntil */
+/*             reaching an element, e.  The element, e, was the */
+/*             principal supervariable of i and all nodes in the p
+ath */
+/*             from i to when e was selected as pivot. */
+/*             --------------------------------------------------
+-------- */
+	    j = -pe[i];
+/*             while (j is a variable) do: */
+L270:
+	    if (elen[j] >= 0) {
+		j = -pe[j];
+		goto L270;
+	    }
+	    e = j;
+/*             --------------------------------------------------
+-------- */
+/*             get the current pivot ordering of e */
+/*             --------------------------------------------------
+-------- */
+	    k = -elen[e];
+/*             --------------------------------------------------
+-------- */
+/*             traverse the path again from i to e, and compress t
+he */
+/*             path (all nodes point to e).  Path compression allo
+ws */
+/*             this code to compute in O(n) time.  Order the unord
+ered */
+/*             nodes in the path, and place the element e at the e
+nd. */
+/*             --------------------------------------------------
+-------- */
+	    j = i;
+/*             while (j is a variable) do: */
+L280:
+	    if (elen[j] >= 0) {
+		jnext = -pe[j];
+		pe[j] = -e;
+		if (elen[j] == 0) {
+/*                   j is an unordered row */
+		    elen[j] = k;
+		    ++k;
+		}
+		j = jnext;
+		goto L280;
+	    }
+/*             leave elen (e) negative, so we know it is an elemen
+t */
+	    elen[e] = -k;
+	}
+/* L290: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       reset the inverse permutation (elen (1..n)) to be positive, */
+/*       and compute the permutation (last (1..n)). */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	k = (i__2 = elen[i], abs(i__2));
+	last[k] = i;
+	elen[i] = k;
+/* L300: */
+    }
+/* =======================================================================
+ */
+/*  RETURN THE MEMORY USAGE IN IW */
+/* =======================================================================
+ */
+/*       If maxmem is less than or equal to iwlen, then no compressions */
+/*       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise */
+/*       compressions did occur, and iwlen would have had to have been */
+/*       greater than or equal to maxmem for no compressions to occur. */
+/*       Return the value of maxmem in the pfree argument. */
+    *pfree = maxmem;
+    return 0;
+} /* amdhat_ */
+
diff --git a/contrib/taucs/external/src/amdhat.f b/contrib/taucs/external/src/amdhat.f
new file mode 100644
index 0000000000000000000000000000000000000000..fd2955585716bf68e51bf3ee1a774feded87e389
--- /dev/null
+++ b/contrib/taucs/external/src/amdhat.f
@@ -0,0 +1,1215 @@
+
+        SUBROUTINE AMDHAT
+     $          (N, PE, IW, LEN, IWLEN, PFREE, NV, NEXT,
+     $          LAST, HEAD, ELEN, DEGREE, NCMPA, W, IOVFLO)
+
+        INTEGER N, IWLEN, PFREE, NCMPA, IOVFLO, IW (IWLEN), PE (N),
+     $          DEGREE (N), NV (N), NEXT (N), LAST (N), HEAD (N),
+     $          ELEN (N), W (N), LEN (N)
+
+C-----------------------------------------------------------------------
+C  The MC47 / AMD suite of minimum degree ordering algorithms.
+C
+C  This code is one of seven variations of a single algorithm:
+C  the primary routine (MC47B/BD, only available in the Harwell
+C  Subroutine Library), and 6 variations that differ only in
+C  how they compute the degree (available in NETLIB).
+C
+C  For information on the Harwell Subroutine Library, contact
+C  John Harding, Harwell Subroutine Library, B 552, AEA Technology,
+C  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573,
+C  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will
+C  provide details of price and conditions of use.
+C-----------------------------------------------------------------------
+
+************************************************************************
+* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+* and AMDATR) may be used SOLELY for educational, research, and
+* benchmarking purposes by non-profit organizations and the U.S.
+* government.  Commercial and other organizations may make use of the
+* AMD routines SOLELY for benchmarking purposes only.  The AMD
+* routines may be modified by or on behalf of the User for such
+* use but at no time shall the AMD routines or any such modified
+* version of them become the property of the User.  The AMD routines
+* are provided without warranty of any kind, either expressed or
+* implied.  Neither the Authors nor their employers shall be liable
+* for any direct or consequential loss or damage whatsoever arising
+* out of the use or misuse of the AMD routines by the User.  The AMD
+* routines must not be sold.  You may make copies of the AMD routines,
+* but this NOTICE and the Copyright notice must appear in all copies.
+* Any other use of the AMD routines requires written permission.
+* Your use of the AMD routines is an implicit agreement to these
+* conditions."
+************************************************************************
+
+C-----------------------------------------------------------------------
+C AMDhat:  approximate minimum (Gilbert-Moler-Schreiber) degree ordering
+C          algorithm
+C-----------------------------------------------------------------------
+
+C  Variation 4:  Gilbert-Moler-Schreiber approximate external
+C  degree (as used in MATLAB, for example.  See J. R. Gilbert, C.
+C  Moler, and R. Schreiber, Sparse matrices in MATLAB:  design and
+C  implementation, SIAM J. Matrix Analysis and Applications, vol. 13,
+C  1992, pp. 333-356).  Note that some of the comments in the code
+C  below reflect the MC47-style degree approximation.
+C
+C  This method is sometimes slightly faster than MC47B/BD, but nearly
+C  always computes an ordering with worse fill-in than MC47B/BD.  It
+C  has been observed to be nearly 3 times slower than MC47B/BD for some
+C  matrices because of the increase in fill-in.
+C
+C  We recommend using MC47B/BD instead of this routine since MC47B/BD
+C  gives better results in about the same time.
+
+C-----------------------------------------------------------------------
+
+C Given a representation of the nonzero pattern of a symmetric matrix,
+C       A, (excluding the diagonal) perform an approximate minimum
+C       (GMS-style) degree ordering to compute a pivot order such
+C       that the introduction of nonzeros (fill-in) in the Cholesky
+C       factors A = LL^T is kept low.  At each step, the pivot
+C       selected is the one with the minimum Gilbert-Moler-Schreiber-
+C       style upper-bound on the external degree.
+
+C **********************************************************************
+C ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
+C **********************************************************************
+C ** If you want error checking, a more versatile input format, and a **
+C ** simpler user interface, then use MC47A/AD in the Harwell         **
+C ** Subroutine Library, which checks for errors, transforms the      **
+C ** input, and calls MC47B/BD.                                       **
+C **********************************************************************
+
+C       References:  (UF Tech Reports are available via anonymous ftp
+C       to ftp.cis.ufl.edu:cis/tech-reports).
+C
+C       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern
+C               multifrontal method for sparse LU factorization",
+C               SIAM J. Matrix Analysis and Applications, to appear.
+C               also Univ. of Florida Technical Report TR-94-038.
+C               Discusses UMFPACK / MA38.
+C
+C       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff,
+C               "An approximate minimum degree ordering algorithm,"
+C               SIAM J. Matrix Analysis and Applications (to appear),
+C               also Univ. of Florida Technical Report TR-94-039.
+C               Discusses this routine.
+C
+C       [3] Alan George and Joseph Liu, "The evolution of the
+C               minimum degree ordering algorithm," SIAM Review, vol.
+C               31, no. 1, pp. 1-19, March 1989.  We list below the
+C               features mentioned in that paper that this code
+C               includes:
+C
+C       mass elimination:
+C               Yes.  MA27 relied on supervariable detection for mass
+C               elimination.
+C       indistinguishable nodes:
+C               Yes (we call these "supervariables").  This was also in
+C               the MA27 code - although we modified the method of
+C               detecting them (the previous hash was the true degree,
+C               which we no longer keep track of).  A supervariable is
+C               a set of rows with identical nonzero pattern.  All
+C               variables in a supervariable are eliminated together.
+C               Each supervariable has as its numerical name that of
+C               one of its variables (its principal variable).
+C       quotient graph representation:
+C               Yes.  We use the term "element" for the cliques formed
+C               during elimination.  This was also in the MA27 code.
+C               The algorithm can operate in place, but it will work
+C               more efficiently if given some "elbow room."
+C       element absorption:
+C               Yes.  This was also in the MA27 code.
+C       external degree:
+C               Yes.  The MA27 code was based on the true degree.
+C       incomplete degree update and multiple elimination:
+C               No.  This was not in MA27, either.  Our method of
+C               degree update within MC47B/BD is element-based, not
+C               variable-based.  It is thus not well-suited for use
+C               with incomplete degree update or multiple elimination.
+
+C-----------------------------------------------------------------------
+C Authors, and Copyright (C) 1995 by:
+C       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid.
+C
+C Acknowledgements:
+C       This work (and the UMFPACK package) was supported by the
+C       National Science Foundation (ASC-9111263 and DMS-9223088).
+C       The UMFPACK/MA38 approximate degree update algorithm, the
+C       unsymmetric analog which forms the basis of MC47B/BD, was
+C       developed while Tim Davis was supported by CERFACS (Toulouse,
+C       France) in a post-doctoral position.
+C
+C Date:  September, 1995
+C-----------------------------------------------------------------------
+
+C-----------------------------------------------------------------------
+C INPUT ARGUMENTS (unaltered):
+C-----------------------------------------------------------------------
+
+C n:    The matrix order.
+C
+C       Restriction:  1 .le. n .lt. (iovflo/2)-2
+
+C iwlen:        The length of iw (1..iwlen).  On input, the matrix is
+C       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+C       slightly larger than what is required to hold the matrix, at
+C       least iwlen .ge. pfree + n is recommended.  Otherwise,
+C       excessive compressions will take place.
+C       *** We do not recommend running this algorithm with ***
+C       ***      iwlen .lt. pfree + n.                      ***
+C       *** Better performance will be obtained if          ***
+C       ***      iwlen .ge. pfree + n                       ***
+C       *** or better yet                                   ***
+C       ***      iwlen .gt. 1.2 * pfree                     ***
+C       *** (where pfree is its value on input).            ***
+C       The algorithm will not run at all if iwlen .lt. pfree-1.
+C
+C       Restriction: iwlen .ge. pfree-1
+
+C iovflo:       The largest positive integer that your computer can
+C       represent (-iovflo should also be representable).  On a 32-bit
+C       computer with 2's-complement arithmetic,
+C       iovflo = (2^31)-1 = 2,147,483,647.
+
+C-----------------------------------------------------------------------
+C INPUT/OUTPUT ARGUMENTS:
+C-----------------------------------------------------------------------
+
+C pe:   On input, pe (i) is the index in iw of the start of row i, or
+C       zero if row i has no off-diagonal non-zeros.
+C
+C       During execution, it is used for both supervariables and
+C       elements:
+C
+C       * Principal supervariable i:  index into iw of the
+C               description of supervariable i.  A supervariable
+C               represents one or more rows of the matrix
+C               with identical nonzero pattern.
+C       * Non-principal supervariable i:  if i has been absorbed
+C               into another supervariable j, then pe (i) = -j.
+C               That is, j has the same pattern as i.
+C               Note that j might later be absorbed into another
+C               supervariable j2, in which case pe (i) is still -j,
+C               and pe (j) = -j2.
+C       * Unabsorbed element e:  the index into iw of the description
+C               of element e, if e has not yet been absorbed by a
+C               subsequent element.  Element e is created when
+C               the supervariable of the same name is selected as
+C               the pivot.
+C       * Absorbed element e:  if element e is absorbed into element
+C               e2, then pe (e) = -e2.  This occurs when the pattern of
+C               e (that is, Le) is found to be a subset of the pattern
+C               of e2 (that is, Le2).  If element e is "null" (it has
+C               no nonzeros outside its pivot block), then pe (e) = 0.
+C
+C       On output, pe holds the assembly tree/forest, which implicitly
+C       represents a pivot order with identical fill-in as the actual
+C       order (via a depth-first search of the tree).
+C
+C       On output:
+C       If nv (i) .gt. 0, then i represents a node in the assembly tree,
+C       and the parent of i is -pe (i), or zero if i is a root.
+C       If nv (i) = 0, then (i,-pe (i)) represents an edge in a
+C       subtree, the root of which is a node in the assembly tree.
+
+C pfree:        On input the tail end of the array, iw (pfree..iwlen),
+C       is empty, and the matrix is stored in iw (1..pfree-1).
+C       During execution, additional data is placed in iw, and pfree
+C       is modified so that iw (pfree..iwlen) is always the unused part
+C       of iw.  On output, pfree is set equal to the size of iw that
+C       would have been needed for no compressions to occur.  If
+C       ncmpa is zero, then pfree (on output) is less than or equal to
+C       iwlen, and the space iw (pfree+1 ... iwlen) was not used.
+C       Otherwise, pfree (on output) is greater than iwlen, and all the
+C       memory in iw was used.
+
+C-----------------------------------------------------------------------
+C INPUT/MODIFIED (undefined on output):
+C-----------------------------------------------------------------------
+
+C len:  On input, len (i) holds the number of entries in row i of the
+C       matrix, excluding the diagonal.  The contents of len (1..n)
+C       are undefined on output.
+
+C iw:   On input, iw (1..pfree-1) holds the description of each row i
+C       in the matrix.  The matrix must be symmetric, and both upper
+C       and lower triangular parts must be present.  The diagonal must
+C       not be present.  Row i is held as follows:
+C
+C               len (i):  the length of the row i data structure
+C               iw (pe (i) ... pe (i) + len (i) - 1):
+C                       the list of column indices for nonzeros
+C                       in row i (simple supervariables), excluding
+C                       the diagonal.  All supervariables start with
+C                       one row/column each (supervariable i is just
+C                       row i).
+C               if len (i) is zero on input, then pe (i) is ignored
+C               on input.
+C
+C               Note that the rows need not be in any particular order,
+C               and there may be empty space between the rows.
+C
+C       During execution, the supervariable i experiences fill-in.
+C       This is represented by placing in i a list of the elements
+C       that cause fill-in in supervariable i:
+C
+C               len (i):  the length of supervariable i
+C               iw (pe (i) ... pe (i) + elen (i) - 1):
+C                       the list of elements that contain i.  This list
+C                       is kept short by removing absorbed elements.
+C               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1):
+C                       the list of supervariables in i.  This list
+C                       is kept short by removing nonprincipal
+C                       variables, and any entry j that is also
+C                       contained in at least one of the elements
+C                       (j in Le) in the list for i (e in row i).
+C
+C       When supervariable i is selected as pivot, we create an
+C       element e of the same name (e=i):
+C
+C               len (e):  the length of element e
+C               iw (pe (e) ... pe (e) + len (e) - 1):
+C                       the list of supervariables in element e.
+C
+C       An element represents the fill-in that occurs when supervariable
+C       i is selected as pivot (which represents the selection of row i
+C       and all non-principal variables whose principal variable is i).
+C       We use the term Le to denote the set of all supervariables
+C       in element e.  Absorbed supervariables and elements are pruned
+C       from these lists when computationally convenient.
+C
+C       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
+C       The contents of iw are undefined on output.
+
+C-----------------------------------------------------------------------
+C OUTPUT (need not be set on input):
+C-----------------------------------------------------------------------
+
+C nv:   During execution, abs (nv (i)) is equal to the number of rows
+C       that are represented by the principal supervariable i.  If i is
+C       a nonprincipal variable, then nv (i) = 0.  Initially,
+C       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a
+C       principal variable in the pattern Lme of the current pivot
+C       element me.  On output, nv (e) holds the true degree of element
+C       e at the time it was created (including the diagonal part).
+
+C ncmpa:        The number of times iw was compressed.  If this is
+C       excessive, then the execution took longer than what could have
+C       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20%
+C       larger than the value of pfree on input (or at least
+C       iwlen .ge. pfree + n).  The fastest performance will be
+C       obtained when ncmpa is returned as zero.  If iwlen is set to
+C       the value returned by pfree on *output*, then no compressions
+C       will occur.
+
+C elen: See the description of iw above.  At the start of execution,
+C       elen (i) is set to zero.  During execution, elen (i) is the
+C       number of elements in the list for supervariable i.  When e
+C       becomes an element, elen (e) = -nel is set, where nel is the
+C       current step of factorization.  elen (i) = 0 is done when i
+C       becomes nonprincipal.
+C
+C       For variables, elen (i) .ge. 0 holds until just before the
+C       permutation vectors are computed.  For elements,
+C       elen (e) .lt. 0 holds.
+C
+C       On output elen (1..n) holds the inverse permutation (the same
+C       as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+C       then row i is the kth pivot row.  Row i of A appears as the
+C       (elen(i))-th row in the permuted matrix, PAP^T.
+
+C last: In a degree list, last (i) is the supervariable preceding i,
+C       or zero if i is the head of the list.  In a hash bucket,
+C       last (i) is the hash key for i.  last (head (hash)) is also
+C       used as the head of a hash bucket if head (hash) contains a
+C       degree list (see head, below).
+C
+C       On output, last (1..n) holds the permutation (the same as the
+C       'PERM' argument in Sparspak).  That is, if i = last (k), then
+C       row i is the kth pivot row.  Row last (k) of A is the k-th row
+C       in the permuted matrix, PAP^T.
+
+C-----------------------------------------------------------------------
+C LOCAL (not input or output - used only during execution):
+C-----------------------------------------------------------------------
+
+C degree:       If i is a supervariable, then degree (i) holds the
+C       current approximation of the external degree of row i (an upper
+C       bound).  The external degree is the number of nonzeros in row i,
+C       minus abs (nv (i)) (the diagonal part).  The bound is equal to
+C       the external degree if elen (i) is less than or equal to two.
+C
+C       We also use the term "external degree" for elements e to refer
+C       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|,
+C       which is the degree of the off-diagonal part of the element e
+C       (not including the diagonal part).
+
+C head: head is used for degree lists.  head (deg) is the first
+C       supervariable in a degree list (all supervariables i in a
+C       degree list deg have the same approximate degree, namely,
+C       deg = degree (i)).  If the list deg is empty then
+C       head (deg) = 0.
+C
+C       During supervariable detection head (hash) also serves as a
+C       pointer to a hash bucket.
+C       If head (hash) .gt. 0, there is a degree list of degree hash.
+C               The hash bucket head pointer is last (head (hash)).
+C       If head (hash) = 0, then the degree list and hash bucket are
+C               both empty.
+C       If head (hash) .lt. 0, then the degree list is empty, and
+C               -head (hash) is the head of the hash bucket.
+C       After supervariable detection is complete, all hash buckets
+C       are empty, and the (last (head (hash)) = 0) condition is
+C       restored for the non-empty degree lists.
+
+C next: next (i) is the supervariable following i in a link list, or
+C       zero if i is the last in the list.  Used for two kinds of
+C       lists:  degree lists and hash buckets (a supervariable can be
+C       in only one kind of list at a time).
+
+C w:    The flag array w determines the status of elements and
+C       variables, and the external degree of elements.
+C
+C       for elements:
+C          if w (e) = 0, then the element e is absorbed
+C          if w (e) .ge. wflg, then w (e) - wflg is the size of
+C               the set |Le \ Lme|, in terms of nonzeros (the
+C               sum of abs (nv (i)) for each principal variable i that
+C               is both in the pattern of element e and NOT in the
+C               pattern of the current pivot element, me).
+C          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has
+C               not yet been seen in the scan of the element lists in
+C               the computation of |Le\Lme| in loop 150 below.
+C
+C       for variables:
+C          during supervariable detection, if w (j) .ne. wflg then j is
+C          not in the pattern of variable i
+C
+C       The w array is initialized by setting w (i) = 1 for all i,
+C       and by setting wflg = 2.  It is reinitialized if wflg becomes
+C       too large (to ensure that wflg+n does not cause integer
+C       overflow).
+
+C-----------------------------------------------------------------------
+C LOCAL INTEGERS:
+C-----------------------------------------------------------------------
+
+        INTEGER DEG, DEGME, DEXT, DMAX, E, ELENME, ELN, HASH, HMOD, I,
+     $          ILAST, INEXT, J, JLAST, JNEXT, K, KNT1, KNT2, KNT3,
+     $          LENJ, LN, MAXMEM, ME, MEM, MINDEG, NEL, NEWMEM,
+     $          NLEFT, NVI, NVJ, NVPIV, SLENME, WBIG, WE, WFLG, WNVI, X
+
+C deg:          the degree of a variable or element
+C degme:        size, |Lme|, of the current element, me (= degree (me))
+C dext:         external degree, |Le \ Lme|, of some element e
+C dmax:         largest |Le| seen so far
+C e:            an element
+C elenme:       the length, elen (me), of element list of pivotal var.
+C eln:          the length, elen (...), of an element list
+C hash:         the computed value of the hash function
+C hmod:         the hash function is computed modulo hmod = max (1,n-1)
+C i:            a supervariable
+C ilast:        the entry in a link list preceding i
+C inext:        the entry in a link list following i
+C j:            a supervariable
+C jlast:        the entry in a link list preceding j
+C jnext:        the entry in a link list, or path, following j
+C k:            the pivot order of an element or variable
+C knt1:         loop counter used during element construction
+C knt2:         loop counter used during element construction
+C knt3:         loop counter used during compression
+C lenj:         len (j)
+C ln:           length of a supervariable list
+C maxmem:       amount of memory needed for no compressions
+C me:           current supervariable being eliminated, and the
+C                       current element created by eliminating that
+C                       supervariable
+C mem:          memory in use assuming no compressions have occurred
+C mindeg:       current minimum degree
+C nel:          number of pivots selected so far
+C newmem:       amount of new memory needed for current pivot element
+C nleft:        n - nel, the number of nonpivotal rows/columns remaining
+C nvi:          the number of variables in a supervariable i (= nv (i))
+C nvj:          the number of variables in a supervariable j (= nv (j))
+C nvpiv:        number of pivots in current element
+C slenme:       number of variables in variable list of pivotal variable
+C wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig.
+C we:           w (e)
+C wflg:         used for flagging the w array.  See description of w.
+C wnvi:         wflg - nv (i)
+C x:            either a supervariable or an element
+
+C-----------------------------------------------------------------------
+C LOCAL POINTERS:
+C-----------------------------------------------------------------------
+
+        INTEGER P, P1, P2, P3, PDST, PEND, PJ, PME, PME1, PME2, PN, PSRC
+
+C               Any parameter (pe (...) or pfree) or local variable
+C               starting with "p" (for Pointer) is an index into iw,
+C               and all indices into iw use variables starting with
+C               "p."  The only exception to this rule is the iwlen
+C               input argument.
+
+C p:            pointer into lots of things
+C p1:           pe (i) for some variable i (start of element list)
+C p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list)
+C p3:           index of first supervariable in clean list
+C pdst:         destination pointer, for compression
+C pend:         end of memory to compress
+C pj:           pointer into an element or variable
+C pme:          pointer into the current element (pme1...pme2)
+C pme1:         the current element, me, is stored in iw (pme1...pme2)
+C pme2:         the end of the current element
+C pn:           pointer into a "clean" variable, also used to compress
+C psrc:         source pointer, for compression
+
+C-----------------------------------------------------------------------
+C  FUNCTIONS CALLED:
+C-----------------------------------------------------------------------
+
+        INTRINSIC MAX, MIN, MOD
+
+C=======================================================================
+C  INITIALIZATIONS
+C=======================================================================
+
+        WFLG = 2
+        MINDEG = 1
+        NCMPA = 0
+        NEL = 0
+        HMOD = MAX (1, N-1)
+        DMAX = 0
+        WBIG = IOVFLO - N
+        MEM = PFREE - 1
+        MAXMEM = MEM
+
+        DO 10 I = 1, N
+           LAST (I) = 0
+           HEAD (I) = 0
+           NV (I) = 1
+           W (I) = 1
+           ELEN (I) = 0
+           DEGREE (I) = LEN (I)
+10         CONTINUE
+
+C       ----------------------------------------------------------------
+C       initialize degree lists and eliminate rows with no off-diag. nz.
+C       ----------------------------------------------------------------
+
+        DO 20 I = 1, N
+
+           DEG = DEGREE (I)
+
+           IF (DEG .GT. 0) THEN
+
+C             ----------------------------------------------------------
+C             place i in the degree list corresponding to its degree
+C             ----------------------------------------------------------
+
+              INEXT = HEAD (DEG)
+              IF (INEXT .NE. 0) LAST (INEXT) = I
+              NEXT (I) = INEXT
+              HEAD (DEG) = I
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             we have a variable that can be eliminated at once because
+C             there is no off-diagonal non-zero in its row.
+C             ----------------------------------------------------------
+
+              NEL = NEL + 1
+              ELEN (I) = -NEL
+              PE (I) = 0
+              W (I) = 0
+
+              ENDIF
+
+20         CONTINUE
+
+C=======================================================================
+C  WHILE (selecting pivots) DO
+C=======================================================================
+
+30      CONTINUE
+        IF (NEL .LT. N) THEN
+
+C=======================================================================
+C  GET PIVOT OF MINIMUM DEGREE
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          find next supervariable for elimination
+C          -------------------------------------------------------------
+
+           DO 40 DEG = MINDEG, N
+              ME = HEAD (DEG)
+              IF (ME .GT. 0) GOTO 50
+40            CONTINUE
+50         CONTINUE
+           MINDEG = DEG
+
+C          -------------------------------------------------------------
+C          remove chosen variable from link list
+C          -------------------------------------------------------------
+
+           INEXT = NEXT (ME)
+           IF (INEXT .NE. 0) LAST (INEXT) = 0
+           HEAD (DEG) = INEXT
+
+C          -------------------------------------------------------------
+C          me represents the elimination of pivots nel+1 to nel+nv(me).
+C          place me itself as the first in this set.  It will be moved
+C          to the nel+nv(me) position when the permutation vectors are
+C          computed.
+C          -------------------------------------------------------------
+
+           ELENME = ELEN (ME)
+           ELEN (ME) = - (NEL + 1)
+           NVPIV = NV (ME)
+           NEL = NEL + NVPIV
+
+C=======================================================================
+C  CONSTRUCT NEW ELEMENT
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          At this point, me is the pivotal supervariable.  It will be
+C          converted into the current element.  Scan list of the
+C          pivotal supervariable, me, setting tree pointers and
+C          constructing new list of supervariables for the new element,
+C          me.  p is a pointer to the current position in the old list.
+C          -------------------------------------------------------------
+
+C          flag the variable "me" as being in Lme by negating nv (me)
+           NV (ME) = -NVPIV
+           DEGME = 0
+
+           IF (ELENME .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             construct the new element in place
+C             ----------------------------------------------------------
+
+              PME1 = PE (ME)
+              PME2 = PME1 - 1
+
+              DO 60 P = PME1, PME1 + LEN (ME) - 1
+                 I = IW (P)
+                 NVI = NV (I)
+                 IF (NVI .GT. 0) THEN
+
+C                   ----------------------------------------------------
+C                   i is a principal variable not yet placed in Lme.
+C                   store i in new list
+C                   ----------------------------------------------------
+
+                    DEGME = DEGME + NVI
+C                   flag i as being in Lme by negating nv (i)
+                    NV (I) = -NVI
+                    PME2 = PME2 + 1
+                    IW (PME2) = I
+
+C                   ----------------------------------------------------
+C                   remove variable i from degree list.
+C                   ----------------------------------------------------
+
+                    ILAST = LAST (I)
+                    INEXT = NEXT (I)
+                    IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                    IF (ILAST .NE. 0) THEN
+                       NEXT (ILAST) = INEXT
+                    ELSE
+C                      i is at the head of the degree list
+                       HEAD (DEGREE (I)) = INEXT
+                       ENDIF
+
+                    ENDIF
+60               CONTINUE
+C             this element takes no new memory in iw:
+              NEWMEM = 0
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             construct the new element in empty space, iw (pfree ...)
+C             ----------------------------------------------------------
+
+              P = PE (ME)
+              PME1 = PFREE
+              SLENME = LEN (ME) - ELENME
+
+              DO 120 KNT1 = 1, ELENME + 1
+
+                 IF (KNT1 .GT. ELENME) THEN
+C                   search the supervariables in me.
+                    E = ME
+                    PJ = P
+                    LN = SLENME
+                 ELSE
+C                   search the elements in me.
+                    E = IW (P)
+                    P = P + 1
+                    PJ = PE (E)
+                    LN = LEN (E)
+                    ENDIF
+
+C                -------------------------------------------------------
+C                search for different supervariables and add them to the
+C                new list, compressing when necessary. this loop is
+C                executed once for each element in the list and once for
+C                all the supervariables in the list.
+C                -------------------------------------------------------
+
+                 DO 110 KNT2 = 1, LN
+                    I = IW (PJ)
+                    PJ = PJ + 1
+                    NVI = NV (I)
+                    IF (NVI .GT. 0) THEN
+
+C                      -------------------------------------------------
+C                      compress iw, if necessary
+C                      -------------------------------------------------
+
+                       IF (PFREE .GT. IWLEN) THEN
+C                         prepare for compressing iw by adjusting
+C                         pointers and lengths so that the lists being
+C                         searched in the inner and outer loops contain
+C                         only the remaining entries.
+
+                          PE (ME) = P
+                          LEN (ME) = LEN (ME) - KNT1
+                          IF (LEN (ME) .EQ. 0) THEN
+C                            nothing left of supervariable me
+                             PE (ME) = 0
+                             ENDIF
+                          PE (E) = PJ
+                          LEN (E) = LN - KNT2
+                          IF (LEN (E) .EQ. 0) THEN
+C                            nothing left of element e
+                             PE (E) = 0
+                             ENDIF
+
+                          NCMPA = NCMPA + 1
+C                         store first item in pe
+C                         set first entry to -item
+                          DO 70 J = 1, N
+                             PN = PE (J)
+                             IF (PN .GT. 0) THEN
+                                PE (J) = IW (PN)
+                                IW (PN) = -J
+                                ENDIF
+70                           CONTINUE
+
+C                         psrc/pdst point to source/destination
+                          PDST = 1
+                          PSRC = 1
+                          PEND = PME1 - 1
+
+C                         while loop:
+80                        CONTINUE
+                          IF (PSRC .LE. PEND) THEN
+C                            search for next negative entry
+                             J = -IW (PSRC)
+                             PSRC = PSRC + 1
+                             IF (J .GT. 0) THEN
+                                IW (PDST) = PE (J)
+                                PE (J) = PDST
+                                PDST = PDST + 1
+C                               copy from source to destination
+                                LENJ = LEN (J)
+                                DO 90 KNT3 = 0, LENJ - 2
+                                   IW (PDST + KNT3) = IW (PSRC + KNT3)
+90                                 CONTINUE
+                                PDST = PDST + LENJ - 1
+                                PSRC = PSRC + LENJ - 1
+                                ENDIF
+                             GOTO 80
+                             ENDIF
+
+C                         move the new partially-constructed element
+                          P1 = PDST
+                          DO 100 PSRC = PME1, PFREE - 1
+                             IW (PDST) = IW (PSRC)
+                             PDST = PDST + 1
+100                          CONTINUE
+                          PME1 = P1
+                          PFREE = PDST
+                          PJ = PE (E)
+                          P = PE (ME)
+                          ENDIF
+
+C                      -------------------------------------------------
+C                      i is a principal variable not yet placed in Lme
+C                      store i in new list
+C                      -------------------------------------------------
+
+                       DEGME = DEGME + NVI
+C                      flag i as being in Lme by negating nv (i)
+                       NV (I) = -NVI
+                       IW (PFREE) = I
+                       PFREE = PFREE + 1
+
+C                      -------------------------------------------------
+C                      remove variable i from degree link list
+C                      -------------------------------------------------
+
+                       ILAST = LAST (I)
+                       INEXT = NEXT (I)
+                       IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                       IF (ILAST .NE. 0) THEN
+                          NEXT (ILAST) = INEXT
+                       ELSE
+C                         i is at the head of the degree list
+                          HEAD (DEGREE (I)) = INEXT
+                          ENDIF
+
+                       ENDIF
+110                 CONTINUE
+
+                 IF (E .NE. ME) THEN
+C                   set tree pointer and flag to indicate element e is
+C                   absorbed into new element me (the parent of e is me)
+                    PE (E) = -ME
+                    W (E) = 0
+                    ENDIF
+120              CONTINUE
+
+              PME2 = PFREE - 1
+C             this element takes newmem new memory in iw (possibly zero)
+              NEWMEM = PFREE - PME1
+              MEM = MEM + NEWMEM
+              MAXMEM = MAX (MAXMEM, MEM)
+              ENDIF
+
+C          -------------------------------------------------------------
+C          me has now been converted into an element in iw (pme1..pme2)
+C          -------------------------------------------------------------
+
+C          degme holds the external degree of new element
+           DEGREE (ME) = DEGME
+           PE (ME) = PME1
+           LEN (ME) = PME2 - PME1 + 1
+
+C          -------------------------------------------------------------
+C          make sure that wflg is not too large.  With the current
+C          value of wflg, wflg+n must not cause integer overflow
+C          -------------------------------------------------------------
+
+           IF (WFLG .GE. WBIG) THEN
+              DO 130 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+130              CONTINUE
+              WFLG = 2
+              ENDIF
+
+C=======================================================================
+C  DEGREE UPDATE AND ELEMENT ABSORPTION
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 2:  for each i in Lme, sum up the degree of Lme (which
+C          is degme), plus the sum of the external degrees of each Le
+C          for the elements e appearing within i, plus the
+C          supervariables in i.  Place i in hash list.
+C          -------------------------------------------------------------
+
+           DO 180 PME = PME1, PME2
+              I = IW (PME)
+              P1 = PE (I)
+              P2 = P1 + ELEN (I) - 1
+              PN = P1
+              HASH = 0
+              DEG = 0
+
+C             ----------------------------------------------------------
+C             scan the element list associated with supervariable i
+C             ----------------------------------------------------------
+
+C             Gilbert-Moler-Schreiber approximate degree:
+              NVI = -NV (I)
+              DO 160 P = P1, P2
+                 E = IW (P)
+                 IF (W (E) .NE. 0) THEN
+C                   e is an unabsorbed element
+                    DEG = DEG + DEGREE (E) - NVI
+                    IW (PN) = E
+                    PN = PN + 1
+                    HASH = HASH + E
+                    ENDIF
+160              CONTINUE
+
+C             count the number of elements in i (including me):
+              ELEN (I) = PN - P1 + 1
+
+C             ----------------------------------------------------------
+C             scan the supervariables in the list associated with i
+C             ----------------------------------------------------------
+
+              P3 = PN
+              DO 170 P = P2 + 1, P1 + LEN (I) - 1
+                 J = IW (P)
+                 NVJ = NV (J)
+                 IF (NVJ .GT. 0) THEN
+C                   j is unabsorbed, and not in Lme.
+C                   add to degree and add to new list
+                    DEG = DEG + NVJ
+                    IW (PN) = J
+                    PN = PN + 1
+                    HASH = HASH + J
+                    ENDIF
+170              CONTINUE
+
+C             ----------------------------------------------------------
+C             update the degree and check for mass elimination
+C             ----------------------------------------------------------
+
+              IF (ELEN (I) .EQ. 1 .AND. P3 .EQ. PN) THEN
+
+C                -------------------------------------------------------
+C                mass elimination
+C                -------------------------------------------------------
+
+C                There is nothing left of this node except for an
+C                edge to the current pivot element.  elen (i) is 1,
+C                and there are no variables adjacent to node i.
+C                Absorb i into the current pivot element, me.
+
+                 PE (I) = -ME
+                 NVI = -NV (I)
+                 DEGME = DEGME - NVI
+                 NVPIV = NVPIV + NVI
+                 NEL = NEL + NVI
+                 NV (I) = 0
+                 ELEN (I) = 0
+
+              ELSE
+
+C                -------------------------------------------------------
+C                update the upper-bound degree of i
+C                -------------------------------------------------------
+
+C                the following degree does not yet include the size
+C                of the current element, which is added later:
+                 DEGREE (I) = MIN (DEGREE (I), DEG)
+
+C                -------------------------------------------------------
+C                add me to the list for i
+C                -------------------------------------------------------
+
+C                move first supervariable to end of list
+                 IW (PN) = IW (P3)
+C                move first element to end of element part of list
+                 IW (P3) = IW (P1)
+C                add new element to front of list.
+                 IW (P1) = ME
+C                store the new length of the list in len (i)
+                 LEN (I) = PN - P1 + 1
+
+C                -------------------------------------------------------
+C                place in hash bucket.  Save hash key of i in last (i).
+C                -------------------------------------------------------
+
+                 HASH = MOD (HASH, HMOD) + 1
+                 J = HEAD (HASH)
+                 IF (J .LE. 0) THEN
+C                   the degree list is empty, hash head is -j
+                    NEXT (I) = -J
+                    HEAD (HASH) = -I
+                 ELSE
+C                   degree list is not empty
+C                   use last (head (hash)) as hash head
+                    NEXT (I) = LAST (J)
+                    LAST (J) = I
+                    ENDIF
+                 LAST (I) = HASH
+                 ENDIF
+180           CONTINUE
+
+           DEGREE (ME) = DEGME
+
+C=======================================================================
+C  SUPERVARIABLE DETECTION
+C=======================================================================
+
+           DO 250 PME = PME1, PME2
+              I = IW (PME)
+              IF (NV (I) .LT. 0) THEN
+C                i is a principal variable in Lme
+
+C                -------------------------------------------------------
+C                examine all hash buckets with 2 or more variables.  We
+C                do this by examining all unique hash keys for super-
+C                variables in the pattern Lme of the current element, me
+C                -------------------------------------------------------
+
+                 HASH = LAST (I)
+C                let i = head of hash bucket, and empty the hash bucket
+                 J = HEAD (HASH)
+                 IF (J .EQ. 0) GOTO 250
+                 IF (J .LT. 0) THEN
+C                   degree list is empty
+                    I = -J
+                    HEAD (HASH) = 0
+                 ELSE
+C                   degree list is not empty, restore last () of head
+                    I = LAST (J)
+                    LAST (J) = 0
+                    ENDIF
+                 IF (I .EQ. 0) GOTO 250
+
+C                while loop:
+200              CONTINUE
+                 IF (NEXT (I) .NE. 0) THEN
+
+C                   ----------------------------------------------------
+C                   this bucket has one or more variables following i.
+C                   scan all of them to see if i can absorb any entries
+C                   that follow i in hash bucket.  Scatter i into w.
+C                   ----------------------------------------------------
+
+                    LN = LEN (I)
+                    ELN = ELEN (I)
+C                   do not flag the first element in the list (me)
+                    DO 210 P = PE (I) + 1, PE (I) + LN - 1
+                       W (IW (P)) = WFLG
+210                    CONTINUE
+
+C                   ----------------------------------------------------
+C                   scan every other entry j following i in bucket
+C                   ----------------------------------------------------
+
+                    JLAST = I
+                    J = NEXT (I)
+
+C                   while loop:
+220                 CONTINUE
+                    IF (J .NE. 0) THEN
+
+C                      -------------------------------------------------
+C                      check if j and i have identical nonzero pattern
+C                      -------------------------------------------------
+
+                       IF (LEN (J) .NE. LN) THEN
+C                         i and j do not have same size data structure
+                          GOTO 240
+                          ENDIF
+                       IF (ELEN (J) .NE. ELN) THEN
+C                         i and j do not have same number of adjacent el
+                          GOTO 240
+                          ENDIF
+C                      do not flag the first element in the list (me)
+                       DO 230 P = PE (J) + 1, PE (J) + LN - 1
+                          IF (W (IW (P)) .NE. WFLG) THEN
+C                            an entry (iw(p)) is in j but not in i
+                             GOTO 240
+                             ENDIF
+230                       CONTINUE
+
+C                      -------------------------------------------------
+C                      found it!  j can be absorbed into i
+C                      -------------------------------------------------
+
+                       PE (J) = -I
+C                      both nv (i) and nv (j) are negated since they
+C                      are in Lme, and the absolute values of each
+C                      are the number of variables in i and j:
+                       NV (I) = NV (I) + NV (J)
+                       NV (J) = 0
+                       ELEN (J) = 0
+C                      delete j from hash bucket
+                       J = NEXT (J)
+                       NEXT (JLAST) = J
+                       GOTO 220
+
+C                      -------------------------------------------------
+240                    CONTINUE
+C                      j cannot be absorbed into i
+C                      -------------------------------------------------
+
+                       JLAST = J
+                       J = NEXT (J)
+                       GOTO 220
+                       ENDIF
+
+C                   ----------------------------------------------------
+C                   no more variables can be absorbed into i
+C                   go to next i in bucket and clear flag array
+C                   ----------------------------------------------------
+
+                    WFLG = WFLG + 1
+                    I = NEXT (I)
+                    IF (I .NE. 0) GOTO 200
+                    ENDIF
+                 ENDIF
+250           CONTINUE
+
+C=======================================================================
+C  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMENT
+C=======================================================================
+
+           P = PME1
+           NLEFT = N - NEL
+           DO 260 PME = PME1, PME2
+              I = IW (PME)
+              NVI = -NV (I)
+              IF (NVI .GT. 0) THEN
+C                i is a principal variable in Lme
+C                restore nv (i) to signify that i is principal
+                 NV (I) = NVI
+
+C                -------------------------------------------------------
+C                compute the external degree (add size of current elem)
+C                -------------------------------------------------------
+
+                 DEG = MAX (1, MIN (DEGREE (I) + DEGME-NVI, NLEFT-NVI))
+
+C                -------------------------------------------------------
+C                place the supervariable at the head of the degree list
+C                -------------------------------------------------------
+
+                 INEXT = HEAD (DEG)
+                 IF (INEXT .NE. 0) LAST (INEXT) = I
+                 NEXT (I) = INEXT
+                 LAST (I) = 0
+                 HEAD (DEG) = I
+
+C                -------------------------------------------------------
+C                save the new degree, and find the minimum degree
+C                -------------------------------------------------------
+
+                 MINDEG = MIN (MINDEG, DEG)
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                place the supervariable in the element pattern
+C                -------------------------------------------------------
+
+                 IW (P) = I
+                 P = P + 1
+                 ENDIF
+260           CONTINUE
+
+C=======================================================================
+C  FINALIZE THE NEW ELEMENT
+C=======================================================================
+
+           NV (ME) = NVPIV + DEGME
+C          nv (me) is now the degree of pivot (including diagonal part)
+C          save the length of the list for the new element me
+           LEN (ME) = P - PME1
+           IF (LEN (ME) .EQ. 0) THEN
+C             there is nothing left of the current pivot element
+              PE (ME) = 0
+              W (ME) = 0
+              ENDIF
+           IF (NEWMEM .NE. 0) THEN
+C             element was not constructed in place: deallocate part
+C             of it (final size is less than or equal to newmem,
+C             since newly nonprincipal variables have been removed).
+              PFREE = P
+              MEM = MEM - NEWMEM + LEN (ME)
+              ENDIF
+
+C=======================================================================
+C          END WHILE (selecting pivots)
+           GOTO 30
+           ENDIF
+C=======================================================================
+
+C=======================================================================
+C  COMPUTE THE PERMUTATION VECTORS
+C=======================================================================
+
+C       ----------------------------------------------------------------
+C       The time taken by the following code is O(n).  At this
+C       point, elen (e) = -k has been done for all elements e,
+C       and elen (i) = 0 has been done for all nonprincipal
+C       variables i.  At this point, there are no principal
+C       supervariables left, and all elements are absorbed.
+C       ----------------------------------------------------------------
+
+C       ----------------------------------------------------------------
+C       compute the ordering of unordered nonprincipal variables
+C       ----------------------------------------------------------------
+
+        DO 290 I = 1, N
+           IF (ELEN (I) .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             i is an un-ordered row.  Traverse the tree from i until
+C             reaching an element, e.  The element, e, was the
+C             principal supervariable of i and all nodes in the path
+C             from i to when e was selected as pivot.
+C             ----------------------------------------------------------
+
+              J = -PE (I)
+C             while (j is a variable) do:
+270           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 J = -PE (J)
+                 GOTO 270
+                 ENDIF
+              E = J
+
+C             ----------------------------------------------------------
+C             get the current pivot ordering of e
+C             ----------------------------------------------------------
+
+              K = -ELEN (E)
+
+C             ----------------------------------------------------------
+C             traverse the path again from i to e, and compress the
+C             path (all nodes point to e).  Path compression allows
+C             this code to compute in O(n) time.  Order the unordered
+C             nodes in the path, and place the element e at the end.
+C             ----------------------------------------------------------
+
+              J = I
+C             while (j is a variable) do:
+280           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 JNEXT = -PE (J)
+                 PE (J) = -E
+                 IF (ELEN (J) .EQ. 0) THEN
+C                   j is an unordered row
+                    ELEN (J) = K
+                    K = K + 1
+                    ENDIF
+                 J = JNEXT
+                 GOTO 280
+                 ENDIF
+C             leave elen (e) negative, so we know it is an element
+              ELEN (E) = -K
+              ENDIF
+290        CONTINUE
+
+C       ----------------------------------------------------------------
+C       reset the inverse permutation (elen (1..n)) to be positive,
+C       and compute the permutation (last (1..n)).
+C       ----------------------------------------------------------------
+
+        DO 300 I = 1, N
+           K = ABS (ELEN (I))
+           LAST (K) = I
+           ELEN (I) = K
+300        CONTINUE
+
+C=======================================================================
+C  RETURN THE MEMORY USAGE IN IW
+C=======================================================================
+
+C       If maxmem is less than or equal to iwlen, then no compressions
+C       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise
+C       compressions did occur, and iwlen would have had to have been
+C       greater than or equal to maxmem for no compressions to occur.
+C       Return the value of maxmem in the pfree argument.
+
+        PFREE = MAXMEM
+
+        RETURN
+        END
+
diff --git a/contrib/taucs/external/src/amdpre.c b/contrib/taucs/external/src/amdpre.c
new file mode 100644
index 0000000000000000000000000000000000000000..a157cd075f0c2ec57b67ad8875c90f17a6b50ccc
--- /dev/null
+++ b/contrib/taucs/external/src/amdpre.c
@@ -0,0 +1,454 @@
+/* amdpre.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* --------------------------------------------------------- */
+/* ftp://ftp.cise.ufl.edu/pub/faculty/davis/AMD/amdpre.f */
+/* --------------------------------------------------------- */
+
+/* 	AMDPRE:  approximate minimum degree ordering */
+/* 	algorithm.  Removes "dense" nodes and then */
+/* 	calls AMDBAR.  See the tech report describing this */
+/* 	code at: */
+
+/* 	ftp://ftp.cise.ufl.edu/pub/faculty/davis/AMD/amdpre.ps */
+
+/*       Written by:  Dr. Tim Davis and Joseph L Carmen. */
+/* 	davis@cise.ufl.edu */
+
+/* 	The primary purpose of this preprocessor program is */
+/* 	to detect dense nodes and partition the matrix into */
+/*       four quadrants.  Where the top left quadrant holds the */
+/*       sparse nodes and the bottom right quadrant holds the */
+/*       dense nodes. The top left is then sent to the AMD program */
+/*       which returns an ordering.  The AMDpre orders the bottom */
+/*       right in degree order, and returns the ordering for the */
+/*       entire matrix. */
+
+/* 	May 1, 1997 */
+
+/* 	NOTE:  This routine calls AMDBAR.  It can easily */
+/* 	be modified to call the other AMD routines. */
+
+/* --------------------------------------------------------- */
+/* Subroutine */ int amdpre_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo, mapping)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo, *mapping;
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Builtin functions */
+    double sqrt();
+
+    /* Local variables */
+    static integer flag_, node, pnum, lastnode, i, j;
+    static real z;
+    static integer dense, ntemp;
+    extern /* Subroutine */ int amdbar_();
+    static integer number, deg, current;
+
+/* -------------------------------------------------------- */
+
+
+/* n:	The matrix order. */
+
+
+/* iwlen: The length of iw (1..iwlen).  On input, the matrix is */
+/* 	 stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/* 	 slightly larger than what is required to hold the matrix, at */
+/* 	 least iwlen .ge. pfree + n is recommended. */
+
+/* pe:	On input, pe (i) is the index in iw of the start of row i, or */
+/* 	zero if row i has no off-diagonal non-zeros.  Most of these */
+/* 	values will change if the iw array is compressed. */
+
+
+/* pfree:  On input the tail end of the array, iw (pfree..iwlen), */
+/* 	  is empty, and the matrix is stored in iw (1..pfree-1).  This */
+/* 	  will change if any rows are removed. */
+
+
+
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/* 	matrix, excluding the diagonal.  The contents of len (1..n) */
+/* 	are undefined on output.  Some entries will change if rows */
+/* 	are removed. */
+/* iw:	On input, iw (1..pfree-1) holds the description of each row i */
+/* 	in the matrix.  The matrix must be symmetric, and both upper */
+/* 	and lower triangular parts must be present.  The diagonal must */
+/* 	not be present.  Row i is held as follows: */
+
+/* 		len (i):  the length of the row i data structure */
+/* 		iw (pe (i) ... pe (i) + len (i) - 1): */
+
+/* 		Note that the rows need not be in any particular order, */
+/* 		and there may be empty space between the rows. */
+
+/* last:	On output, last (1..n) holds the permutation (the same as the */
+/* 	'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/* 	row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/* 	in the permuted matrix, PAP^T. */
+
+/* elen:	On output elen (1..n) holds the inverse permutation (the same */
+/* 	as the 'INVP' argument in Sparspak).  That is, if k = elen (i), */
+/* 	then row i is the kth pivot row.  Row i of A appears as the */
+/* 	(elen(i))-th row in the permuted matrix, PAP^T. */
+/* 	During execution, elen(i) holds the node in the matrix and */
+/* 	is divided into two parts: */
+
+/* head:	During execution, head(i) holds the nodes of degree i, where */
+/* 	i > dense  and i <= n.  The only entries in the head are nodes that */
+/* 	will be removed from the iw array.  head(i) is the starting point */
+/* 	for a linked list to the next(i) pointer array. */
+
+/* next:	During execution, is a linked list where next(i) holds */
+/* 	pointers to next(j) where i != j. If next(i) == 0 then */
+/* 	i is the last node in the list which started at head(j). */
+
+/* mapping: 	The single most important array in the preprocessor. */
+/* 		the mapping array is the inverse of the elen array. */
+/* 		This array cannot be changed in the AMD program. The */
+/* 		mapping array is used to convert the nodes in the */
+/* 		last(n) array returned from the AMD program to their */
+/* 		original value. */
+/* 		(need not be defined by the user on input) */
+
+/* --------------------------------------------------------------------- 
+*/
+/* --------------------------------------------------------------------- 
+*/
+
+/*       Local declarations */
+
+/*       The first row of the integer list is required to be */
+/*       saved through the call to the AMD.  The rest are just */
+/*       control variables */
+
+
+/* --------------------------------------------------------------------- 
+*/
+/* -------------------------------------------------------------- */
+
+/* Z:	 The variable Z has two functions: */
+/*        1) When Z is set equal to 0 the preprocessor will be */
+/*           bypassed. */
+/*        2) Z is also used to adjust dense.  The value given to Z */
+/* 	    depends on the matrix and will adjust the value of dense */
+/*           where dense = sqrt(n) * Z.  The default value for Z is 1.0 */
+/*           The calling program should be modified to pass in this value.
+ */
+
+/* lastnode: The final value of lastnode is the number of nodes */
+/*           sent to the AMD. */
+
+/* flag:	 initially set equal to 0.  If the preprocessor detects a dense 
+*/
+/*        row flag will then be set equal to 1. */
+
+/* ntemp: Before the call to the AMD, ntemp is used to save the original 
+*/
+/* 	 value of n. */
+
+/* dense: This is the key to the preprocessor.  A good value to dense */
+/* 	 will give good results, however, there is no algorithm that will */
+/* 	 select the optimal dense.  A common dense to choose is where */
+/* 	 dense = sqrt(n) * Z. */
+
+
+/* pnum:       value of previous node in degree linked list, also used */
+/*             as a pointer to entries in the iw array. */
+/* number:     value of node in current position of linked list */
+/* node:       temporary storage of a node */
+/* current:    used to track position in an array */
+/* deg:        temporary storage of a node's degree */
+/* i:          do loop control */
+/* j:          do loop control */
+
+/* -----------------------------------------------------------------------
+ */
+/* ---------------------------------------------------------------------- 
+*/
+
+/*       User can change the value of Z to adjust dense here */
+
+/* ---------------------------------------------------------------------- 
+*/
+    /* Parameter adjustments */
+    --mapping;
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    z = (float)1.;
+/* ---------------------------------------------------------------------- 
+*/
+
+/*       Start AMD preprocessing */
+
+/* ---------------------------------------------------------------------- 
+*/
+/*       ** do not change the value of flag */
+    flag_ = 0;
+    if (z > (float)0.) {
+/* ------------------------------------------------------------------
+---- */
+
+/* 	Compute dense. */
+
+/* ------------------------------------------------------------------
+--- */
+/*             ** set dense equal to sqrt(n) */
+	dense = z * sqrt((real) (*n));
+/* ------------------------------------------------------------------
+--- */
+/* 	 initialize head(n) and next(n) */
+/* ------------------------------------------------------------------
+--- */
+	i__1 = *n;
+	for (i = 1; i <= i__1; ++i) {
+	    head[i] = 0;
+	    next[i] = 0;
+/* L10: */
+	}
+/* ------------------------------------------------------------------
+--- */
+/*        create the degree hash buckets and linked lists */
+/*        for the dense nodes */
+/* ------------------------------------------------------------------
+--- */
+	i__1 = *n;
+	for (i = 1; i <= i__1; ++i) {
+	    deg = len[i];
+	    if (deg > dense) {
+/*                ** a dense row was found */
+		flag_ = 1;
+/*                ** insert node in degree list */
+		next[i] = head[deg];
+		head[deg] = i;
+	    }
+/* L20: */
+	}
+/* ------------------------------------------------------------------
+--- */
+
+/*       1) Recalculate the degree length of all nodes adjacent to */
+/*       the dense nodes in the degree list.  (Note:  Many of the */
+/*       dense nodes in the degree list will no longer be dense after 
+*/
+/*       this section.) */
+
+/*       2) Construct the ordering for the nodes not sent to AMD by */
+/*       selecting the most dense node in the degree list and */
+/*       then reduce the lengths of all adjacent nodes. Repeat this */
+/*       until no nodes are left with length higher than dense. */
+/*       The dense nodes are placed in the last(n) array. */
+/*          NOTE:  1) nodes are placed after the final value */
+/*                     of lastnode in the last(n) array */
+/*                2) the AMD routine will not effect anything after la
+stnode*/
+/*                    in the last(n) array. */
+/*                3) nodes are saved in degree order and in thier orig
+inal*/
+/*                    state, i.e., no reverse mapping is needed on the
+se. */
+/* ------------------------------------------------------------------
+--- */
+	if (flag_ == 1) {
+	    lastnode = *n;
+	    ++dense;
+	    current = *n;
+/*             ** get node from bucket */
+L40:
+	    node = head[current];
+/*             ** main loop control */
+/* L60: */
+	    if (node == 0) {
+		--current;
+		if (current < dense) {
+		    goto L70;
+		} else {
+		    goto L40;
+		}
+	    }
+/*   	      ** remove node from bucket */
+	    head[current] = next[node];
+/*             ** get degree of current node */
+	    deg = len[node];
+/*             ** skip this node if degree was changed to less tha
+n dense */
+	    if (deg < dense) {
+		goto L40;
+	    }
+/*             ** check if degree was changed */
+	    if (deg < current) {
+/*                ** insert back into linked list at the lower
+ degree */
+		next[node] = head[deg];
+		head[deg] = node;
+	    } else {
+/*                ** insert into last(n) */
+		last[lastnode] = node;
+		--lastnode;
+/*                ** len is flagged for use in the mapping con
+truction */
+		len[node] = *n << 1;
+/*                ** update degree lengths of adjacent nodes 
+*/
+		if (node < *n) {
+		    pnum = pe[node + 1] - 1;
+		} else {
+		    pnum = *pfree - 1;
+		}
+		i__1 = pnum;
+		for (i = pe[node]; i <= i__1; ++i) {
+		    number = iw[i];
+		    --len[number];
+/* L65: */
+		}
+	    }
+	    goto L40;
+L70:
+/* --------------------------------------------------------------
+------- */
+/* ************  begin loop to contruct the mapping array */
+/*                the mapping array will place the low dense nodes
+ */
+/*                at the begining and the high dense rows at the e
+nd */
+/*                the mapping array is basically a renumbering of 
+the */
+/*                nodes. */
+/*       ***  NOTE: */
+/*                 forward mapping == elen(n) */
+/*                 reverse mapping == mapping(n) */
+/* --------------------------------------------------------------
+------- */
+	    lastnode = *n;
+	    current = 1;
+	    i__1 = *n;
+	    for (i = 1; i <= i__1; ++i) {
+		deg = len[i];
+		if (deg < dense) {
+/*                   ** insert node at beginning part of e
+len array */
+		    elen[i] = current;
+		    mapping[current] = i;
+		    ++current;
+		} else {
+/*                   ** insert node at end part of elen ar
+ray */
+		    elen[i] = lastnode;
+		    mapping[lastnode] = i;
+		    --lastnode;
+		}
+/* L80: */
+	    }
+/* --------------------------------------------------------------
+------- */
+/* *********  construct the new iw array */
+/*       include only the nodes that are less than or equal to */
+/*       lastnode in the iw array.  lastnode is currently */
+/*       equal to the highest node value that will go to */
+/*       the amd routine.  elen is used for the forward mapping. 
+*/
+/* --------------------------------------------------------------
+------- */
+	    current = 1;
+	    node = 1;
+	    i__1 = *n - 1;
+	    for (i = 1; i <= i__1; ++i) {
+/*                ** compare forward mapping on node i to last
+node */
+		if (elen[i] <= lastnode) {
+/*                   **  place node in the new iw array */
+		    pnum = pe[i];
+		    pe[node] = current;
+		    i__2 = pe[i + 1] - 1;
+		    for (j = pnum; j <= i__2; ++j) {
+			number = elen[iw[j]];
+/*                      ** remove adjacent nodes great
+er than lastnode */
+			if (number <= lastnode) {
+			    iw[current] = number;
+			    ++current;
+			}
+/* L100: */
+		    }
+/*                   ** insert new length of node in len a
+rray */
+		    len[node] = current - pe[node];
+		    ++node;
+		}
+/* L90: */
+	    }
+/*             ** repeat above process for the last node */
+	    if (elen[*n] <= lastnode) {
+		pnum = pe[*n];
+		pe[node] = current;
+		i__1 = *pfree - 1;
+		for (j = pnum; j <= i__1; ++j) {
+		    number = elen[iw[j]];
+		    if (number <= lastnode) {
+			iw[current] = number;
+			++current;
+		    }
+/* L110: */
+		}
+		len[node] = current - pe[node];
+		++node;
+	    }
+	    ntemp = *n;
+	    *pfree = current;
+	    *n = lastnode;
+	}
+    }
+/* --------------------------------------------------------------------- 
+*/
+
+/*       Call the AMD ordering program */
+
+/* --------------------------------------------------------------------- 
+*/
+    amdbar_(n, &pe[1], &iw[1], &len[1], iwlen, pfree, &nv[1], &next[1], &last[
+	    1], &head[1], &elen[1], &degree[1], ncmpa, &w[1], iovflo);
+    if (flag_ == 1) {
+	lastnode = *n;
+	*n = ntemp;
+/* ------------------------------------------------------------------
+--- */
+/*        Change nodes in last(1 ... lastnode) to original nodes */
+/* ------------------------------------------------------------------
+--- */
+	i__1 = lastnode;
+	for (i = 1; i <= i__1; ++i) {
+	    last[i] = mapping[last[i]];
+/* L120: */
+	}
+/* ------------------------------------------------------------------
+--- */
+/*        Invert last(1 ... n) to elen(1 ... n) */
+/* ------------------------------------------------------------------
+--- */
+	i__1 = *n;
+	for (i = 1; i <= i__1; ++i) {
+	    number = last[i];
+	    elen[number] = i;
+/* L130: */
+	}
+    }
+    return 0;
+} /* amdpre_ */
+
diff --git a/contrib/taucs/external/src/amdpre.f b/contrib/taucs/external/src/amdpre.f
new file mode 100644
index 0000000000000000000000000000000000000000..25803dcf2ef5335a15799163ea94c4cb56e2226e
--- /dev/null
+++ b/contrib/taucs/external/src/amdpre.f
@@ -0,0 +1,439 @@
+c---------------------------------------------------------
+c ftp://ftp.cise.ufl.edu/pub/faculty/davis/AMD/amdpre.f
+c---------------------------------------------------------
+c
+c	AMDPRE:  approximate minimum degree ordering
+c	algorithm.  Removes "dense" nodes and then
+c	calls AMDBAR.  See the tech report describing this
+c	code at:
+c
+c	ftp://ftp.cise.ufl.edu/pub/faculty/davis/AMD/amdpre.ps
+c
+c       Written by:  Dr. Tim Davis and Joseph L Carmen.
+c	davis@cise.ufl.edu
+c
+c	The primary purpose of this preprocessor program is 
+c	to detect dense nodes and partition the matrix into  
+c       four quadrants.  Where the top left quadrant holds the 
+c       sparse nodes and the bottom right quadrant holds the 
+c       dense nodes. The top left is then sent to the AMD program 
+c       which returns an ordering.  The AMDpre orders the bottom 
+c       right in degree order, and returns the ordering for the 
+c       entire matrix.
+c
+c	May 1, 1997
+c
+c	NOTE:  This routine calls AMDBAR.  It can easily
+c	be modified to call the other AMD routines.
+c
+c---------------------------------------------------------
+
+	subroutine amdpre
+     $		(n, pe, iw, len, iwlen, pfree, nv, next,
+     $		last, head, elen, degree, ncmpa, w, iovflo,
+     $          mapping)
+
+	integer n, iwlen, pfree, ncmpa, iovflo, iw (iwlen), pe (n),
+     $		degree (n), nv (n), next (n), last (n), head (n),
+     $		elen (n), w (n), len (n), mapping (n)
+
+c--------------------------------------------------------
+c Removes dense rows, orders the rest with AMDBAR, dense rows last.
+c
+c n:	The matrix order.
+c	Lowered to the number of non-dense rows for the AMDBAR call
+c	when dense rows are found, then restored before returning.
+c iwlen: The length of iw (1..iwlen).  On input, the matrix is
+c	 stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+c	 slightly larger than what is required to hold the matrix, at
+c	 least iwlen .ge. pfree + n is recommended.
+c
+c pe:	On input, pe (i) is the index in iw of the start of row i, or
+c	zero if row i has no off-diagonal non-zeros.  Most of these
+c	values will change if the iw array is compressed.
+c
+c
+c pfree:  On input the tail end of the array, iw (pfree..iwlen),
+c	  is empty, and the matrix is stored in iw (1..pfree-1).  This
+c	  will change if any rows are removed.
+c
+c
+c
+c len:  On input, len (i) holds the number of entries in row i of the
+c	matrix, excluding the diagonal.  The contents of len (1..n)
+c	are undefined on output.  Some entries will change if rows
+c	are removed.
+
+c iw:	On input, iw (1..pfree-1) holds the description of each row i
+c	in the matrix.  The matrix must be symmetric, and both upper
+c	and lower triangular parts must be present.  The diagonal must
+c	not be present.  Row i is held as follows:
+c
+c		len (i):  the length of the row i data structure
+c		iw (pe (i) ... pe (i) + len (i) - 1):
+c			the indices of the nodes adjacent to row i
+c		NOTE(review): the code ends row i at pe (i+1) - 1 (pfree - 1
+c		for row n), i.e. it expects rows in order with no gaps.
+c
+c last:	On output, last (1..n) holds the permutation (the same as the
+c	'PERM' argument in Sparspak).  That is, if i = last (k), then
+c	row i is the kth pivot row.  Row last (k) of A is the k-th row
+c	in the permuted matrix, PAP^T.
+c
+c elen:	On output elen (1..n) holds the inverse permutation (the same
+c	as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+c	then row i is the kth pivot row.  Row i of A appears as the
+c	(elen(i))-th row in the permuted matrix, PAP^T.
+c	Before AMDBAR is called, elen (i) is the forward mapping: the
+c	new number of node i (non-dense nodes first, dense nodes last).
+c
+c head:	During execution, head(i) holds the nodes of degree i, where 
+c	i > dense  and i <= n.  The only entries in the head are nodes that
+c	may be removed from the iw array.  head(i) is the starting point
+c	of a linked list threaded through the next array.
+c
+c next:	During execution, is a linked list where next(i) holds 
+c	pointers to next(j) where i != j. If next(i) == 0 then
+c	i is the last node in the list which started at head(j). 
+c	
+c mapping: 	The single most important array in the preprocessor.
+c		the mapping array is the inverse of the elen array.
+c		This array cannot be changed in the AMD program. The
+c		mapping array is used to convert the nodes in the 
+c		last(n) array returned from the AMD program to their
+c		original value. 
+c		(need not be defined by the user on input) 
+c
+c---------------------------------------------------------------------
+
+c---------------------------------------------------------------------
+c
+c       Local declarations
+c
+c       The first row of the integer list is required to be
+c       saved through the call to the AMD.  The rest are just
+c       control variables
+c
+c
+c---------------------------------------------------------------------
+
+	integer ntemp, flag,
+     $          lastnode, dense, current, pnum, deg,
+     $          number, node, i, j
+	real  Z
+
+c--------------------------------------------------------------
+c
+c Z:	 The variable Z has two functions:
+c        1) When Z is set equal to 0 the preprocessor will be 
+c           bypassed.
+c        2) Z is also used to adjust dense.  The value given to Z
+c	    depends on the matrix and will adjust the value of dense
+c           where dense = sqrt(n) * Z.  The default value for Z is 1.0
+c           The calling program should be modified to pass in this value.
+c
+c lastnode: The final value of lastnode is the number of nodes
+c           sent to the AMD.
+c
+c flag:	 initially set equal to 0.  If the preprocessor detects a dense
+c        row flag will then be set equal to 1.
+c
+c ntemp: Before the call to the AMD, ntemp is used to save the original
+c	 value of n.
+c
+c dense: This is the key to the preprocessor.  A good value to dense 
+c	 will give good results, however, there is no algorithm that will 
+c	 select the optimal dense.  A common dense to choose is where
+c	 dense = sqrt(n) * Z. 
+c	 The real value is truncated on assignment (dense is an integer),
+c	 and dense is incremented once a dense row has been found.
+c pnum:       index into iw: the last entry of a row being scanned, or
+c             the first entry of a row being copied during compression.
+c number:     value of node in current position of linked list
+c node:       temporary storage of a node
+c current:    used to track position in an array
+c deg:        temporary storage of a node's degree
+c i:          do loop control
+c j:          do loop control
+c
+c----------------------------------------------------------------------- 
+
+c----------------------------------------------------------------------
+c
+c       User can change the value of Z to adjust dense here
+c
+c----------------------------------------------------------------------
+	
+	Z = 1.0
+
+c----------------------------------------------------------------------
+c
+c       Start AMD preprocessing
+c
+c----------------------------------------------------------------------
+
+c       ** do not change the value of flag
+	flag = 0
+
+	if (Z .gt. 0) then
+
+c----------------------------------------------------------------------
+c
+c	Compute dense.
+c       
+c---------------------------------------------------------------------
+
+c             ** set dense equal to Z * sqrt(n), truncated to an integer
+	      dense = Z * sqrt(real(n)) 
+
+c---------------------------------------------------------------------
+c	 initialize head(n) and next(n)
+c---------------------------------------------------------------------
+
+	   do 10 i = 1, n
+	      head(i) = 0
+	      next(i) = 0
+ 10	   continue
+	   
+
+c---------------------------------------------------------------------
+c        create the degree hash buckets and linked lists 
+c        for the dense nodes
+c---------------------------------------------------------------------
+
+	   do 20 i = 1, n
+	      deg = len(i)
+	      if ( deg  .gt. dense) then
+		 
+c                ** a dense row was found 
+		 flag = 1
+
+c                ** insert node in degree list
+		 next(i) = head(deg)
+		 head(deg) = i
+
+	      end if
+
+ 20	   continue
+
+c---------------------------------------------------------------------
+c
+c       1) Recalculate the degree length of all nodes adjacent to
+c       the dense nodes in the degree list.  (Note:  Many of the 
+c       dense nodes in the degree list will no longer be dense after
+c       this section.)
+c
+c       2) Construct the ordering for the nodes not sent to AMD by
+c       selecting the most dense node in the degree list and 
+c       then reduce the lengths of all adjacent nodes. Repeat this
+c       until no nodes are left with length higher than dense.
+c       The dense nodes are placed in the last(n) array.
+c          NOTE:  1) nodes are placed after the final value
+c                     of lastnode in the last(n) array
+c                 2) the AMD routine will not affect anything after lastnode
+c                    in the last(n) array.
+c                 3) nodes are saved in degree order and in their original
+c                    state, i.e., no reverse mapping is needed on these.
+c---------------------------------------------------------------------
+
+
+	   if (flag .eq. 1) then
+	      
+	      lastnode = n
+	      dense = dense + 1
+	      current = n
+	      
+c             ** get node from bucket
+ 40	      node = head(current)
+
+
+c             ** bucket empty: step down a degree, stop once below dense
+ 60	      if (node .eq. 0) then
+		 current = current - 1
+		 if (current .lt. dense) then
+		    go to 70
+		 else
+		    go to 40
+		 endif
+	      endif
+
+c             ** remove node from bucket
+	      head(current) = next(node)
+		 
+c             ** get degree of current node
+	      deg = len(node)
+
+
+		 
+c             ** skip this node if degree was changed to less than dense
+	      if (deg .lt. dense) then
+		 go to 40
+	      endif
+		 
+c             ** degree dropped since the node was filed in this bucket?
+	      if (deg .lt. current) then
+		 
+c                ** insert back into linked list at the lower degree
+		 next(node) = head(deg)
+		 head(deg) = node
+		 
+	      else
+		    
+c                ** still dense: store in last, filling from n downward
+		 last(lastnode) = node
+		 lastnode = lastnode - 1
+		    
+c                ** len is flagged (2*n) for use in the mapping construction
+		 len(node) = 2 * n
+		    
+c                ** update degree lengths of adjacent nodes
+		 if (node .lt. n) then
+		    pnum = pe(node + 1) - 1
+		 else
+		    pnum = pfree - 1
+		 endif
+		 do 65 i = pe(node), pnum
+		    number = iw(i)
+		    len(number) = len(number) - 1
+ 65		 continue
+	      endif
+
+
+	      go to 40
+
+ 70	      continue
+	   
+c---------------------------------------------------------------------
+c ************  begin loop to construct the mapping array
+c                the mapping array will place the low dense nodes
+c                at the beginning and the high dense rows at the end
+c                the mapping array is basically a renumbering of the
+c                nodes.
+c       ***  NOTE:
+c                 forward mapping == elen(n)
+c                 reverse mapping == mapping(n)
+c---------------------------------------------------------------------
+	      lastnode = n
+	      current = 1
+	      
+	      do 80 i = 1, n
+		 deg = len(i)
+		 if (deg .lt. dense) then
+		    
+c                   ** insert node at beginning part of elen array
+		    elen(i) = current
+		    mapping(current) = i
+		    current = current + 1
+		 else
+		    
+c                   ** insert node at end part of elen array
+		    elen(i) = lastnode
+		    mapping(lastnode) = i
+		    lastnode = lastnode - 1
+		 endif
+ 80	      continue
+	      
+c---------------------------------------------------------------------
+c *********  construct the new iw array 
+c       include only the nodes that are less than or equal to
+c       lastnode in the iw array.  lastnode is currently
+c       equal to the highest node value that will go to
+c       the amd routine.  elen is used for the forward mapping.
+c---------------------------------------------------------------------
+
+	      current = 1
+	      node = 1
+	      
+	      do 90 i = 1 , n-1
+		 
+c                ** compare forward mapping on node i to lastnode
+		 if (elen(i) .le. lastnode) then
+		    
+c                   **  place node in the new iw array
+		    pnum = pe(i)
+		    pe(node) = current
+		    do 100 j = pnum, pe(i+1)-1 
+		       number = elen(iw(j))
+		       
+c                      ** drop adjacent nodes renumbered above lastnode
+		       if (number .le. lastnode) then
+			  iw(current) = number
+			  current = current + 1
+		       end if 
+ 100		    continue
+		    
+c                   ** insert new length of node in len array
+		    len(node) = current - pe(node)
+		    node = node + 1
+		 end if
+ 90	      continue
+	      
+c             ** repeat above process for the last node (row ends at pfree-1)
+	      if (elen(n) .le. lastnode) then
+		 pnum = pe(n)
+		 pe(node) = current
+		 do 110 j = pnum, pfree-1
+		    number = elen(iw(j))
+		    if (number .le. lastnode) then
+		       iw(current) = number
+		       current = current + 1
+		    end if 
+ 110		 continue
+		 len(node) = current - pe(node)
+		 node = node + 1
+	      end if
+c             ** save n; pass the reduced order and new pfree to AMDBAR
+	      ntemp = n
+	      pfree = current
+	      n = lastnode
+	   end if
+	endif
+
+c---------------------------------------------------------------------
+c
+c       Call the AMD ordering program
+c
+c---------------------------------------------------------------------
+
+
+	call amdbar
+     $         (n, pe, iw, len, iwlen, pfree, nv, next,
+     $		last, head, elen, degree, ncmpa, w, iovflo)
+
+
+	
+	if (flag .eq. 1) then
+	   lastnode = n
+	   n = ntemp
+
+c---------------------------------------------------------------------
+c        Change nodes in last(1 ... lastnode) to original nodes
+c---------------------------------------------------------------------
+
+	   do 120 i = 1, lastnode
+	      last(i) = mapping(last(i))
+ 120	   continue
+
+c---------------------------------------------------------------------
+c        Invert last(1 ... n) to elen(1 ... n)
+c---------------------------------------------------------------------
+
+	   do 130 i = 1, n
+	      number = last(i)
+	      elen(number) = i
+ 130	   continue
+	   
+	   
+	end if
+
+
+	return
+	end
+c---------------------------------------------------------------------
+c	end of preprocessor subroutine
+c---------------------------------------------------------------------
+
+
+
+
+
diff --git a/contrib/taucs/external/src/amdtru.c b/contrib/taucs/external/src/amdtru.c
new file mode 100644
index 0000000000000000000000000000000000000000..ded9e2e634341c21e808334d6366e48e69709b24
--- /dev/null
+++ b/contrib/taucs/external/src/amdtru.c
@@ -0,0 +1,1418 @@
+/* amdtru.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Subroutine */ int amdtru_(n, pe, iw, len, iwlen, pfree, nv, next, last, 
+	head, elen, degree, ncmpa, w, iovflo)
+integer *n, *pe, *iw, *len, *iwlen, *pfree, *nv, *next, *last, *head, *elen, *
+	degree, *ncmpa, *w, *iovflo;
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer hash, pend, hmod, lenj, dmax_, wbig, wflg, psrc, pdst, e, 
+	    i, j, k, p, degme, x, nleft, ilast, jlast, inext, jnext, p1, 
+	    nvpiv, p2, p3, me, ln, pj, pn, mindeg, elenme, slenme, maxmem, 
+	    newmem, deg, eln, mem, nel, pme, nvi, nvj, pme1, pme2, knt1, knt2,
+	     knt3;
+
+/* -----------------------------------------------------------------------
+ */
+/*  The MC47 / AMD suite of minimum degree ordering algorithms. */
+
+/*  This code is one of seven variations of a single algorithm: */
+/*  the primary routine (MC47B/BD, only available in the Harwell */
+/*  Subroutine Library), and 6 variations that differ only in */
+/*  how they compute the degree (available in NETLIB). */
+
+/*  For information on the Harwell Subroutine Library, contact */
+/*  John Harding, Harwell Subroutine Library, B 552, AEA Technology, */
+/*  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573, */
+/*  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will */
+/*  provide details of price and conditions of use. */
+/* -----------------------------------------------------------------------
+ */
+/* ***********************************************************************
+ */
+/* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU, */
+/* and AMDATR) may be used SOLELY for educational, research, and */
+/* benchmarking purposes by non-profit organizations and the U.S. */
+/* government.  Commercial and other organizations may make use of the */
+/* AMD routines SOLELY for benchmarking purposes only.  The AMD */
+/* routines may be modified by or on behalf of the User for such */
+/* use but at no time shall the AMD routines or any such modified */
+/* version of them become the property of the User.  The AMD routines */
+/* are provided without warranty of any kind, either expressed or */
+/* implied.  Neither the Authors nor their employers shall be liable */
+/* for any direct or consequential loss or damage whatsoever arising */
+/* out of the use or misuse of the AMD routines by the User.  The AMD */
+/* routines must not be sold.  You may make copies of the AMD routines, */
+/* but this NOTICE and the Copyright notice must appear in all copies. */
+/* Any other use of the AMD routines requires written permission. */
+/* Your use of the AMD routines is an implicit agreement to these */
+/* conditions." */
+/* ***********************************************************************
+ */
+/* -----------------------------------------------------------------------
+ */
+/* AMDtru:  exact minimum (true) degree ordering algorithm */
+/* -----------------------------------------------------------------------
+ */
+/*  Variation 5:  exact true degree (as used in MA27, for example. */
+/*  See I. S. Duff and J. K. Reid, "The multifrontal solution of */
+/*  indefinite sparse symmetric linear equations, ACM Trans. Math. */
+/*  Software, vol. 9, pp. 302-325, 1983).  This code is very similar to */
+/*  MA27, except that MA27 does aggressive absorption and uses a */
+/*  different hash function for supervariable detection.  Note that some 
+*/
+/*  of the comments in the code below reflect the MC47-style degree */
+/*  approximation. */
+
+/*  We recommend using MC47B/BD instead of this routine since MC47B/BD */
+/*  gives better results in much less time (this code has been observed */
+/*  to be up to 73 times slower than MC47B/BD). */
+/* Given a representation of the nonzero pattern of a symmetric matrix, */
+/*       A, (excluding the diagonal) perform an exact minimum */
+/*       (true) degree ordering to compute a pivot order such */
+/*       that the introduction of nonzeros (fill-in) in the Cholesky */
+/*       factors A = LL^T are kept low.  At each step, the pivot */
+/*       selected is the one with the minimum exact true degree. */
+/* ********************************************************************** 
+*/
+/* ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ****** 
+*/
+/* ********************************************************************** 
+*/
+/* ** If you want error checking, a more versatile input format, and a ** 
+*/
+/* ** simpler user interface, then use MC47A/AD in the Harwell         ** 
+*/
+/* ** Subroutine Library, which checks for errors, transforms the      ** 
+*/
+/* ** input, and calls MC47B/BD.                                       ** 
+*/
+/* ********************************************************************** 
+*/
+/*       References:  (UF Tech Reports are available via anonymous ftp */
+/*       to ftp.cis.ufl.edu:cis/tech-reports). */
+
+/*       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern */
+/*               multifrontal method for sparse LU factorization", */
+/*               SIAM J. Matrix Analysis and Applications, to appear. */
+/*               also Univ. of Florida Technical Report TR-94-038. */
+/*               Discusses UMFPACK / MA38. */
+
+/*       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, */
+/*               "An approximate minimum degree ordering algorithm," */
+/*               SIAM J. Matrix Analysis and Applications (to appear), */
+/*               also Univ. of Florida Technical Report TR-94-039. */
+/*               Discusses this routine. */
+
+/*       [3] Alan George and Joseph Liu, "The evolution of the */
+/*               minimum degree ordering algorithm," SIAM Review, vol. */
+/*               31, no. 1, pp. 1-19, March 1989.  We list below the */
+/*               features mentioned in that paper that this code */
+/*               includes: */
+
+/*       mass elimination: */
+/*               Yes.  MA27 relied on supervariable detection for mass */
+/*               elimination. */
+/*       indistinguishable nodes: */
+/*               Yes (we call these "supervariables").  This was also in 
+*/
+/*               the MA27 code - although we modified the method of */
+/*               detecting them (the previous hash was the true degree, */
+/*               which we no longer keep track of).  A supervariable is */
+/*               a set of rows with identical nonzero pattern.  All */
+/*               variables in a supervariable are eliminated together. */
+/*               Each supervariable has as its numerical name that of */
+/*               one of its variables (its principal variable). */
+/*       quotient graph representation: */
+/*               Yes.  We use the term "element" for the cliques formed */
+/*               during elimination.  This was also in the MA27 code. */
+/*               The algorithm can operate in place, but it will work */
+/*               more efficiently if given some "elbow room." */
+/*       element absorption: */
+/*               Yes.  This was also in the MA27 code. */
+/*       external degree: */
+/*               Yes.  The MA27 code was based on the true degree. */
+/*       incomplete degree update and multiple elimination: */
+/*               No.  This was not in MA27, either.  Our method of */
+/*               degree update within MC47B/BD is element-based, not */
+/*               variable-based.  It is thus not well-suited for use */
+/*               with incomplete degree update or multiple elimination. */
+/* -----------------------------------------------------------------------
+ */
+/* Authors, and Copyright (C) 1995 by: */
+/*       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid. 
+*/
+
+/* Acknowledgements: */
+/*       This work (and the UMFPACK package) was supported by the */
+/*       National Science Foundation (ASC-9111263 and DMS-9223088). */
+/*       The UMFPACK/MA38 approximate degree update algorithm, the */
+/*       unsymmetric analog which forms the basis of MC47B/BD, was */
+/*       developed while Tim Davis was supported by CERFACS (Toulouse, */
+/*       France) in a post-doctoral position. */
+
+/* Date:  September, 1995 */
+/* -----------------------------------------------------------------------
+ */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT ARGUMENTS (unaltered): */
+/* -----------------------------------------------------------------------
+ */
+/* n:    The matrix order. */
+
+/*       Restriction:  1 .le. n .lt. (iovflo/2)-2 */
+/* iwlen:        The length of iw (1..iwlen).  On input, the matrix is */
+/*       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be */
+/*       slightly larger than what is required to hold the matrix, at */
+/*       least iwlen .ge. pfree + n is recommended.  Otherwise, */
+/*       excessive compressions will take place. */
+/*       *** We do not recommend running this algorithm with *** */
+/*       ***      iwlen .lt. pfree + n.                      *** */
+/*       *** Better performance will be obtained if          *** */
+/*       ***      iwlen .ge. pfree + n                       *** */
+/*       *** or better yet                                   *** */
+/*       ***      iwlen .gt. 1.2 * pfree                     *** */
+/*       *** (where pfree is its value on input).            *** */
+/*       The algorithm will not run at all if iwlen .lt. pfree-1. */
+
+/*       Restriction: iwlen .ge. pfree-1 */
+/* iovflo:       The largest positive integer that your computer can */
+/*       represent (-iovflo should also be representable).  On a 32-bit */
+/*       computer with 2's-complement arithmetic, */
+/*       iovflo = (2^31)-1 = 2,147,483,647. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/OUTPUT ARGUMENTS: */
+/* -----------------------------------------------------------------------
+ */
+/* pe:   On input, pe (i) is the index in iw of the start of row i, or */
+/*       zero if row i has no off-diagonal non-zeros. */
+
+/*       During execution, it is used for both supervariables and */
+/*       elements: */
+
+/*       * Principal supervariable i:  index into iw of the */
+/*               description of supervariable i.  A supervariable */
+/*               represents one or more rows of the matrix */
+/*               with identical nonzero pattern. */
+/*       * Non-principal supervariable i:  if i has been absorbed */
+/*               into another supervariable j, then pe (i) = -j. */
+/*               That is, j has the same pattern as i. */
+/*               Note that j might later be absorbed into another */
+/*               supervariable j2, in which case pe (i) is still -j, */
+/*               and pe (j) = -j2. */
+/*       * Unabsorbed element e:  the index into iw of the description */
+/*               of element e, if e has not yet been absorbed by a */
+/*               subsequent element.  Element e is created when */
+/*               the supervariable of the same name is selected as */
+/*               the pivot. */
+/*       * Absorbed element e:  if element e is absorbed into element */
+/*               e2, then pe (e) = -e2.  This occurs when the pattern of 
+*/
+/*               e (that is, Le) is found to be a subset of the pattern */
+/*               of e2 (that is, Le2).  If element e is "null" (it has */
+/*               no nonzeros outside its pivot block), then pe (e) = 0. */
+
+/*       On output, pe holds the assembly tree/forest, which implicitly */
+/*       represents a pivot order with identical fill-in as the actual */
+/*       order (via a depth-first search of the tree). */
+
+/*       On output: */
+/*       If nv (i) .gt. 0, then i represents a node in the assembly tree, 
+*/
+/*       and the parent of i is -pe (i), or zero if i is a root. */
+/*       If nv (i) = 0, then (i,-pe (i)) represents an edge in a */
+/*       subtree, the root of which is a node in the assembly tree. */
+/* pfree:        On input the tail end of the array, iw (pfree..iwlen), */
+/*       is empty, and the matrix is stored in iw (1..pfree-1). */
+/*       During execution, additional data is placed in iw, and pfree */
+/*       is modified so that iw (pfree..iwlen) is always the unused part 
+*/
+/*       of iw.  On output, pfree is set equal to the size of iw that */
+/*       would have been needed for no compressions to occur.  If */
+/*       ncmpa is zero, then pfree (on output) is less than or equal to */
+/*       iwlen, and the space iw (pfree+1 ... iwlen) was not used. */
+/*       Otherwise, pfree (on output) is greater than iwlen, and all the 
+*/
+/*       memory in iw was used. */
+/* -----------------------------------------------------------------------
+ */
+/* INPUT/MODIFIED (undefined on output): */
+/* -----------------------------------------------------------------------
+ */
+/* len:  On input, len (i) holds the number of entries in row i of the */
+/*       matrix, excluding the diagonal.  The contents of len (1..n) */
+/*       are undefined on output. */
+/* iw:   On input, iw (1..pfree-1) holds the description of each row i */
+/*       in the matrix.  The matrix must be symmetric, and both upper */
+/*       and lower triangular parts must be present.  The diagonal must */
+/*       not be present.  Row i is held as follows: */
+
+/*               len (i):  the length of the row i data structure */
+/*               iw (pe (i) ... pe (i) + len (i) - 1): */
+/*                       the list of column indices for nonzeros */
+/*                       in row i (simple supervariables), excluding */
+/*                       the diagonal.  All supervariables start with */
+/*                       one row/column each (supervariable i is just */
+/*                       row i). */
+/*               if len (i) is zero on input, then pe (i) is ignored */
+/*               on input. */
+
+/*               Note that the rows need not be in any particular order, 
+*/
+/*               and there may be empty space between the rows. */
+
+/*       During execution, the supervariable i experiences fill-in. */
+/*       This is represented by placing in i a list of the elements */
+/*       that cause fill-in in supervariable i: */
+
+/*               len (i):  the length of supervariable i */
+/*               iw (pe (i) ... pe (i) + elen (i) - 1): */
+/*                       the list of elements that contain i.  This list 
+*/
+/*                       is kept short by removing absorbed elements. */
+/*               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1): */
+/*                       the list of supervariables in i.  This list */
+/*                       is kept short by removing nonprincipal */
+/*                       variables, and any entry j that is also */
+/*                       contained in at least one of the elements */
+/*                       (j in Le) in the list for i (e in row i). */
+
+/*       When supervariable i is selected as pivot, we create an */
+/*       element e of the same name (e=i): */
+
+/*               len (e):  the length of element e */
+/*               iw (pe (e) ... pe (e) + len (e) - 1): */
+/*                       the list of supervariables in element e. */
+
+/*       An element represents the fill-in that occurs when supervariable 
+*/
+/*       i is selected as pivot (which represents the selection of row i 
+*/
+/*       and all non-principal variables whose principal variable is i). 
+*/
+/*       We use the term Le to denote the set of all supervariables */
+/*       in element e.  Absorbed supervariables and elements are pruned */
+/*       from these lists when computationally convenient. */
+
+/*       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. */
+/*       The contents of iw are undefined on output. */
+/* -----------------------------------------------------------------------
+ */
+/* OUTPUT (need not be set on input): */
+/* -----------------------------------------------------------------------
+ */
+/* nv:   During execution, abs (nv (i)) is equal to the number of rows */
+/*       that are represented by the principal supervariable i.  If i is 
+*/
+/*       a nonprincipal variable, then nv (i) = 0.  Initially, */
+/*       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a */
+/*       principal variable in the pattern Lme of the current pivot */
+/*       element me.  On output, nv (e) holds the true degree of element 
+*/
+/*       e at the time it was created (including the diagonal part). */
+/* ncmpa:        The number of times iw was compressed.  If this is */
+/*       excessive, then the execution took longer than what could have */
+/*       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20% */
+/*       larger than the value of pfree on input (or at least */
+/*       iwlen .ge. pfree + n).  The fastest performance will be */
+/*       obtained when ncmpa is returned as zero.  If iwlen is set to */
+/*       the value returned by pfree on *output*, then no compressions */
+/*       will occur. */
+/* elen: See the description of iw above.  At the start of execution, */
+/*       elen (i) is set to zero.  During execution, elen (i) is the */
+/*       number of elements in the list for supervariable i.  When e */
+/*       becomes an element, elen (e) = -nel is set, where nel is the */
+/*       current step of factorization.  elen (i) = 0 is done when i */
+/*       becomes nonprincipal. */
+
+/*       For variables, elen (i) .ge. 0 holds until just before the */
+/*       permutation vectors are computed.  For elements, */
+/*       elen (e) .lt. 0 holds. */
+
+/*       On output elen (1..n) holds the inverse permutation (the same */
+/*       as the 'INVP' argument in Sparspak).  That is, if k = elen (i), 
+*/
+/*       then row i is the kth pivot row.  Row i of A appears as the */
+/*       (elen(i))-th row in the permuted matrix, PAP^T. */
+/* last: In a degree list, last (i) is the supervariable preceding i, */
+/*       or zero if i is the head of the list.  In a hash bucket, */
+/*       last (i) is the hash key for i.  last (head (hash)) is also */
+/*       used as the head of a hash bucket if head (hash) contains a */
+/*       degree list (see head, below). */
+
+/*       On output, last (1..n) holds the permutation (the same as the */
+/*       'PERM' argument in Sparspak).  That is, if i = last (k), then */
+/*       row i is the kth pivot row.  Row last (k) of A is the k-th row */
+/*       in the permuted matrix, PAP^T. */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL (not input or output - used only during execution): */
+/* -----------------------------------------------------------------------
+ */
+/* degree:       If i is a supervariable, then degree (i) holds the */
+/*       current approximation of the external degree of row i (an upper 
+*/
+/*       bound).  The external degree is the number of nonzeros in row i, 
+*/
+/*       minus abs (nv (i)) (the diagonal part).  The bound is equal to */
+/*       the external degree if elen (i) is less than or equal to two. */
+
+/*       We also use the term "external degree" for elements e to refer */
+/*       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|, 
+*/
+/*       which is the degree of the off-diagonal part of the element e */
+/*       (not including the diagonal part). */
+/* head: head is used for degree lists.  head (deg) is the first */
+/*       supervariable in a degree list (all supervariables i in a */
+/*       degree list deg have the same approximate degree, namely, */
+/*       deg = degree (i)).  If the list deg is empty then */
+/*       head (deg) = 0. */
+
+/*       During supervariable detection head (hash) also serves as a */
+/*       pointer to a hash bucket. */
+/*       If head (hash) .gt. 0, there is a degree list of degree hash. */
+/*               The hash bucket head pointer is last (head (hash)). */
+/*       If head (hash) = 0, then the degree list and hash bucket are */
+/*               both empty. */
+/*       If head (hash) .lt. 0, then the degree list is empty, and */
+/*               -head (hash) is the head of the hash bucket. */
+/*       After supervariable detection is complete, all hash buckets */
+/*       are empty, and the (last (head (hash)) = 0) condition is */
+/*       restored for the non-empty degree lists. */
+/* next: next (i) is the supervariable following i in a link list, or */
+/*       zero if i is the last in the list.  Used for two kinds of */
+/*       lists:  degree lists and hash buckets (a supervariable can be */
+/*       in only one kind of list at a time). */
+/* w:    The flag array w determines the status of elements and */
+/*       variables, and the external degree of elements. */
+
+/*       for elements: */
+/*          if w (e) = 0, then the element e is absorbed */
+/*          if w (e) .ge. wflg, then w (e) - wflg is the size of */
+/*               the set |Le \ Lme|, in terms of nonzeros (the */
+/*               sum of abs (nv (i)) for each principal variable i that */
+/*               is both in the pattern of element e and NOT in the */
+/*               pattern of the current pivot element, me). */
+/*          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has */
+/*               not yet been seen in the scan of the element lists in */
+/*               the computation of |Le\Lme| in loop 150 below. */
+
+/*       for variables: */
+/*          during supervariable detection, if w (j) .ne. wflg then j is 
+*/
+/*          not in the pattern of variable i */
+
+/*       The w array is initialized by setting w (i) = 1 for all i, */
+/*       and by setting wflg = 2.  It is reinitialized if wflg becomes */
+/*       too large (to ensure that wflg+n does not cause integer */
+/*       overflow). */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL INTEGERS: */
+/* -----------------------------------------------------------------------
+ */
+/* deg:          the degree of a variable or element */
+/* degme:        size, |Lme|, of the current element, me (= degree (me)) 
+*/
+/* dext:         external degree, |Le \ Lme|, of some element e */
+/* dmax:         largest |Le| seen so far */
+/* e:            an element */
+/* elenme:       the length, elen (me), of element list of pivotal var. */
+/* eln:          the length, elen (...), of an element list */
+/* hash:         the computed value of the hash function */
+/* hmod:         the hash function is computed modulo hmod = max (1,n-1) 
+*/
+/* i:            a supervariable */
+/* ilast:        the entry in a link list preceding i */
+/* inext:        the entry in a link list following i */
+/* j:            a supervariable */
+/* jlast:        the entry in a link list preceding j */
+/* jnext:        the entry in a link list, or path, following j */
+/* k:            the pivot order of an element or variable */
+/* knt1:         loop counter used during element construction */
+/* knt2:         loop counter used during element construction */
+/* knt3:         loop counter used during compression */
+/* lenj:         len (j) */
+/* ln:           length of a supervariable list */
+/* maxmem:       amount of memory needed for no compressions */
+/* me:           current supervariable being eliminated, and the */
+/*                       current element created by eliminating that */
+/*                       supervariable */
+/* mem:          memory in use assuming no compressions have occurred */
+/* mindeg:       current minimum degree */
+/* nel:          number of pivots selected so far */
+/* newmem:       amount of new memory needed for current pivot element */
+/* nleft:        n - nel, the number of nonpivotal rows/columns remaining 
+*/
+/* nvi:          the number of variables in a supervariable i (= nv (i)) 
+*/
+/* nvj:          the number of variables in a supervariable j (= nv (j)) 
+*/
+/* nvpiv:        number of pivots in current element */
+/* slenme:       number of variables in variable list of pivotal variable 
+*/
+/* wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig. */
+/* we:           w (e) */
+/* wflg:         used for flagging the w array.  See description of w. */
+/* wnvi:         wflg - nv (i) */
+/* x:            either a supervariable or an element */
+/* -----------------------------------------------------------------------
+ */
+/* LOCAL POINTERS: */
+/* -----------------------------------------------------------------------
+ */
+/*               Any parameter (pe (...) or pfree) or local variable */
+/*               starting with "p" (for Pointer) is an index into iw, */
+/*               and all indices into iw use variables starting with */
+/*               "p."  The only exception to this rule is the iwlen */
+/*               input argument. */
+/* p:            pointer into lots of things */
+/* p1:           pe (i) for some variable i (start of element list) */
+/* p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list) 
+*/
+/* p3:           index of first supervariable in clean list */
+/* pdst:         destination pointer, for compression */
+/* pend:         end of memory to compress */
+/* pj:           pointer into an element or variable */
+/* pme:          pointer into the current element (pme1...pme2) */
+/* pme1:         the current element, me, is stored in iw (pme1...pme2) */
+/* pme2:         the end of the current element */
+/* pn:           pointer into a "clean" variable, also used to compress */
+/* psrc:         source pointer, for compression */
+/* -----------------------------------------------------------------------
+ */
+/*  FUNCTIONS CALLED: */
+/* -----------------------------------------------------------------------
+ */
+/* =======================================================================
+ */
+/*  INITIALIZATIONS */
+/* =======================================================================
+ */
+    /* Parameter adjustments */
+    --w;
+    --degree;
+    --elen;
+    --head;
+    --last;
+    --next;
+    --nv;
+    --len;
+    --iw;
+    --pe;
+
+    /* Function Body */
+    wflg = 2;
+    mindeg = 1;
+    *ncmpa = 0;
+    nel = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - 1;
+    hmod = max(i__1,i__2);
+    dmax_ = 0;
+    wbig = *iovflo - *n;
+    mem = *pfree - 1;
+    maxmem = mem;
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	last[i] = 0;
+	head[i] = 0;
+	nv[i] = 1;
+	w[i] = 1;
+	elen[i] = 0;
+	degree[i] = len[i];
+/* L10: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       initialize degree lists and eliminate rows with no off-diag. nz. 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	deg = degree[i];
+/*          include the diagonal in the true degree */
+	++deg;
+	degree[i] = deg;
+	if (deg > 1) {
+/*             --------------------------------------------------
+-------- */
+/*             place i in the degree list corresponding to its deg
+ree */
+/*             --------------------------------------------------
+-------- */
+	    inext = head[deg];
+	    if (inext != 0) {
+		last[inext] = i;
+	    }
+	    next[i] = inext;
+	    head[deg] = i;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             we have a variable that can be eliminated at once b
+ecause */
+/*             there is no off-diagonal non-zero in its row. */
+/*             --------------------------------------------------
+-------- */
+	    degree[i] = 0;
+	    ++nel;
+	    elen[i] = -nel;
+	    pe[i] = 0;
+	    w[i] = 0;
+	}
+/* L20: */
+    }
+/* =======================================================================
+ */
+/*  WHILE (selecting pivots) DO */
+/* =======================================================================
+ */
+L30:
+    if (nel < *n) {
+/* ==================================================================
+===== */
+/*  GET PIVOT OF MINIMUM DEGREE */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          find next supervariable for elimination */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = *n;
+	for (deg = mindeg; deg <= i__1; ++deg) {
+	    me = head[deg];
+	    if (me > 0) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	mindeg = deg;
+/*          ---------------------------------------------------------
+---- */
+/*          remove chosen variable from link list */
+/*          ---------------------------------------------------------
+---- */
+	inext = next[me];
+	if (inext != 0) {
+	    last[inext] = 0;
+	}
+	head[deg] = inext;
+/*          ---------------------------------------------------------
+---- */
+/*          me represents the elimination of pivots nel+1 to nel+nv(me
+). */
+/*          place me itself as the first in this set.  It will be move
+d */
+/*          to the nel+nv(me) position when the permutation vectors ar
+e */
+/*          computed. */
+/*          ---------------------------------------------------------
+---- */
+	elenme = elen[me];
+	elen[me] = -(nel + 1);
+	nvpiv = nv[me];
+	nel += nvpiv;
+/* ==================================================================
+===== */
+/*  CONSTRUCT NEW ELEMENT */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          At this point, me is the pivotal supervariable.  It will b
+e */
+/*          converted into the current element.  Scan list of the */
+/*          pivotal supervariable, me, setting tree pointers and */
+/*          constructing new list of supervariables for the new elemen
+t, */
+/*          me.  p is a pointer to the current position in the old lis
+t. */
+/*          ---------------------------------------------------------
+---- */
+/*          flag the variable "me" as being in Lme by negating nv (me)
+ */
+	nv[me] = -nvpiv;
+	degme = 0;
+	if (elenme == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in place */
+/*             --------------------------------------------------
+-------- */
+	    pme1 = pe[me];
+	    pme2 = pme1 - 1;
+	    i__1 = pme1 + len[me] - 1;
+	    for (p = pme1; p <= i__1; ++p) {
+		i = iw[p];
+		nvi = nv[i];
+		if (nvi > 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   i is a principal variable not yet pla
+ced in Lme. */
+/*                   store i in new list */
+/*                   ------------------------------------
+---------------- */
+		    degme += nvi;
+/*                   flag i as being in Lme by negating nv
+ (i) */
+		    nv[i] = -nvi;
+		    ++pme2;
+		    iw[pme2] = i;
+/*                   ------------------------------------
+---------------- */
+/*                   remove variable i from degree list. 
+*/
+/*                   ------------------------------------
+---------------- */
+		    ilast = last[i];
+		    inext = next[i];
+		    if (inext != 0) {
+			last[inext] = ilast;
+		    }
+		    if (ilast != 0) {
+			next[ilast] = inext;
+		    } else {
+/*                      i is at the head of the degree
+ list */
+			head[degree[i]] = inext;
+		    }
+		}
+/* L60: */
+	    }
+/*             this element takes no new memory in iw: */
+	    newmem = 0;
+	} else {
+/*             --------------------------------------------------
+-------- */
+/*             construct the new element in empty space, iw (pfree
+ ...) */
+/*             --------------------------------------------------
+-------- */
+	    p = pe[me];
+	    pme1 = *pfree;
+	    slenme = len[me] - elenme;
+	    i__1 = elenme + 1;
+	    for (knt1 = 1; knt1 <= i__1; ++knt1) {
+		if (knt1 > elenme) {
+/*                   search the supervariables in me. */
+		    e = me;
+		    pj = p;
+		    ln = slenme;
+		} else {
+/*                   search the elements in me. */
+		    e = iw[p];
+		    ++p;
+		    pj = pe[e];
+		    ln = len[e];
+		}
+/*                -------------------------------------------
+------------ */
+/*                search for different supervariables and add 
+them to the */
+/*                new list, compressing when necessary. this l
+oop is */
+/*                executed once for each element in the list a
+nd once for */
+/*                all the supervariables in the list. */
+/*                -------------------------------------------
+------------ */
+		i__2 = ln;
+		for (knt2 = 1; knt2 <= i__2; ++knt2) {
+		    i = iw[pj];
+		    ++pj;
+		    nvi = nv[i];
+		    if (nvi > 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      compress iw, if necessary */
+/*                      -----------------------------
+-------------------- */
+			if (*pfree > *iwlen) {
+/*                         prepare for compressing
+ iw by adjusting */
+/*                         pointers and lengths so
+ that the lists being */
+/*                         searched in the inner a
+nd outer loops contain */
+/*                         only the remaining entr
+ies. */
+			    pe[me] = p;
+			    len[me] -= knt1;
+			    if (len[me] == 0) {
+/*                            nothing left of 
+supervariable me */
+				pe[me] = 0;
+			    }
+			    pe[e] = pj;
+			    len[e] = ln - knt2;
+			    if (len[e] == 0) {
+/*                            nothing left of 
+element e */
+				pe[e] = 0;
+			    }
+			    ++(*ncmpa);
+/*                         store first item in pe 
+*/
+/*                         set first entry to -ite
+m */
+			    i__3 = *n;
+			    for (j = 1; j <= i__3; ++j) {
+				pn = pe[j];
+				if (pn > 0) {
+				    pe[j] = iw[pn];
+				    iw[pn] = -j;
+				}
+/* L70: */
+			    }
+/*                         psrc/pdst point to sour
+ce/destination */
+			    pdst = 1;
+			    psrc = 1;
+			    pend = pme1 - 1;
+/*                         while loop: */
+L80:
+			    if (psrc <= pend) {
+/*                            search for next 
+negative entry */
+				j = -iw[psrc];
+				++psrc;
+				if (j > 0) {
+				    iw[pdst] = pe[j];
+				    pe[j] = pdst;
+				    ++pdst;
+/*                               copy from
+ source to destination */
+				    lenj = len[j];
+				    i__3 = lenj - 2;
+				    for (knt3 = 0; knt3 <= i__3; ++knt3) {
+					iw[pdst + knt3] = iw[psrc + knt3];
+/* L90: */
+				    }
+				    pdst = pdst + lenj - 1;
+				    psrc = psrc + lenj - 1;
+				}
+				goto L80;
+			    }
+/*                         move the new partially-
+constructed element */
+			    p1 = pdst;
+			    i__3 = *pfree - 1;
+			    for (psrc = pme1; psrc <= i__3; ++psrc) {
+				iw[pdst] = iw[psrc];
+				++pdst;
+/* L100: */
+			    }
+			    pme1 = p1;
+			    *pfree = pdst;
+			    pj = pe[e];
+			    p = pe[me];
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      i is a principal variable not 
+yet placed in Lme */
+/*                      store i in new list */
+/*                      -----------------------------
+-------------------- */
+			degme += nvi;
+/*                      flag i as being in Lme by nega
+ting nv (i) */
+			nv[i] = -nvi;
+			iw[*pfree] = i;
+			++(*pfree);
+/*                      -----------------------------
+-------------------- */
+/*                      remove variable i from degree 
+link list */
+/*                      -----------------------------
+-------------------- */
+			ilast = last[i];
+			inext = next[i];
+			if (inext != 0) {
+			    last[inext] = ilast;
+			}
+			if (ilast != 0) {
+			    next[ilast] = inext;
+			} else {
+/*                         i is at the head of the
+ degree list */
+			    head[degree[i]] = inext;
+			}
+		    }
+/* L110: */
+		}
+		if (e != me) {
+/*                   set tree pointer and flag to indicate
+ element e is */
+/*                   absorbed into new element me (the par
+ent of e is me) */
+		    pe[e] = -me;
+		    w[e] = 0;
+		}
+/* L120: */
+	    }
+	    pme2 = *pfree - 1;
+/*             this element takes newmem new memory in iw (possibl
+y zero) */
+	    newmem = *pfree - pme1;
+	    mem += newmem;
+	    maxmem = max(maxmem,mem);
+	}
+/*          ---------------------------------------------------------
+---- */
+/*          me has now been converted into an element in iw (pme1..pme
+2) */
+/*          ---------------------------------------------------------
+---- */
+/*          degme holds the external degree of new element */
+	degree[me] = degme;
+	pe[me] = pme1;
+	len[me] = pme2 - pme1 + 1;
+/*          ---------------------------------------------------------
+---- */
+/*          make sure that wflg is not too large.  With the current */
+/*          value of wflg, wflg+n must not cause integer overflow */
+/*          ---------------------------------------------------------
+---- */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L130: */
+	    }
+	    wflg = 2;
+	}
+/* ==================================================================
+===== */
+/*  DEGREE UPDATE AND ELEMENT ABSORPTION */
+/* ==================================================================
+===== */
+/*          ---------------------------------------------------------
+---- */
+/*          Scan 2:  for each i in Lme, sum up the degree of Lme (whic
+h */
+/*          is degme), plus the sum of the external degrees of each Le
+ */
+/*          for the elements e appearing within i, plus the */
+/*          supervariables in i.  Place i in hash list. */
+/*          ---------------------------------------------------------
+---- */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    p1 = pe[i];
+	    p2 = p1 + elen[i] - 1;
+	    pn = p1;
+	    hash = 0;
+	    deg = 0;
+/*             --------------------------------------------------
+-------- */
+/*             scan the element list associated with supervariable
+ i */
+/*             --------------------------------------------------
+-------- */
+/*             exact external degree: */
+	    ++wflg;
+	    i__2 = p2;
+	    for (p = p1; p <= i__2; ++p) {
+		e = iw[p];
+		if (w[e] != 0) {
+/*                   e is an unabsorbed element */
+		    i__3 = pe[e] + len[e] - 1;
+		    for (pj = pe[e]; pj <= i__3; ++pj) {
+			j = iw[pj];
+			nvj = nv[j];
+			if (nvj > 0 && w[j] != wflg) {
+/*                         j is principal and not 
+in Lme if nv (j) .gt. 0 */
+/*                         and j is not yet seen i
+f w (j) .ne. wflg */
+			    w[j] = wflg;
+			    deg += nvj;
+			}
+/* L145: */
+		    }
+		    iw[pn] = e;
+		    ++pn;
+		    hash += e;
+		}
+/* L160: */
+	    }
+/*             count the number of elements in i (including me): 
+*/
+	    elen[i] = pn - p1 + 1;
+/*             --------------------------------------------------
+-------- */
+/*             scan the supervariables in the list associated with
+ i */
+/*             --------------------------------------------------
+-------- */
+	    p3 = pn;
+	    i__2 = p1 + len[i] - 1;
+	    for (p = p2 + 1; p <= i__2; ++p) {
+		j = iw[p];
+		nvj = nv[j];
+		if (nvj > 0) {
+/*                   j is unabsorbed, and not in Lme. */
+/*                   add to degree and add to new list */
+		    deg += nvj;
+		    iw[pn] = j;
+		    ++pn;
+		    hash += j;
+		}
+/* L170: */
+	    }
+/*             --------------------------------------------------
+-------- */
+/*             update the degree and check for mass elimination */
+/*             --------------------------------------------------
+-------- */
+	    if (elen[i] == 1 && p3 == pn) {
+/*                -------------------------------------------
+------------ */
+/*                mass elimination */
+/*                -------------------------------------------
+------------ */
+/*                There is nothing left of this node except fo
+r an */
+/*                edge to the current pivot element.  elen (i)
+ is 1, */
+/*                and there are no variables adjacent to node 
+i. */
+/*                Absorb i into the current pivot element, me.
+ */
+		pe[i] = -me;
+		nvi = -nv[i];
+		degme -= nvi;
+		nvpiv += nvi;
+		nel += nvi;
+		nv[i] = 0;
+		elen[i] = 0;
+	    } else {
+/*                -------------------------------------------
+------------ */
+/*                update the exact degree of i */
+/*                -------------------------------------------
+------------ */
+/*                the following degree does not yet include th
+e size */
+/*                of the current element, which is added later
+: */
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                add me to the list for i */
+/*                -------------------------------------------
+------------ */
+/*                move first supervariable to end of list */
+		iw[pn] = iw[p3];
+/*                move first element to end of element part of
+ list */
+		iw[p3] = iw[p1];
+/*                add new element to front of list. */
+		iw[p1] = me;
+/*                store the new length of the list in len (i) 
+*/
+		len[i] = pn - p1 + 1;
+/*                -------------------------------------------
+------------ */
+/*                place in hash bucket.  Save hash key of i in
+ last (i). */
+/*                -------------------------------------------
+------------ */
+		hash = hash % hmod + 1;
+		j = head[hash];
+		if (j <= 0) {
+/*                   the degree list is empty, hash head i
+s -j */
+		    next[i] = -j;
+		    head[hash] = -i;
+		} else {
+/*                   degree list is not empty */
+/*                   use last (head (hash)) as hash head 
+*/
+		    next[i] = last[j];
+		    last[j] = i;
+		}
+		last[i] = hash;
+	    }
+/* L180: */
+	}
+	degree[me] = degme;
+/*          ---------------------------------------------------------
+---- */
+/*          Clear the counter array, w (...), by incrementing wflg. */
+/*          ---------------------------------------------------------
+---- */
+	++wflg;
+/*          make sure that wflg+n does not cause integer overflow */
+	if (wflg >= wbig) {
+	    i__1 = *n;
+	    for (x = 1; x <= i__1; ++x) {
+		if (w[x] != 0) {
+		    w[x] = 1;
+		}
+/* L190: */
+	    }
+	    wflg = 2;
+	}
+/*          at this point, w (1..n) .lt. wflg holds */
+/* ==================================================================
+===== */
+/*  SUPERVARIABLE DETECTION */
+/* ==================================================================
+===== */
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    if (nv[i] < 0) {
+/*                i is a principal variable in Lme */
+/*                -------------------------------------------
+------------ */
+/*                examine all hash buckets with 2 or more vari
+ables.  We */
+/*                do this by examining all unique hash keys for 
+super- */
+/*                variables in the pattern Lme of the current 
+element, me */
+/*                -------------------------------------------
+------------ */
+		hash = last[i];
+/*                let i = head of hash bucket, and empty the h
+ash bucket */
+		j = head[hash];
+		if (j == 0) {
+		    goto L250;
+		}
+		if (j < 0) {
+/*                   degree list is empty */
+		    i = -j;
+		    head[hash] = 0;
+		} else {
+/*                   degree list is not empty, restore las
+t () of head */
+		    i = last[j];
+		    last[j] = 0;
+		}
+		if (i == 0) {
+		    goto L250;
+		}
+/*                while loop: */
+L200:
+		if (next[i] != 0) {
+/*                   ------------------------------------
+---------------- */
+/*                   this bucket has one or more variables
+ following i. */
+/*                   scan all of them to see if i can abso
+rb any entries */
+/*                   that follow i in hash bucket.  Scatte
+r i into w. */
+/*                   ------------------------------------
+---------------- */
+		    ln = len[i];
+		    eln = elen[i];
+/*                   do not flag the first element in the 
+list (me) */
+		    i__2 = pe[i] + ln - 1;
+		    for (p = pe[i] + 1; p <= i__2; ++p) {
+			w[iw[p]] = wflg;
+/* L210: */
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   scan every other entry j following i 
+in bucket */
+/*                   ------------------------------------
+---------------- */
+		    jlast = i;
+		    j = next[i];
+/*                   while loop: */
+L220:
+		    if (j != 0) {
+/*                      -----------------------------
+-------------------- */
+/*                      check if j and i have identica
+l nonzero pattern */
+/*                      -----------------------------
+-------------------- */
+			if (len[j] != ln) {
+/*                         i and j do not have sam
+e size data structure */
+			    goto L240;
+			}
+			if (elen[j] != eln) {
+/*                         i and j do not have sam
+e number of adjacent el */
+			    goto L240;
+			}
+/*                      do not flag the first element 
+in the list (me) */
+			i__2 = pe[j] + ln - 1;
+			for (p = pe[j] + 1; p <= i__2; ++p) {
+			    if (w[iw[p]] != wflg) {
+/*                            an entry (iw(p))
+ is in j but not in i */
+				goto L240;
+			    }
+/* L230: */
+			}
+/*                      -----------------------------
+-------------------- */
+/*                      found it!  j can be absorbed i
+nto i */
+/*                      -----------------------------
+-------------------- */
+			pe[j] = -i;
+/*                      both nv (i) and nv (j) are neg
+ated since they */
+/*                      are in Lme, and the absolute v
+alues of each */
+/*                      are the number of variables in
+ i and j: */
+			nv[i] += nv[j];
+			nv[j] = 0;
+			elen[j] = 0;
+/*                      delete j from hash bucket */
+			j = next[j];
+			next[jlast] = j;
+			goto L220;
+/*                      -----------------------------
+-------------------- */
+L240:
+/*                      j cannot be absorbed into i */
+/*                      -----------------------------
+-------------------- */
+			jlast = j;
+			j = next[j];
+			goto L220;
+		    }
+/*                   ------------------------------------
+---------------- */
+/*                   no more variables can be absorbed int
+o i */
+/*                   go to next i in bucket and clear flag
+ array */
+/*                   ------------------------------------
+---------------- */
+		    ++wflg;
+		    i = next[i];
+		    if (i != 0) {
+			goto L200;
+		    }
+		}
+	    }
+L250:
+	    ;
+	}
+/* ==================================================================
+===== */
+/*  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMEN
+T */
+/* ==================================================================
+===== */
+	p = pme1;
+	nleft = *n - nel;
+	i__1 = pme2;
+	for (pme = pme1; pme <= i__1; ++pme) {
+	    i = iw[pme];
+	    nvi = -nv[i];
+	    if (nvi > 0) {
+/*                i is a principal variable in Lme */
+/*                restore nv (i) to signify that i is principa
+l */
+		nv[i] = nvi;
+/*                -------------------------------------------
+------------ */
+/*                compute the true degree (add size of current
+ element) */
+/*                -------------------------------------------
+------------ */
+		deg = degree[i] + degme;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable at the head of the d
+egree list */
+/*                -------------------------------------------
+------------ */
+		inext = head[deg];
+		if (inext != 0) {
+		    last[inext] = i;
+		}
+		next[i] = inext;
+		last[i] = 0;
+		head[deg] = i;
+/*                -------------------------------------------
+------------ */
+/*                save the new degree, and find the minimum de
+gree */
+/*                -------------------------------------------
+------------ */
+		mindeg = min(mindeg,deg);
+		degree[i] = deg;
+/*                -------------------------------------------
+------------ */
+/*                place the supervariable in the element patte
+rn */
+/*                -------------------------------------------
+------------ */
+		iw[p] = i;
+		++p;
+	    }
+/* L260: */
+	}
+/* ==================================================================
+===== */
+/*  FINALIZE THE NEW ELEMENT */
+/* ==================================================================
+===== */
+	nv[me] = nvpiv + degme;
+/*          nv (me) is now the degree of pivot (including diagonal par
+t) */
+/*          save the length of the list for the new element me */
+	len[me] = p - pme1;
+	if (len[me] == 0) {
+/*             there is nothing left of the current pivot element 
+*/
+	    pe[me] = 0;
+	    w[me] = 0;
+	}
+	if (newmem != 0) {
+/*             element was not constructed in place: deallocate pa
+rt */
+/*             of it (final size is less than or equal to newmem, 
+*/
+/*             since newly nonprincipal variables have been remove
+d). */
+	    *pfree = p;
+	    mem = mem - newmem + len[me];
+	}
+/* ==================================================================
+===== */
+/*          END WHILE (selecting pivots) */
+	goto L30;
+    }
+/* =======================================================================
+ */
+/* =======================================================================
+ */
+/*  COMPUTE THE PERMUTATION VECTORS */
+/* =======================================================================
+ */
+/*       ---------------------------------------------------------------- 
+*/
+/*       The time taken by the following code is O(n).  At this */
+/*       point, elen (e) = -k has been done for all elements e, */
+/*       and elen (i) = 0 has been done for all nonprincipal */
+/*       variables i.  At this point, there are no principal */
+/*       supervariables left, and all elements are absorbed. */
+/*       ---------------------------------------------------------------- 
+*/
+/*       ---------------------------------------------------------------- 
+*/
+/*       compute the ordering of unordered nonprincipal variables */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	if (elen[i] == 0) {
+/*             --------------------------------------------------
+-------- */
+/*             i is an un-ordered row.  Traverse the tree from i u
+ntil */
+/*             reaching an element, e.  The element, e, was the */
+/*             principal supervariable of i and all nodes in the p
+ath */
+/*             from i to when e was selected as pivot. */
+/*             --------------------------------------------------
+-------- */
+	    j = -pe[i];
+/*             while (j is a variable) do: */
+L270:
+	    if (elen[j] >= 0) {
+		j = -pe[j];
+		goto L270;
+	    }
+	    e = j;
+/*             --------------------------------------------------
+-------- */
+/*             get the current pivot ordering of e */
+/*             --------------------------------------------------
+-------- */
+	    k = -elen[e];
+/*             --------------------------------------------------
+-------- */
+/*             traverse the path again from i to e, and compress t
+he */
+/*             path (all nodes point to e).  Path compression allo
+ws */
+/*             this code to compute in O(n) time.  Order the unord
+ered */
+/*             nodes in the path, and place the element e at the e
+nd. */
+/*             --------------------------------------------------
+-------- */
+	    j = i;
+/*             while (j is a variable) do: */
+L280:
+	    if (elen[j] >= 0) {
+		jnext = -pe[j];
+		pe[j] = -e;
+		if (elen[j] == 0) {
+/*                   j is an unordered row */
+		    elen[j] = k;
+		    ++k;
+		}
+		j = jnext;
+		goto L280;
+	    }
+/*             leave elen (e) negative, so we know it is an elemen
+t */
+	    elen[e] = -k;
+	}
+/* L290: */
+    }
+/*       ---------------------------------------------------------------- 
+*/
+/*       reset the inverse permutation (elen (1..n)) to be positive, */
+/*       and compute the permutation (last (1..n)). */
+/*       ---------------------------------------------------------------- 
+*/
+    i__1 = *n;
+    for (i = 1; i <= i__1; ++i) {
+	k = (i__2 = elen[i], abs(i__2));
+	last[k] = i;
+	elen[i] = k;
+/* L300: */
+    }
+/* =======================================================================
+ */
+/*  RETURN THE MEMORY USAGE IN IW */
+/* =======================================================================
+ */
+/*       If maxmem is less than or equal to iwlen, then no compressions */
+/*       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise */
+/*       compressions did occur, and iwlen would have had to have been */
+/*       greater than or equal to maxmem for no compressions to occur. */
+/*       Return the value of maxmem in the pfree argument. */
+    *pfree = maxmem;
+    return 0;
+} /* amdtru_ */
+
diff --git a/contrib/taucs/external/src/amdtru.f b/contrib/taucs/external/src/amdtru.f
new file mode 100644
index 0000000000000000000000000000000000000000..8b5fe57a9a94fbc058f32fa9ed99e63bce2b9c65
--- /dev/null
+++ b/contrib/taucs/external/src/amdtru.f
@@ -0,0 +1,1238 @@
+
+        SUBROUTINE AMDTRU
+     $          (N, PE, IW, LEN, IWLEN, PFREE, NV, NEXT,
+     $          LAST, HEAD, ELEN, DEGREE, NCMPA, W, IOVFLO)
+
+        INTEGER N, IWLEN, PFREE, NCMPA, IOVFLO, IW (IWLEN), PE (N),
+     $          DEGREE (N), NV (N), NEXT (N), LAST (N), HEAD (N),
+     $          ELEN (N), W (N), LEN (N)
+
+C-----------------------------------------------------------------------
+C  The MC47 / AMD suite of minimum degree ordering algorithms.
+C
+C  This code is one of seven variations of a single algorithm:
+C  the primary routine (MC47B/BD, only available in the Harwell
+C  Subroutine Library), and 6 variations that differ only in
+C  how they compute the degree (available in NETLIB).
+C
+C  For information on the Harwell Subroutine Library, contact
+C  John Harding, Harwell Subroutine Library, B 552, AEA Technology,
+C  Harwell, Didcot, Oxon OX11 0RA, telephone (44) 1235 434573,
+C  fax (44) 1235 434340, email john.harding@aeat.co.uk, who will
+C  provide details of price and conditions of use.
+C-----------------------------------------------------------------------
+
+************************************************************************
+* NOTICE:  "The AMD routines (AMDEXA, AMDBAR, AMDHAF, AMDHAT, AMDTRU,
+* and AMDATR) may be used SOLELY for educational, research, and
+* benchmarking purposes by non-profit organizations and the U.S.
+* government.  Commercial and other organizations may make use of the
+* AMD routines SOLELY for benchmarking purposes only.  The AMD
+* routines may be modified by or on behalf of the User for such
+* use but at no time shall the AMD routines or any such modified
+* version of them become the property of the User.  The AMD routines
+* are provided without warranty of any kind, either expressed or
+* implied.  Neither the Authors nor their employers shall be liable
+* for any direct or consequential loss or damage whatsoever arising
+* out of the use or misuse of the AMD routines by the User.  The AMD
+* routines must not be sold.  You may make copies of the AMD routines,
+* but this NOTICE and the Copyright notice must appear in all copies.
+* Any other use of the AMD routines requires written permission.
+* Your use of the AMD routines is an implicit agreement to these
+* conditions."
+************************************************************************
+
+C-----------------------------------------------------------------------
+C AMDtru:  exact minimum (true) degree ordering algorithm
+C-----------------------------------------------------------------------
+
+C  Variation 5:  exact true degree (as used in MA27, for example.
+C  See I. S. Duff and J. K. Reid, "The multifrontal solution of
+C  indefinite sparse symmetric linear equations," ACM Trans. Math.
+C  Software, vol. 9, pp. 302-325, 1983).  This code is very similar to
+C  MA27, except that MA27 does aggressive absorption and uses a
+C  different hash function for supervariable detection.  Note that some
+C  of the comments in the code below reflect the MC47-style degree
+C  approximation.
+C
+C  We recommend using MC47B/BD instead of this routine since MC47B/BD
+C  gives better results in much less time (this code has been observed
+C  to be up to 73 times slower than MC47B/BD).
+
+C Given a representation of the nonzero pattern of a symmetric matrix,
+C       A, (excluding the diagonal) perform an exact minimum
+C       (true) degree ordering to compute a pivot order such
+C       that the introduction of nonzeros (fill-in) in the Cholesky
+C       factors A = LL^T are kept low.  At each step, the pivot
+C       selected is the one with the minimum exact true degree.
+
+C **********************************************************************
+C ***** CAUTION:  ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT.  ******
+C **********************************************************************
+C ** If you want error checking, a more versatile input format, and a **
+C ** simpler user interface, then use MC47A/AD in the Harwell         **
+C ** Subroutine Library, which checks for errors, transforms the      **
+C ** input, and calls MC47B/BD.                                       **
+C **********************************************************************
+
+C       References:  (UF Tech Reports are available via anonymous ftp
+C       to ftp.cis.ufl.edu:cis/tech-reports).
+C
+C       [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern
+C               multifrontal method for sparse LU factorization",
+C               SIAM J. Matrix Analysis and Applications, to appear.
+C               also Univ. of Florida Technical Report TR-94-038.
+C               Discusses UMFPACK / MA38.
+C
+C       [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff,
+C               "An approximate minimum degree ordering algorithm,"
+C               SIAM J. Matrix Analysis and Applications (to appear),
+C               also Univ. of Florida Technical Report TR-94-039.
+C               Discusses this routine.
+C
+C       [3] Alan George and Joseph Liu, "The evolution of the
+C               minimum degree ordering algorithm," SIAM Review, vol.
+C               31, no. 1, pp. 1-19, March 1989.  We list below the
+C               features mentioned in that paper that this code
+C               includes:
+C
+C       mass elimination:
+C               Yes.  MA27 relied on supervariable detection for mass
+C               elimination.
+C       indistinguishable nodes:
+C               Yes (we call these "supervariables").  This was also in
+C               the MA27 code - although we modified the method of
+C               detecting them (the previous hash was the true degree,
+C               which we no longer keep track of).  A supervariable is
+C               a set of rows with identical nonzero pattern.  All
+C               variables in a supervariable are eliminated together.
+C               Each supervariable has as its numerical name that of
+C               one of its variables (its principal variable).
+C       quotient graph representation:
+C               Yes.  We use the term "element" for the cliques formed
+C               during elimination.  This was also in the MA27 code.
+C               The algorithm can operate in place, but it will work
+C               more efficiently if given some "elbow room."
+C       element absorption:
+C               Yes.  This was also in the MA27 code.
+C       external degree:
+C               Yes.  The MA27 code was based on the true degree.
+C       incomplete degree update and multiple elimination:
+C               No.  This was not in MA27, either.  Our method of
+C               degree update within MC47B/BD is element-based, not
+C               variable-based.  It is thus not well-suited for use
+C               with incomplete degree update or multiple elimination.
+
+C-----------------------------------------------------------------------
+C Authors, and Copyright (C) 1995 by:
+C       Timothy A. Davis, Patrick Amestoy, Iain S. Duff, & John K. Reid.
+C
+C Acknowledgements:
+C       This work (and the UMFPACK package) was supported by the
+C       National Science Foundation (ASC-9111263 and DMS-9223088).
+C       The UMFPACK/MA38 approximate degree update algorithm, the
+C       unsymmetric analog which forms the basis of MC47B/BD, was
+C       developed while Tim Davis was supported by CERFACS (Toulouse,
+C       France) in a post-doctoral position.
+C
+C Date:  September, 1995
+C-----------------------------------------------------------------------
+
+C-----------------------------------------------------------------------
+C INPUT ARGUMENTS (unaltered):
+C-----------------------------------------------------------------------
+
+C n:    The matrix order.
+C
+C       Restriction:  1 .le. n .lt. (iovflo/2)-2
+
+C iwlen:        The length of iw (1..iwlen).  On input, the matrix is
+C       stored in iw (1..pfree-1).  However, iw (1..iwlen) should be
+C       slightly larger than what is required to hold the matrix, at
+C       least iwlen .ge. pfree + n is recommended.  Otherwise,
+C       excessive compressions will take place.
+C       *** We do not recommend running this algorithm with ***
+C       ***      iwlen .lt. pfree + n.                      ***
+C       *** Better performance will be obtained if          ***
+C       ***      iwlen .ge. pfree + n                       ***
+C       *** or better yet                                   ***
+C       ***      iwlen .gt. 1.2 * pfree                     ***
+C       *** (where pfree is its value on input).            ***
+C       The algorithm will not run at all if iwlen .lt. pfree-1.
+C
+C       Restriction: iwlen .ge. pfree-1
+
+C iovflo:       The largest positive integer that your computer can
+C       represent (-iovflo should also be representable).  On a 32-bit
+C       computer with 2's-complement arithmetic,
+C       iovflo = (2^31)-1 = 2,147,483,647.
+
+C-----------------------------------------------------------------------
+C INPUT/OUTPUT ARGUMENTS:
+C-----------------------------------------------------------------------
+
+C pe:   On input, pe (i) is the index in iw of the start of row i, or
+C       zero if row i has no off-diagonal non-zeros.
+C
+C       During execution, it is used for both supervariables and
+C       elements:
+C
+C       * Principal supervariable i:  index into iw of the
+C               description of supervariable i.  A supervariable
+C               represents one or more rows of the matrix
+C               with identical nonzero pattern.
+C       * Non-principal supervariable i:  if i has been absorbed
+C               into another supervariable j, then pe (i) = -j.
+C               That is, j has the same pattern as i.
+C               Note that j might later be absorbed into another
+C               supervariable j2, in which case pe (i) is still -j,
+C               and pe (j) = -j2.
+C       * Unabsorbed element e:  the index into iw of the description
+C               of element e, if e has not yet been absorbed by a
+C               subsequent element.  Element e is created when
+C               the supervariable of the same name is selected as
+C               the pivot.
+C       * Absorbed element e:  if element e is absorbed into element
+C               e2, then pe (e) = -e2.  This occurs when the pattern of
+C               e (that is, Le) is found to be a subset of the pattern
+C               of e2 (that is, Le2).  If element e is "null" (it has
+C               no nonzeros outside its pivot block), then pe (e) = 0.
+C
+C       On output, pe holds the assembly tree/forest, which implicitly
+C       represents a pivot order with identical fill-in as the actual
+C       order (via a depth-first search of the tree).
+C
+C       On output:
+C       If nv (i) .gt. 0, then i represents a node in the assembly tree,
+C       and the parent of i is -pe (i), or zero if i is a root.
+C       If nv (i) = 0, then (i,-pe (i)) represents an edge in a
+C       subtree, the root of which is a node in the assembly tree.
+
+C pfree:        On input the tail end of the array, iw (pfree..iwlen),
+C       is empty, and the matrix is stored in iw (1..pfree-1).
+C       During execution, additional data is placed in iw, and pfree
+C       is modified so that iw (pfree..iwlen) is always the unused part
+C       of iw.  On output, pfree is set equal to the size of iw that
+C       would have been needed for no compressions to occur.  If
+C       ncmpa is zero, then pfree (on output) is less than or equal to
+C       iwlen, and the space iw (pfree+1 ... iwlen) was not used.
+C       Otherwise, pfree (on output) is greater than iwlen, and all the
+C       memory in iw was used.
+
+C-----------------------------------------------------------------------
+C INPUT/MODIFIED (undefined on output):
+C-----------------------------------------------------------------------
+
+C len:  On input, len (i) holds the number of entries in row i of the
+C       matrix, excluding the diagonal.  The contents of len (1..n)
+C       are undefined on output.
+
+C iw:   On input, iw (1..pfree-1) holds the description of each row i
+C       in the matrix.  The matrix must be symmetric, and both upper
+C       and lower triangular parts must be present.  The diagonal must
+C       not be present.  Row i is held as follows:
+C
+C               len (i):  the length of the row i data structure
+C               iw (pe (i) ... pe (i) + len (i) - 1):
+C                       the list of column indices for nonzeros
+C                       in row i (simple supervariables), excluding
+C                       the diagonal.  All supervariables start with
+C                       one row/column each (supervariable i is just
+C                       row i).
+C               if len (i) is zero on input, then pe (i) is ignored
+C               on input.
+C
+C               Note that the rows need not be in any particular order,
+C               and there may be empty space between the rows.
+C
+C       During execution, the supervariable i experiences fill-in.
+C       This is represented by placing in i a list of the elements
+C       that cause fill-in in supervariable i:
+C
+C               len (i):  the length of supervariable i
+C               iw (pe (i) ... pe (i) + elen (i) - 1):
+C                       the list of elements that contain i.  This list
+C                       is kept short by removing absorbed elements.
+C               iw (pe (i) + elen (i) ... pe (i) + len (i) - 1):
+C                       the list of supervariables in i.  This list
+C                       is kept short by removing nonprincipal
+C                       variables, and any entry j that is also
+C                       contained in at least one of the elements
+C                       (j in Le) in the list for i (e in row i).
+C
+C       When supervariable i is selected as pivot, we create an
+C       element e of the same name (e=i):
+C
+C               len (e):  the length of element e
+C               iw (pe (e) ... pe (e) + len (e) - 1):
+C                       the list of supervariables in element e.
+C
+C       An element represents the fill-in that occurs when supervariable
+C       i is selected as pivot (which represents the selection of row i
+C       and all non-principal variables whose principal variable is i).
+C       We use the term Le to denote the set of all supervariables
+C       in element e.  Absorbed supervariables and elements are pruned
+C       from these lists when computationally convenient.
+C
+C       CAUTION:  THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION.
+C       The contents of iw are undefined on output.
+
+C-----------------------------------------------------------------------
+C OUTPUT (need not be set on input):
+C-----------------------------------------------------------------------
+
+C nv:   During execution, abs (nv (i)) is equal to the number of rows
+C       that are represented by the principal supervariable i.  If i is
+C       a nonprincipal variable, then nv (i) = 0.  Initially,
+C       nv (i) = 1 for all i.  nv (i) .lt. 0 signifies that i is a
+C       principal variable in the pattern Lme of the current pivot
+C       element me.  On output, nv (e) holds the true degree of element
+C       e at the time it was created (including the diagonal part).
+
+C ncmpa:        The number of times iw was compressed.  If this is
+C       excessive, then the execution took longer than what could have
+C       been.  To reduce ncmpa, try increasing iwlen to be 10% or 20%
+C       larger than the value of pfree on input (or at least
+C       iwlen .ge. pfree + n).  The fastest performance will be
+C       obtained when ncmpa is returned as zero.  If iwlen is set to
+C       the value returned by pfree on *output*, then no compressions
+C       will occur.
+
+C elen: See the description of iw above.  At the start of execution,
+C       elen (i) is set to zero.  During execution, elen (i) is the
+C       number of elements in the list for supervariable i.  When e
+C       becomes an element, elen (e) = -nel is set, where nel is the
+C       current step of factorization.  elen (i) = 0 is done when i
+C       becomes nonprincipal.
+C
+C       For variables, elen (i) .ge. 0 holds until just before the
+C       permutation vectors are computed.  For elements,
+C       elen (e) .lt. 0 holds.
+C
+C       On output elen (1..n) holds the inverse permutation (the same
+C       as the 'INVP' argument in Sparspak).  That is, if k = elen (i),
+C       then row i is the kth pivot row.  Row i of A appears as the
+C       (elen(i))-th row in the permuted matrix, PAP^T.
+
+C last: In a degree list, last (i) is the supervariable preceding i,
+C       or zero if i is the head of the list.  In a hash bucket,
+C       last (i) is the hash key for i.  last (head (hash)) is also
+C       used as the head of a hash bucket if head (hash) contains a
+C       degree list (see head, below).
+C
+C       On output, last (1..n) holds the permutation (the same as the
+C       'PERM' argument in Sparspak).  That is, if i = last (k), then
+C       row i is the kth pivot row.  Row last (k) of A is the k-th row
+C       in the permuted matrix, PAP^T.
+
+C-----------------------------------------------------------------------
+C LOCAL (not input or output - used only during execution):
+C-----------------------------------------------------------------------
+
+C degree:       If i is a supervariable, then degree (i) holds the
+C       current approximation of the external degree of row i (an upper
+C       bound).  The external degree is the number of nonzeros in row i,
+C       minus abs (nv (i)) (the diagonal part).  The bound is equal to
+C       the external degree if elen (i) is less than or equal to two.
+C
+C       We also use the term "external degree" for elements e to refer
+C       to |Le \ Lme|.  If e is an element, then degree (e) holds |Le|,
+C       which is the degree of the off-diagonal part of the element e
+C       (not including the diagonal part).
+
+C head: head is used for degree lists.  head (deg) is the first
+C       supervariable in a degree list (all supervariables i in a
+C       degree list deg have the same approximate degree, namely,
+C       deg = degree (i)).  If the list deg is empty then
+C       head (deg) = 0.
+C
+C       During supervariable detection head (hash) also serves as a
+C       pointer to a hash bucket.
+C       If head (hash) .gt. 0, there is a degree list of degree hash.
+C               The hash bucket head pointer is last (head (hash)).
+C       If head (hash) = 0, then the degree list and hash bucket are
+C               both empty.
+C       If head (hash) .lt. 0, then the degree list is empty, and
+C               -head (hash) is the head of the hash bucket.
+C       After supervariable detection is complete, all hash buckets
+C       are empty, and the (last (head (hash)) = 0) condition is
+C       restored for the non-empty degree lists.
+
+C next: next (i) is the supervariable following i in a link list, or
+C       zero if i is the last in the list.  Used for two kinds of
+C       lists:  degree lists and hash buckets (a supervariable can be
+C       in only one kind of list at a time).
+
+C w:    The flag array w determines the status of elements and
+C       variables, and the external degree of elements.
+C
+C       for elements:
+C          if w (e) = 0, then the element e is absorbed
+C          if w (e) .ge. wflg, then w (e) - wflg is the size of
+C               the set |Le \ Lme|, in terms of nonzeros (the
+C               sum of abs (nv (i)) for each principal variable i that
+C               is both in the pattern of element e and NOT in the
+C               pattern of the current pivot element, me).
+C          if wflg .gt. w (e) .gt. 0, then e is not absorbed and has
+C               not yet been seen in the scan of the element lists in
+C               the computation of |Le\Lme| in loop 150 below.
+C
+C       for variables:
+C          during supervariable detection, if w (j) .ne. wflg then j is
+C          not in the pattern of variable i
+C
+C       The w array is initialized by setting w (i) = 1 for all i,
+C       and by setting wflg = 2.  It is reinitialized if wflg becomes
+C       too large (to ensure that wflg+n does not cause integer
+C       overflow).
+
+C-----------------------------------------------------------------------
+C LOCAL INTEGERS:
+C-----------------------------------------------------------------------
+
+        INTEGER DEG, DEGME, DEXT, DMAX, E, ELENME, ELN, HASH, HMOD, I,
+     $          ILAST, INEXT, J, JLAST, JNEXT, K, KNT1, KNT2, KNT3,
+     $          LENJ, LN, MAXMEM, ME, MEM, MINDEG, NEL, NEWMEM,
+     $          NLEFT, NVI, NVJ, NVPIV, SLENME, WBIG, WE, WFLG, WNVI, X
+
+C deg:          the degree of a variable or element
+C degme:        size, |Lme|, of the current element, me (= degree (me))
+C dext:         external degree, |Le \ Lme|, of some element e
+C dmax:         largest |Le| seen so far
+C e:            an element
+C elenme:       the length, elen (me), of element list of pivotal var.
+C eln:          the length, elen (...), of an element list
+C hash:         the computed value of the hash function
+C hmod:         the hash function is computed modulo hmod = max (1,n-1)
+C i:            a supervariable
+C ilast:        the entry in a link list preceding i
+C inext:        the entry in a link list following i
+C j:            a supervariable
+C jlast:        the entry in a link list preceding j
+C jnext:        the entry in a link list, or path, following j
+C k:            the pivot order of an element or variable
+C knt1:         loop counter used during element construction
+C knt2:         loop counter used during element construction
+C knt3:         loop counter used during compression
+C lenj:         len (j)
+C ln:           length of a supervariable list
+C maxmem:       amount of memory needed for no compressions
+C me:           current supervariable being eliminated, and the
+C                       current element created by eliminating that
+C                       supervariable
+C mem:          memory in use assuming no compressions have occurred
+C mindeg:       current minimum degree
+C nel:          number of pivots selected so far
+C newmem:       amount of new memory needed for current pivot element
+C nleft:        n - nel, the number of nonpivotal rows/columns remaining
+C nvi:          the number of variables in a supervariable i (= nv (i))
+C nvj:          the number of variables in a supervariable j (= nv (j))
+C nvpiv:        number of pivots in current element
+C slenme:       number of variables in variable list of pivotal variable
+C wbig:         = iovflo - n.  wflg is not allowed to be .ge. wbig.
+C we:           w (e)
+C wflg:         used for flagging the w array.  See description of w.
+C wnvi:         wflg - nv (i)
+C x:            either a supervariable or an element
+
+C-----------------------------------------------------------------------
+C LOCAL POINTERS:
+C-----------------------------------------------------------------------
+
+        INTEGER P, P1, P2, P3, PDST, PEND, PJ, PME, PME1, PME2, PN, PSRC
+
+C               Any parameter (pe (...) or pfree) or local variable
+C               starting with "p" (for Pointer) is an index into iw,
+C               and all indices into iw use variables starting with
+C               "p."  The only exception to this rule is the iwlen
+C               input argument.
+
+C p:            pointer into lots of things
+C p1:           pe (i) for some variable i (start of element list)
+C p2:           pe (i) + elen (i) -  1 for some var. i (end of el. list)
+C p3:           index of first supervariable in clean list
+C pdst:         destination pointer, for compression
+C pend:         end of memory to compress
+C pj:           pointer into an element or variable
+C pme:          pointer into the current element (pme1...pme2)
+C pme1:         the current element, me, is stored in iw (pme1...pme2)
+C pme2:         the end of the current element
+C pn:           pointer into a "clean" variable, also used to compress
+C psrc:         source pointer, for compression
+
+C-----------------------------------------------------------------------
+C  FUNCTIONS CALLED:
+C-----------------------------------------------------------------------
+
+        INTRINSIC MAX, MIN, MOD
+
+C=======================================================================
+C  INITIALIZATIONS
+C=======================================================================
+
+        WFLG = 2
+        MINDEG = 1
+        NCMPA = 0
+        NEL = 0
+        HMOD = MAX (1, N-1)
+        DMAX = 0
+        WBIG = IOVFLO - N
+        MEM = PFREE - 1
+        MAXMEM = MEM
+
+        DO 10 I = 1, N
+           LAST (I) = 0
+           HEAD (I) = 0
+           NV (I) = 1
+           W (I) = 1
+           ELEN (I) = 0
+           DEGREE (I) = LEN (I)
+10         CONTINUE
+
+C       ----------------------------------------------------------------
+C       initialize degree lists and eliminate rows with no off-diag. nz.
+C       ----------------------------------------------------------------
+
+        DO 20 I = 1, N
+
+           DEG = DEGREE (I)
+
+C          include the diagonal in the true degree
+           DEG = DEG + 1
+           DEGREE (I) = DEG
+           IF (DEG .GT. 1) THEN
+
+C             ----------------------------------------------------------
+C             place i in the degree list corresponding to its degree
+C             ----------------------------------------------------------
+
+              INEXT = HEAD (DEG)
+              IF (INEXT .NE. 0) LAST (INEXT) = I
+              NEXT (I) = INEXT
+              HEAD (DEG) = I
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             we have a variable that can be eliminated at once because
+C             there is no off-diagonal non-zero in its row.
+C             ----------------------------------------------------------
+
+              DEGREE (I) = 0
+
+              NEL = NEL + 1
+              ELEN (I) = -NEL
+              PE (I) = 0
+              W (I) = 0
+
+              ENDIF
+
+20         CONTINUE
+
+C=======================================================================
+C  WHILE (selecting pivots) DO
+C=======================================================================
+
+30      CONTINUE
+        IF (NEL .LT. N) THEN
+
+C=======================================================================
+C  GET PIVOT OF MINIMUM DEGREE
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          find next supervariable for elimination
+C          -------------------------------------------------------------
+
+           DO 40 DEG = MINDEG, N
+              ME = HEAD (DEG)
+              IF (ME .GT. 0) GOTO 50
+40            CONTINUE
+50         CONTINUE
+           MINDEG = DEG
+
+C          -------------------------------------------------------------
+C          remove chosen variable from link list
+C          -------------------------------------------------------------
+
+           INEXT = NEXT (ME)
+           IF (INEXT .NE. 0) LAST (INEXT) = 0
+           HEAD (DEG) = INEXT
+
+C          -------------------------------------------------------------
+C          me represents the elimination of pivots nel+1 to nel+nv(me).
+C          place me itself as the first in this set.  It will be moved
+C          to the nel+nv(me) position when the permutation vectors are
+C          computed.
+C          -------------------------------------------------------------
+
+           ELENME = ELEN (ME)
+           ELEN (ME) = - (NEL + 1)
+           NVPIV = NV (ME)
+           NEL = NEL + NVPIV
+
+C=======================================================================
+C  CONSTRUCT NEW ELEMENT
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          At this point, me is the pivotal supervariable.  It will be
+C          converted into the current element.  Scan list of the
+C          pivotal supervariable, me, setting tree pointers and
+C          constructing new list of supervariables for the new element,
+C          me.  p is a pointer to the current position in the old list.
+C          -------------------------------------------------------------
+
+C          flag the variable "me" as being in Lme by negating nv (me)
+           NV (ME) = -NVPIV
+           DEGME = 0
+
+           IF (ELENME .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             construct the new element in place
+C             ----------------------------------------------------------
+
+              PME1 = PE (ME)
+              PME2 = PME1 - 1
+
+              DO 60 P = PME1, PME1 + LEN (ME) - 1
+                 I = IW (P)
+                 NVI = NV (I)
+                 IF (NVI .GT. 0) THEN
+
+C                   ----------------------------------------------------
+C                   i is a principal variable not yet placed in Lme.
+C                   store i in new list
+C                   ----------------------------------------------------
+
+                    DEGME = DEGME + NVI
+C                   flag i as being in Lme by negating nv (i)
+                    NV (I) = -NVI
+                    PME2 = PME2 + 1
+                    IW (PME2) = I
+
+C                   ----------------------------------------------------
+C                   remove variable i from degree list.
+C                   ----------------------------------------------------
+
+                    ILAST = LAST (I)
+                    INEXT = NEXT (I)
+                    IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                    IF (ILAST .NE. 0) THEN
+                       NEXT (ILAST) = INEXT
+                    ELSE
+C                      i is at the head of the degree list
+                       HEAD (DEGREE (I)) = INEXT
+                       ENDIF
+
+                    ENDIF
+60               CONTINUE
+C             this element takes no new memory in iw:
+              NEWMEM = 0
+
+           ELSE
+
+C             ----------------------------------------------------------
+C             construct the new element in empty space, iw (pfree ...)
+C             ----------------------------------------------------------
+
+              P = PE (ME)
+              PME1 = PFREE
+              SLENME = LEN (ME) - ELENME
+
+              DO 120 KNT1 = 1, ELENME + 1
+
+                 IF (KNT1 .GT. ELENME) THEN
+C                   search the supervariables in me.
+                    E = ME
+                    PJ = P
+                    LN = SLENME
+                 ELSE
+C                   search the elements in me.
+                    E = IW (P)
+                    P = P + 1
+                    PJ = PE (E)
+                    LN = LEN (E)
+                    ENDIF
+
+C                -------------------------------------------------------
+C                search for different supervariables and add them to the
+C                new list, compressing when necessary. this loop is
+C                executed once for each element in the list and once for
+C                all the supervariables in the list.
+C                -------------------------------------------------------
+
+                 DO 110 KNT2 = 1, LN
+                    I = IW (PJ)
+                    PJ = PJ + 1
+                    NVI = NV (I)
+                    IF (NVI .GT. 0) THEN
+
+C                      -------------------------------------------------
+C                      compress iw, if necessary
+C                      -------------------------------------------------
+
+                       IF (PFREE .GT. IWLEN) THEN
+C                         prepare for compressing iw by adjusting
+C                         pointers and lengths so that the lists being
+C                         searched in the inner and outer loops contain
+C                         only the remaining entries.
+
+                          PE (ME) = P
+                          LEN (ME) = LEN (ME) - KNT1
+                          IF (LEN (ME) .EQ. 0) THEN
+C                            nothing left of supervariable me
+                             PE (ME) = 0
+                             ENDIF
+                          PE (E) = PJ
+                          LEN (E) = LN - KNT2
+                          IF (LEN (E) .EQ. 0) THEN
+C                            nothing left of element e
+                             PE (E) = 0
+                             ENDIF
+
+                          NCMPA = NCMPA + 1
+C                         store first item in pe
+C                         set first entry to -item
+                          DO 70 J = 1, N
+                             PN = PE (J)
+                             IF (PN .GT. 0) THEN
+                                PE (J) = IW (PN)
+                                IW (PN) = -J
+                                ENDIF
+70                           CONTINUE
+
+C                         psrc/pdst point to source/destination
+                          PDST = 1
+                          PSRC = 1
+                          PEND = PME1 - 1
+
+C                         while loop:
+80                        CONTINUE
+                          IF (PSRC .LE. PEND) THEN
+C                            search for next negative entry
+                             J = -IW (PSRC)
+                             PSRC = PSRC + 1
+                             IF (J .GT. 0) THEN
+                                IW (PDST) = PE (J)
+                                PE (J) = PDST
+                                PDST = PDST + 1
+C                               copy from source to destination
+                                LENJ = LEN (J)
+                                DO 90 KNT3 = 0, LENJ - 2
+                                   IW (PDST + KNT3) = IW (PSRC + KNT3)
+90                                 CONTINUE
+                                PDST = PDST + LENJ - 1
+                                PSRC = PSRC + LENJ - 1
+                                ENDIF
+                             GOTO 80
+                             ENDIF
+
+C                         move the new partially-constructed element
+                          P1 = PDST
+                          DO 100 PSRC = PME1, PFREE - 1
+                             IW (PDST) = IW (PSRC)
+                             PDST = PDST + 1
+100                          CONTINUE
+                          PME1 = P1
+                          PFREE = PDST
+                          PJ = PE (E)
+                          P = PE (ME)
+                          ENDIF
+
+C                      -------------------------------------------------
+C                      i is a principal variable not yet placed in Lme
+C                      store i in new list
+C                      -------------------------------------------------
+
+                       DEGME = DEGME + NVI
+C                      flag i as being in Lme by negating nv (i)
+                       NV (I) = -NVI
+                       IW (PFREE) = I
+                       PFREE = PFREE + 1
+
+C                      -------------------------------------------------
+C                      remove variable i from degree link list
+C                      -------------------------------------------------
+
+                       ILAST = LAST (I)
+                       INEXT = NEXT (I)
+                       IF (INEXT .NE. 0) LAST (INEXT) = ILAST
+                       IF (ILAST .NE. 0) THEN
+                          NEXT (ILAST) = INEXT
+                       ELSE
+C                         i is at the head of the degree list
+                          HEAD (DEGREE (I)) = INEXT
+                          ENDIF
+
+                       ENDIF
+110                 CONTINUE
+
+                 IF (E .NE. ME) THEN
+C                   set tree pointer and flag to indicate element e is
+C                   absorbed into new element me (the parent of e is me)
+                    PE (E) = -ME
+                    W (E) = 0
+                    ENDIF
+120              CONTINUE
+
+              PME2 = PFREE - 1
+C             this element takes newmem new memory in iw (possibly zero)
+              NEWMEM = PFREE - PME1
+              MEM = MEM + NEWMEM
+              MAXMEM = MAX (MAXMEM, MEM)
+              ENDIF
+
+C          -------------------------------------------------------------
+C          me has now been converted into an element in iw (pme1..pme2)
+C          -------------------------------------------------------------
+
+C          degme holds the external degree of new element
+           DEGREE (ME) = DEGME
+           PE (ME) = PME1
+           LEN (ME) = PME2 - PME1 + 1
+
+C          -------------------------------------------------------------
+C          make sure that wflg is not too large.  With the current
+C          value of wflg, wflg+n must not cause integer overflow
+C          -------------------------------------------------------------
+
+           IF (WFLG .GE. WBIG) THEN
+              DO 130 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+130              CONTINUE
+              WFLG = 2
+              ENDIF
+
+C=======================================================================
+C  DEGREE UPDATE AND ELEMENT ABSORPTION
+C=======================================================================
+
+C          -------------------------------------------------------------
+C          Scan 2:  for each i in Lme, sum up the degree of Lme (which
+C          is degme), plus the sum of the external degrees of each Le
+C          for the elements e appearing within i, plus the
+C          supervariables in i.  Place i in hash list.
+C          -------------------------------------------------------------
+
+           DO 180 PME = PME1, PME2
+              I = IW (PME)
+              P1 = PE (I)
+              P2 = P1 + ELEN (I) - 1
+              PN = P1
+              HASH = 0
+              DEG = 0
+
+C             ----------------------------------------------------------
+C             scan the element list associated with supervariable i
+C             ----------------------------------------------------------
+
+C             exact external degree:
+              WFLG = WFLG + 1
+              DO 160 P = P1, P2
+                 E = IW (P)
+                 IF (W (E) .NE. 0) THEN
+C                   e is an unabsorbed element
+                    DO 145 PJ = PE (E), PE (E) + LEN (E) - 1
+                       J = IW (PJ)
+                       NVJ = NV (J)
+                       IF (NVJ .GT. 0 .AND. W (J) .NE. WFLG) THEN
+C                         j is principal and not in Lme if nv (j) .gt. 0
+C                         and j is not yet seen if w (j) .ne. wflg
+                          W (J) = WFLG
+                          DEG = DEG + NVJ
+                          ENDIF
+145                    CONTINUE
+                    IW (PN) = E
+                    PN = PN + 1
+                    HASH = HASH + E
+                    ENDIF
+160              CONTINUE
+
+C             count the number of elements in i (including me):
+              ELEN (I) = PN - P1 + 1
+
+C             ----------------------------------------------------------
+C             scan the supervariables in the list associated with i
+C             ----------------------------------------------------------
+
+              P3 = PN
+              DO 170 P = P2 + 1, P1 + LEN (I) - 1
+                 J = IW (P)
+                 NVJ = NV (J)
+                 IF (NVJ .GT. 0) THEN
+C                   j is unabsorbed, and not in Lme.
+C                   add to degree and add to new list
+                    DEG = DEG + NVJ
+                    IW (PN) = J
+                    PN = PN + 1
+                    HASH = HASH + J
+                    ENDIF
+170              CONTINUE
+
+C             ----------------------------------------------------------
+C             update the degree and check for mass elimination
+C             ----------------------------------------------------------
+
+              IF (ELEN (I) .EQ. 1 .AND. P3 .EQ. PN) THEN
+
+C                -------------------------------------------------------
+C                mass elimination
+C                -------------------------------------------------------
+
+C                There is nothing left of this node except for an
+C                edge to the current pivot element.  elen (i) is 1,
+C                and there are no variables adjacent to node i.
+C                Absorb i into the current pivot element, me.
+
+                 PE (I) = -ME
+                 NVI = -NV (I)
+                 DEGME = DEGME - NVI
+                 NVPIV = NVPIV + NVI
+                 NEL = NEL + NVI
+                 NV (I) = 0
+                 ELEN (I) = 0
+
+              ELSE
+
+C                -------------------------------------------------------
+C                update the exact degree of i
+C                -------------------------------------------------------
+
+C                the following degree does not yet include the size
+C                of the current element, which is added later:
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                add me to the list for i
+C                -------------------------------------------------------
+
+C                move first supervariable to end of list
+                 IW (PN) = IW (P3)
+C                move first element to end of element part of list
+                 IW (P3) = IW (P1)
+C                add new element to front of list.
+                 IW (P1) = ME
+C                store the new length of the list in len (i)
+                 LEN (I) = PN - P1 + 1
+
+C                -------------------------------------------------------
+C                place in hash bucket.  Save hash key of i in last (i).
+C                -------------------------------------------------------
+
+                 HASH = MOD (HASH, HMOD) + 1
+                 J = HEAD (HASH)
+                 IF (J .LE. 0) THEN
+C                   the degree list is empty, hash head is -j
+                    NEXT (I) = -J
+                    HEAD (HASH) = -I
+                 ELSE
+C                   degree list is not empty
+C                   use last (head (hash)) as hash head
+                    NEXT (I) = LAST (J)
+                    LAST (J) = I
+                    ENDIF
+                 LAST (I) = HASH
+                 ENDIF
+180           CONTINUE
+
+           DEGREE (ME) = DEGME
+
+C          -------------------------------------------------------------
+C          Clear the counter array, w (...), by incrementing wflg.
+C          -------------------------------------------------------------
+
+           WFLG = WFLG + 1
+
+C          make sure that wflg+n does not cause integer overflow
+           IF (WFLG .GE. WBIG) THEN
+              DO 190 X = 1, N
+                 IF (W (X) .NE. 0) W (X) = 1
+190              CONTINUE
+              WFLG = 2
+              ENDIF
+C          at this point, w (1..n) .lt. wflg holds
+
+C=======================================================================
+C  SUPERVARIABLE DETECTION
+C=======================================================================
+
+           DO 250 PME = PME1, PME2
+              I = IW (PME)
+              IF (NV (I) .LT. 0) THEN
+C                i is a principal variable in Lme
+
+C                -------------------------------------------------------
+C                examine all hash buckets with 2 or more variables.  We
+C                do this by examining all unique hash keys for super-
+C                variables in the pattern Lme of the current element, me
+C                -------------------------------------------------------
+
+                 HASH = LAST (I)
+C                let i = head of hash bucket, and empty the hash bucket
+                 J = HEAD (HASH)
+                 IF (J .EQ. 0) GOTO 250
+                 IF (J .LT. 0) THEN
+C                   degree list is empty
+                    I = -J
+                    HEAD (HASH) = 0
+                 ELSE
+C                   degree list is not empty, restore last () of head
+                    I = LAST (J)
+                    LAST (J) = 0
+                    ENDIF
+                 IF (I .EQ. 0) GOTO 250
+
+C                while loop:
+200              CONTINUE
+                 IF (NEXT (I) .NE. 0) THEN
+
+C                   ----------------------------------------------------
+C                   this bucket has one or more variables following i.
+C                   scan all of them to see if i can absorb any entries
+C                   that follow i in hash bucket.  Scatter i into w.
+C                   ----------------------------------------------------
+
+                    LN = LEN (I)
+                    ELN = ELEN (I)
+C                   do not flag the first element in the list (me)
+                    DO 210 P = PE (I) + 1, PE (I) + LN - 1
+                       W (IW (P)) = WFLG
+210                    CONTINUE
+
+C                   ----------------------------------------------------
+C                   scan every other entry j following i in bucket
+C                   ----------------------------------------------------
+
+                    JLAST = I
+                    J = NEXT (I)
+
+C                   while loop:
+220                 CONTINUE
+                    IF (J .NE. 0) THEN
+
+C                      -------------------------------------------------
+C                      check if j and i have identical nonzero pattern
+C                      -------------------------------------------------
+
+                       IF (LEN (J) .NE. LN) THEN
+C                         i and j do not have same size data structure
+                          GOTO 240
+                          ENDIF
+                       IF (ELEN (J) .NE. ELN) THEN
+C                         i and j do not have same number of adjacent el
+                          GOTO 240
+                          ENDIF
+C                      do not flag the first element in the list (me)
+                       DO 230 P = PE (J) + 1, PE (J) + LN - 1
+                          IF (W (IW (P)) .NE. WFLG) THEN
+C                            an entry (iw(p)) is in j but not in i
+                             GOTO 240
+                             ENDIF
+230                       CONTINUE
+
+C                      -------------------------------------------------
+C                      found it!  j can be absorbed into i
+C                      -------------------------------------------------
+
+                       PE (J) = -I
+C                      both nv (i) and nv (j) are negated since they
+C                      are in Lme, and the absolute values of each
+C                      are the number of variables in i and j:
+                       NV (I) = NV (I) + NV (J)
+                       NV (J) = 0
+                       ELEN (J) = 0
+C                      delete j from hash bucket
+                       J = NEXT (J)
+                       NEXT (JLAST) = J
+                       GOTO 220
+
+C                      -------------------------------------------------
+240                    CONTINUE
+C                      j cannot be absorbed into i
+C                      -------------------------------------------------
+
+                       JLAST = J
+                       J = NEXT (J)
+                       GOTO 220
+                       ENDIF
+
+C                   ----------------------------------------------------
+C                   no more variables can be absorbed into i
+C                   go to next i in bucket and clear flag array
+C                   ----------------------------------------------------
+
+                    WFLG = WFLG + 1
+                    I = NEXT (I)
+                    IF (I .NE. 0) GOTO 200
+                    ENDIF
+                 ENDIF
+250           CONTINUE
+
+C=======================================================================
+C  RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVAR. FROM ELEMENT
+C=======================================================================
+
+           P = PME1
+           NLEFT = N - NEL
+           DO 260 PME = PME1, PME2
+              I = IW (PME)
+              NVI = -NV (I)
+              IF (NVI .GT. 0) THEN
+C                i is a principal variable in Lme
+C                restore nv (i) to signify that i is principal
+                 NV (I) = NVI
+
+C                -------------------------------------------------------
+C                compute the true degree (add size of current element)
+C                -------------------------------------------------------
+
+                 DEG = DEGREE (I) + DEGME
+
+C                -------------------------------------------------------
+C                place the supervariable at the head of the degree list
+C                -------------------------------------------------------
+
+                 INEXT = HEAD (DEG)
+                 IF (INEXT .NE. 0) LAST (INEXT) = I
+                 NEXT (I) = INEXT
+                 LAST (I) = 0
+                 HEAD (DEG) = I
+
+C                -------------------------------------------------------
+C                save the new degree, and find the minimum degree
+C                -------------------------------------------------------
+
+                 MINDEG = MIN (MINDEG, DEG)
+                 DEGREE (I) = DEG
+
+C                -------------------------------------------------------
+C                place the supervariable in the element pattern
+C                -------------------------------------------------------
+
+                 IW (P) = I
+                 P = P + 1
+                 ENDIF
+260           CONTINUE
+
+C=======================================================================
+C  FINALIZE THE NEW ELEMENT
+C=======================================================================
+
+           NV (ME) = NVPIV + DEGME
+C          nv (me) is now the degree of pivot (including diagonal part)
+C          save the length of the list for the new element me
+           LEN (ME) = P - PME1
+           IF (LEN (ME) .EQ. 0) THEN
+C             there is nothing left of the current pivot element
+              PE (ME) = 0
+              W (ME) = 0
+              ENDIF
+           IF (NEWMEM .NE. 0) THEN
+C             element was not constructed in place: deallocate part
+C             of it (final size is less than or equal to newmem,
+C             since newly nonprincipal variables have been removed).
+              PFREE = P
+              MEM = MEM - NEWMEM + LEN (ME)
+              ENDIF
+
+C=======================================================================
+C          END WHILE (selecting pivots)
+           GOTO 30
+           ENDIF
+C=======================================================================
+
+C=======================================================================
+C  COMPUTE THE PERMUTATION VECTORS
+C=======================================================================
+
+C       ----------------------------------------------------------------
+C       The time taken by the following code is O(n).  At this
+C       point, elen (e) = -k has been done for all elements e,
+C       and elen (i) = 0 has been done for all nonprincipal
+C       variables i.  At this point, there are no principal
+C       supervariables left, and all elements are absorbed.
+C       ----------------------------------------------------------------
+
+C       ----------------------------------------------------------------
+C       compute the ordering of unordered nonprincipal variables
+C       ----------------------------------------------------------------
+
+        DO 290 I = 1, N
+           IF (ELEN (I) .EQ. 0) THEN
+
+C             ----------------------------------------------------------
+C             i is an un-ordered row.  Traverse the tree from i until
+C             reaching an element, e.  The element, e, was the
+C             principal supervariable of i and all nodes in the path
+C             from i to when e was selected as pivot.
+C             ----------------------------------------------------------
+
+              J = -PE (I)
+C             while (j is a variable) do:
+270           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 J = -PE (J)
+                 GOTO 270
+                 ENDIF
+              E = J
+
+C             ----------------------------------------------------------
+C             get the current pivot ordering of e
+C             ----------------------------------------------------------
+
+              K = -ELEN (E)
+
+C             ----------------------------------------------------------
+C             traverse the path again from i to e, and compress the
+C             path (all nodes point to e).  Path compression allows
+C             this code to compute in O(n) time.  Order the unordered
+C             nodes in the path, and place the element e at the end.
+C             ----------------------------------------------------------
+
+              J = I
+C             while (j is a variable) do:
+280           CONTINUE
+              IF (ELEN (J) .GE. 0) THEN
+                 JNEXT = -PE (J)
+                 PE (J) = -E
+                 IF (ELEN (J) .EQ. 0) THEN
+C                   j is an unordered row
+                    ELEN (J) = K
+                    K = K + 1
+                    ENDIF
+                 J = JNEXT
+                 GOTO 280
+                 ENDIF
+C             leave elen (e) negative, so we know it is an element
+              ELEN (E) = -K
+              ENDIF
+290        CONTINUE
+
+C       ----------------------------------------------------------------
+C       reset the inverse permutation (elen (1..n)) to be positive,
+C       and compute the permutation (last (1..n)).
+C       ----------------------------------------------------------------
+
+        DO 300 I = 1, N
+           K = ABS (ELEN (I))
+           LAST (K) = I
+           ELEN (I) = K
+300        CONTINUE
+
+C=======================================================================
+C  RETURN THE MEMORY USAGE IN IW
+C=======================================================================
+
+C       If maxmem is less than or equal to iwlen, then no compressions
+C       occurred, and iw (maxmem+1 ... iwlen) was unused.  Otherwise
+C       compressions did occur, and iwlen would have had to have been
+C       greater than or equal to maxmem for no compressions to occur.
+C       Return the value of maxmem in the pfree argument.
+
+        PFREE = MAXMEM
+
+        RETURN
+        END
+
diff --git a/contrib/taucs/external/src/colamd.c b/contrib/taucs/external/src/colamd.c
new file mode 100644
index 0000000000000000000000000000000000000000..b60718f99385e2597334adf6ac9bf9fe1a020149
--- /dev/null
+++ b/contrib/taucs/external/src/colamd.c
@@ -0,0 +1,2583 @@
+/* ========================================================================== */
+/* === colamd - a sparse matrix column ordering algorithm =================== */
+/* ========================================================================== */
+
+/*
+    colamd:  An approximate minimum degree column ordering algorithm.
+
+    Purpose:
+
+	Colamd computes a permutation Q such that the Cholesky factorization of
+	(AQ)'(AQ) has less fill-in and requires fewer floating point operations
+	than A'A.  This also provides a good ordering for sparse partial
+	pivoting methods, P(AQ) = LU, where Q is computed prior to numerical
+	factorization, and P is computed during numerical factorization via
+	conventional partial pivoting with row interchanges.  Colamd is the
+	column ordering method used in SuperLU, part of the ScaLAPACK library.
+	It is also available as user-contributed software for Matlab 5.2,
+	available from MathWorks, Inc. (http://www.mathworks.com).  This
+	routine can be used in place of COLMMD in Matlab.  By default, the \
+	and / operators in Matlab perform a column ordering (using COLMMD)
+	prior to LU factorization using sparse partial pivoting, in the
+	built-in Matlab LU(A) routine.
+
+    Authors:
+
+	The authors of the code itself are Stefan I. Larimore and Timothy A.
+	Davis (davis@cise.ufl.edu), University of Florida.  The algorithm was
+	developed in collaboration with John Gilbert, Xerox PARC, and Esmond
+	Ng, Oak Ridge National Laboratory.
+
+    Date:
+
+	August 3, 1998.  Version 1.0.
+
+    Acknowledgements:
+
+	This work was supported by the National Science Foundation, under
+	grants DMS-9504974 and DMS-9803599.
+
+    Notice:
+
+	Copyright (c) 1998 by the University of Florida.  All Rights Reserved.
+
+	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+
+	Permission is hereby granted to use or copy this program for any
+	purpose, provided the above notices are retained on all copies.
+	User documentation of any code that uses this code must cite the
+	Authors, the Copyright, and "Used by permission."  If this code is
+	accessible from within Matlab, then typing "help colamd" or "colamd"
+	(with no arguments) must cite the Authors.  Permission to modify the
+	code and to distribute modified code is granted, provided the above
+	notices are retained, and a notice that the code was modified is
+	included with the above copyright notice.  You must also retain the
+	Availability information below, of the original version.
+
+	This software is provided free of charge.
+
+    Availability:
+
+	This file is located at
+
+		http://www.cise.ufl.edu/~davis/colamd/colamd.c
+
+	The colamd.h file is required, located in the same directory.
+	The colamdmex.c file provides a Matlab interface for colamd.
+	The symamdmex.c file provides a Matlab interface for symamd, which is
+	a symmetric ordering based on this code, colamd.c.  All codes are
+	purely ANSI C compliant (they use no Unix-specific routines, include
+	files, etc.).
+*/
+
+/* ========================================================================== */
+/* === Description of user-callable routines ================================ */
+/* ========================================================================== */
+
+/*
+    Each user-callable routine (declared as PUBLIC) is briefly described below.
+    Refer to the comments preceding each routine for more details.
+
+    ----------------------------------------------------------------------------
+    colamd_recommended:
+    ----------------------------------------------------------------------------
+
+	Usage:
+
+	    Alen = colamd_recommended (nnz, n_row, n_col) ;
+
+	Purpose:
+
+	    Returns recommended value of Alen for use by colamd.  Returns -1
+	    if any input argument is negative.
+
+	Arguments:
+
+	    int nnz ;		Number of nonzeros in the matrix A.  This must
+				be the same value as p [n_col] in the call to
+				colamd - otherwise you will get a wrong value
+				of the recommended memory to use.
+	    int n_row ;		Number of rows in the matrix A.
+	    int n_col ;		Number of columns in the matrix A.
+
+    ----------------------------------------------------------------------------
+    colamd_set_defaults:
+    ----------------------------------------------------------------------------
+
+	Usage:
+
+	    colamd_set_defaults (knobs) ;
+
+	Purpose:
+
+	    Sets the default parameters.
+
+	Arguments:
+
+	    double knobs [COLAMD_KNOBS] ;	Output only.
+
+		Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries
+		are removed prior to ordering.  Columns with more than
+		(knobs [COLAMD_DENSE_COL] * n_row) entries are removed
+		prior to ordering, and placed last in the output column
+		ordering.  Default values of these two knobs are both 0.5.
+		Currently, only knobs [0] and knobs [1] are used, but future
+		versions may use more knobs.  If so, they will be properly set
+		to their defaults by the future version of colamd_set_defaults,
+		so that the code that calls colamd will not need to change,
+		assuming that you either use colamd_set_defaults, or pass a
+		(double *) NULL pointer as the knobs array to colamd.
+
+    ----------------------------------------------------------------------------
+    colamd:
+    ----------------------------------------------------------------------------
+
+	Usage:
+
+	    colamd (n_row, n_col, Alen, A, p, knobs) ;
+
+	Purpose:
+
+	    Computes a column ordering (Q) of A such that P(AQ)=LU or
+	    (AQ)'AQ=LL' have less fill-in and require fewer floating point
+	    operations than factorizing the unpermuted matrix A or A'A,
+	    respectively.
+
+	Arguments:
+
+	    int n_row ;
+
+		Number of rows in the matrix A.
+		Restriction:  n_row >= 0.
+		Colamd returns FALSE if n_row is negative.
+
+	    int n_col ;
+
+		Number of columns in the matrix A.
+		Restriction:  n_col >= 0.
+		Colamd returns FALSE if n_col is negative.
+
+	    int Alen ;
+
+		Restriction (see note):
+		Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS
+		Colamd returns FALSE if these conditions are not met.
+
+		Note:  this restriction makes a modest assumption regarding
+		the size of the two typedef'd structures, below.  We do,
+		however, guarantee that
+		Alen >= colamd_recommended (nnz, n_row, n_col)
+		will be sufficient.
+
+	    int A [Alen] ;	Input argument, stats on output.
+
+		A is an integer array of size Alen.  Alen must be at least as
+		large as the bare minimum value given above, but this is very
+		low, and can result in excessive run time.  For best
+		performance, we recommend that Alen be greater than or equal to
+		colamd_recommended (nnz, n_row, n_col), which adds
+		nnz/5 to the bare minimum value given above.
+
+		On input, the row indices of the entries in column c of the
+		matrix are held in A [(p [c]) ... (p [c+1]-1)].  The row indices
+		in a given column c need not be in ascending order, and
+		duplicate row indices may be present.  However, colamd will
+		work a little faster if the row indices are sorted and free of
+		duplicates (Colamd puts the matrix into this format, if it finds
+		that these conditions are not met).
+
+		The matrix is 0-based.  That is, rows are in the range 0 to
+		n_row-1, and columns are in the range 0 to n_col-1.  Colamd
+		returns FALSE if any row index is out of range.
+
+		The contents of A are modified during ordering, and are thus
+		undefined on output with the exception of a few statistics
+		about the ordering (A [0..COLAMD_STATS-1]):
+		A [0]:  number of dense or empty rows ignored.
+		A [1]:  number of dense or empty columns ignored (and ordered
+			last in the output permutation p)
+		A [2]:  number of garbage collections performed.
+		A [3]:  0, if all row indices in each column were in sorted
+			  order, and no duplicates were present.
+			1, otherwise (in which case colamd had to do more work)
+		Note that a row can become "empty" if it contains only
+		"dense" and/or "empty" columns, and similarly a column can
+		become "empty" if it only contains "dense" and/or "empty" rows.
+		Future versions may return more statistics in A, but the usage
+		of these 4 entries in A will remain unchanged.
+
+	    int p [n_col+1] ;	Both input and output argument.
+
+		p is an integer array of size n_col+1.  On input, it holds the
+		"pointers" for the column form of the matrix A.  Column c of
+		the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
+		entry, p [0], must be zero, and p [c] <= p [c+1] must hold
+		for all c in the range 0 to n_col-1.  The value p [n_col] is
+		thus the total number of entries in the pattern of the matrix A.
+		Colamd returns FALSE if these conditions are not met.
+
+		On output, if colamd returns TRUE, the array p holds the column
+		permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is
+		the first column index in the new ordering, and p [n_col-1] is
+		the last.  That is, p [k] = j means that column j of A is the
+		kth pivot column, in AQ, where k is in the range 0 to n_col-1
+		(p [0] = j means that column j of A is the first column in AQ).
+
+		If colamd returns FALSE, then no permutation is returned, and
+		p is undefined on output.
+
+	    double knobs [COLAMD_KNOBS] ;	Input only.
+
+		See colamd_set_defaults for a description.  If the knobs array
+		is not present (that is, if a (double *) NULL pointer is passed
+		in its place), then the default values of the parameters are
+		used instead.
+
+*/
+
+
+/* ========================================================================== */
+/* === Include files ======================================================== */
+/* ========================================================================== */
+
+/* limits.h:  the largest positive integer (INT_MAX) */
+#include <limits.h>
+
+/* colamd.h:  knob array size, stats output size, and global prototypes */
+#include "colamd.h"
+
+/* ========================================================================== */
+/* === Scaffolding code definitions  ======================================== */
+/* ========================================================================== */
+
+/* Ensure that debugging is turned off: */
+#ifndef NDEBUG
+#define NDEBUG
+#endif
+
+/* assert.h:  the assert macro (no debugging if NDEBUG is defined) */
+#include <assert.h>
+
+/*
+   Our "scaffolding code" philosophy:  In our opinion, well-written library
+   code should keep its "debugging" code, and just normally have it turned off
+   by the compiler so as not to interfere with performance.  This serves
+   several purposes:
+
+   (1) assertions act as comments to the reader, telling you what the code
+	expects at that point.  All assertions will always be true (unless
+	there really is a bug, of course).
+
+   (2) leaving in the scaffolding code assists anyone who would like to modify
+	the code, or understand the algorithm (by reading the debugging output,
+	one can get a glimpse into what the code is doing).
+
+   (3) (gasp!) for actually finding bugs.  This code has been heavily tested
+	and "should" be fully functional and bug-free ... but you never know...
+
+    To enable debugging, comment out the "#define NDEBUG" above.  The code will
+    become outrageously slow when debugging is enabled.  To control the level of
+    debugging output, set an environment variable D to 0 (little), 1 (some),
+    2, 3, or 4 (lots).
+*/
+
+/* ========================================================================== */
+/* === Row and Column structures ============================================ */
+/* ========================================================================== */
+
+typedef struct ColInfo_struct	/* one Col [] entry; colamd() assumes 6 ints */
+{
+    int start ;		/* index for A of first row in this column; made */
+			/* negative by the KILL_*_COL macros when col dies */
+    int length ;	/* number of rows in this column */
+    union		/* which member is valid depends on alive/dead state */
+    {
+	int thickness ;	/* number of original columns represented by this */
+			/* col, if the column is alive */
+	int parent ;	/* parent in parent tree super-column structure, if */
+			/* the column is dead */
+    } shared1 ;
+    union		/* which member is valid depends on alive/dead state */
+    {
+	int score ;	/* the score used to maintain heap, if col is alive */
+	int order ;	/* pivot ordering of this column, if col is dead */
+    } shared2 ;
+    union		/* which member is valid depends on list membership */
+    {
+	int headhash ;	/* head of a hash bucket, if col is at the head of */
+			/* a degree list */
+	int hash ;	/* hash value, if col is not in a degree list */
+	int prev ;	/* previous column in degree list, if col is in a */
+			/* degree list (but not at the head of a degree list) */
+    } shared3 ;
+    union		/* which member is valid depends on list membership */
+    {
+	int degree_next ;	/* next column, if col is in a degree list */
+	int hash_next ;		/* next column, if col is in a hash list */
+    } shared4 ;
+    /* COL_IS_ALIVE / COL_IS_DEAD compare the start field against ALIVE */
+} ColInfo ;
+
+typedef struct RowInfo_struct	/* one Row [] entry; colamd() assumes 4 ints */
+{
+    int start ;		/* index for A of first col in this row */
+    int length ;	/* number of principal columns in this row */
+    union		/* the two members are used at different stages */
+    {
+	int degree ;	/* number of principal & non-principal columns in row */
+	int p ;		/* used as a row pointer in init_rows_cols () */
+    } shared1 ;
+    union		/* the two members are used at different stages */
+    {
+	int mark ;	/* for computing set differences and marking dead rows*/
+	int first_column ;/* first column in row (used in garbage collection) */
+    } shared2 ;
+    /* ROW_IS_ALIVE / ROW_IS_DEAD compare shared2.mark against ALIVE */
+} RowInfo ;
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+/* -(r)-1 equals ~r on two's-complement machines, and is its own inverse */
+#define ONES_COMPLEMENT(r) (-(r)-1)
+
+#define TRUE	(1)
+#define FALSE	(0)
+#define EMPTY	(-1)
+
+/* Row and column status */
+#define ALIVE	(0)
+#define DEAD	(-1)
+
+/* Column status */
+#define DEAD_PRINCIPAL		(-1)
+#define DEAD_NON_PRINCIPAL	(-2)
+/* The macros below expect arrays named Row and Col to be in scope. */
+/* Macros for row and column status update and checking. */
+#define ROW_IS_DEAD(r)			ROW_IS_MARKED_DEAD (Row[r].shared2.mark)
+#define ROW_IS_MARKED_DEAD(row_mark)	((row_mark) < ALIVE)
+#define ROW_IS_ALIVE(r)			(Row [r].shared2.mark >= ALIVE)
+#define COL_IS_DEAD(c)			(Col [c].start < ALIVE)
+#define COL_IS_ALIVE(c)			(Col [c].start >= ALIVE)
+#define COL_IS_DEAD_PRINCIPAL(c)	(Col [c].start == DEAD_PRINCIPAL)
+#define KILL_ROW(r)			{ Row [r].shared2.mark = DEAD ; }
+#define KILL_PRINCIPAL_COL(c)		{ Col [c].start = DEAD_PRINCIPAL ; }
+#define KILL_NON_PRINCIPAL_COL(c)	{ Col [c].start = DEAD_NON_PRINCIPAL ; }
+/* (the KILL_* macros are brace blocks: statements, not expressions) */
+/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
+#define PUBLIC
+#define PRIVATE static
+
+/* ========================================================================== */
+/* === Prototypes of PRIVATE routines ======================================= */
+/* ========================================================================== */
+
+PRIVATE int init_rows_cols	/* -1 on error, 1 if columns jumbled, else 0 */
+(
+    int n_row,
+    int n_col,
+    RowInfo Row [],
+    ColInfo Col [],
+    int A [],
+    int p []
+) ;
+
+PRIVATE void init_scoring	/* results come back through the p_* pointers */
+(
+    int n_row,
+    int n_col,
+    RowInfo Row [],
+    ColInfo Col [],
+    int A [],
+    int head [],		/* colamd () passes its p array here */
+    double knobs [COLAMD_KNOBS],
+    int *p_n_row2,
+    int *p_n_col2,
+    int *p_max_deg
+) ;
+
+PRIVATE int find_ordering	/* colamd () stores the result in ngarbage */
+(
+    int n_row,
+    int n_col,
+    int Alen,
+    RowInfo Row [],
+    ColInfo Col [],
+    int A [],
+    int head [],		/* colamd () passes its p array here */
+    int n_col2,
+    int max_deg,
+    int pfree			/* colamd () passes 2*nnz */
+) ;
+
+PRIVATE void order_children	/* called by colamd () after find_ordering */
+(
+    int n_col,
+    ColInfo Col [],
+    int p []
+) ;
+
+PRIVATE void detect_super_cols
+(
+#ifndef NDEBUG	/* the next two parameters exist only in debug builds */
+    int n_col,
+    RowInfo Row [],
+#endif
+    ColInfo Col [],
+    int A [],
+    int head [],
+    int row_start,
+    int row_length
+) ;
+
+PRIVATE int garbage_collection
+(
+    int n_row,
+    int n_col,
+    RowInfo Row [],
+    ColInfo Col [],
+    int A [],
+    int *pfree
+) ;
+
+PRIVATE int clear_mark
+(
+    int n_row,
+    RowInfo Row []
+) ;
+
+/* ========================================================================== */
+/* === Debugging definitions ================================================ */
+/* ========================================================================== */
+
+#ifndef NDEBUG
+/* Compiled only when the "#define NDEBUG" near the top of this file is removed. */
+/* === With debugging ======================================================= */
+
+/* stdlib.h: for getenv and atoi, to get debugging level from environment */
+#include <stdlib.h>
+
+/* stdio.h:  for printf (no printing if debugging is turned off) */
+#include <stdio.h>
+
+PRIVATE void debug_deg_lists
+(
+    int n_row,
+    int n_col,
+    RowInfo Row [],
+    ColInfo Col [],
+    int head [],
+    int min_score,
+    int should,
+    int max_deg
+) ;
+
+PRIVATE void debug_mark
+(
+    int n_row,
+    RowInfo Row [],
+    int tag_mark,
+    int max_mark
+) ;
+
+PRIVATE void debug_matrix
+(
+    int n_row,
+    int n_col,
+    RowInfo Row [],
+    ColInfo Col [],
+    int A []
+) ;
+
+PRIVATE void debug_structures
+(
+    int n_row,
+    int n_col,
+    RowInfo Row [],
+    ColInfo Col [],
+    int A [],
+    int n_col2
+) ;
+
+/* the following is the *ONLY* global variable in this file, and is only */
+/* present when debugging; colamd () sets it from environment variable D */
+
+PRIVATE int debug_colamd ;	/* debug print level */
+/* params is a complete, parenthesized printf argument list: DEBUG0 (("x")) */
+#define DEBUG0(params) { (void) printf params ; }
+#define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; }
+#define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; }
+#define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; }
+#define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; }
+
+#else
+
+/* === No debugging ========================================================= */
+/* every DEBUGn (...) call collapses to an empty statement */
+#define DEBUG0(params) ;
+#define DEBUG1(params) ;
+#define DEBUG2(params) ;
+#define DEBUG3(params) ;
+#define DEBUG4(params) ;
+
+#endif
+
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === USER-CALLABLE ROUTINES: ============================================== */
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === colamd_recommended =================================================== */
+/* ========================================================================== */
+
+/*
+    The colamd_recommended routine returns the suggested size for Alen, chosen
+    to balance the number of garbage collections against the memory needed by
+    colamd.  Returns -1 if an argument is negative or the size exceeds INT_MAX.
+*/
+
+PUBLIC int colamd_recommended	/* returns recommended value of Alen, or -1 */
+(
+    /* === Parameters ======================================================= */
+
+    int nnz,			/* number of nonzeros in A */
+    int n_row,			/* number of rows in A */
+    int n_col			/* number of columns in A */
+)
+{
+    /* === Local variables ================================================== */
+
+    double minimum ;		/* bare minimum requirements */
+    double recommended ;	/* recommended value of Alen */
+
+    if (nnz < 0 || n_row < 0 || n_col < 0)
+    {
+	/* return -1 if any input argument is corrupted */
+	DEBUG0 (("colamd_recommended error!")) ;
+	DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ;
+	return (-1) ;
+    }
+    /* sum in floating point: 2*nnz and n_col+1 can overflow an int */
+    minimum =
+	2.0 * nnz		/* for A */
+	+ (((double) n_col + 1) * sizeof (ColInfo) / sizeof (int)) /* for Col */
+	+ (((double) n_row + 1) * sizeof (RowInfo) / sizeof (int)) /* for Row */
+	+ n_col			/* minimum elbow room to guarantee success */
+	+ COLAMD_STATS ;	/* for output statistics */
+
+    /* recommended is equal to the minimum plus enough memory to keep the */
+    /* number of garbage collections low */
+    recommended = minimum + nnz/5 ;
+    /* an int cannot describe an array this large, so report failure */
+    return ((recommended > INT_MAX) ? -1 : (int) recommended) ;
+}
+
+
+/* ========================================================================== */
+/* === colamd_set_defaults ================================================== */
+/* ========================================================================== */
+
+/*
+    The colamd_set_defaults routine sets the default values of the user-
+    controllable parameters for colamd:
+
+	knobs [0]	rows with knobs[0]*n_col entries or more are removed
+			prior to ordering.
+
+	knobs [1]	columns with knobs[1]*n_row entries or more are removed
+			prior to ordering, and placed last in the column
+			permutation.
+
+	knobs [2..19]	unused, but future versions might use this
+*/
+
+PUBLIC void colamd_set_defaults
+(
+    /* === Parameters ======================================================= */
+
+    double knobs [COLAMD_KNOBS]		/* knob array, filled in on return */
+)
+{
+    /* === Local variables ================================================== */
+
+    int k = COLAMD_KNOBS ;		/* knobs still to be cleared */
+
+    if (knobs)				/* a NULL array is silently ignored */
+    {
+	while (k > 0)
+	{
+	    knobs [--k] = 0 ;		/* every knob starts out as zero */
+	}
+	/* then the two knobs in use get their 50% density thresholds */
+	knobs [COLAMD_DENSE_ROW] = 0.5 ;
+	knobs [COLAMD_DENSE_COL] = 0.5 ;
+    }
+}
+
+
+/* ========================================================================== */
+/* === colamd =============================================================== */
+/* ========================================================================== */
+
+/*
+    The colamd routine computes a column ordering Q of a sparse matrix
+    A such that the LU factorization P(AQ) = LU remains sparse, where P is
+    selected via partial pivoting.   The routine can also be viewed as
+    providing a permutation Q such that the Cholesky factorization
+    (AQ)'(AQ) = LL' remains sparse.
+
+    On input, the nonzero patterns of the columns of A are stored in the
+    array A, in order 0 to n_col-1.  A is held in 0-based form (rows in the
+    range 0 to n_row-1 and columns in the range 0 to n_col-1).  Row indices
+    for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0,
+    and thus p [n_col] is the number of entries in A.  The matrix is
+    destroyed on output.  The row indices within each column do not have to
+    be sorted (from small to large row indices), and duplicate row indices
+    may be present.  However, colamd will work a little faster if columns are
+    sorted and no duplicates are present.  Matlab 5.2 always passes the matrix
+    with sorted columns, and no duplicates.
+
+    The integer array A is of size Alen.  Alen must be at least of size
+    (where nnz is the number of entries in A):
+
+	nnz			for the input column form of A
+	+ nnz			for a row form of A that colamd generates
+	+ 6*(n_col+1)		for a ColInfo Col [0..n_col] array
+				(this assumes sizeof (ColInfo) is 6 int's).
+	+ 4*(n_row+1)		for a RowInfo Row [0..n_row] array
+				(this assumes sizeof (RowInfo) is 4 int's).
+	+ elbow_room		must be at least n_col.  We recommend at least
+				nnz/5 in addition to that.  If sufficient,
+				changes in the elbow room affect the ordering
+				time only, not the ordering itself.
+	+ COLAMD_STATS		for the output statistics
+
+    Colamd returns FALSE if memory is insufficient, or TRUE otherwise.
+
+    On input, the caller must specify:
+
+	n_row			the number of rows of A
+	n_col			the number of columns of A
+	Alen			the size of the array A
+	A [0 ... nnz-1]		the row indices, where nnz = p [n_col]
+	A [nnz ... Alen-1]	(need not be initialized by the user)
+	p [0 ... n_col]		the column pointers,  p [0] = 0, and p [n_col]
+				is the number of entries in A.  Column c of A
+				is stored in A [p [c] ... p [c+1]-1].
+	knobs [0 ... 19]	a set of parameters that control the behavior
+				of colamd.  If knobs is a NULL pointer the
+				defaults are used.  The user-callable
+				colamd_set_defaults routine sets the default
+				parameters.  See that routine for a description
+				of the user-controllable parameters.
+
+    If the return value of Colamd is TRUE, then on output:
+
+	p [0 ... n_col-1]	the column permutation. p [0] is the first
+				column index, and p [n_col-1] is the last.
+				That is, p [k] = j means that column j of A
+				is the kth column of AQ.
+
+	A			is undefined on output (the matrix pattern is
+				destroyed), except for the following statistics:
+
+	A [0]			the number of dense (or empty) rows ignored
+	A [1]			the number of dense (or empty) columns.  These
+				are ordered last, in their natural order.
+	A [2]			the number of garbage collections performed.
+				If this is excessive, then you would have
+				gotten your results faster if Alen was larger.
+	A [3]			0, if all row indices in each column were in
+				sorted order and no duplicates were present.
+				1, if there were unsorted or duplicate row
+				indices in the input.  You would have gotten
+				your results faster if A [3] was returned as 0.
+
+    If the return value of Colamd is FALSE, then A and p are undefined on
+    output.
+*/
+
+PUBLIC int colamd		/* returns TRUE if successful */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows in A */
+    int n_col,			/* number of columns in A */
+    int Alen,			/* length of A */
+    int A [],			/* row indices of A */
+    int p [],			/* pointers to columns in A */
+    double knobs [COLAMD_KNOBS]	/* parameters (uses defaults if NULL) */
+)
+{
+    /* === Local variables ================================================== */
+
+    int i ;			/* loop index */
+    int nnz ;			/* nonzeros in A */
+    int Row_size, Col_size ;	/* sizes of Row [] and Col [], in integers */
+    double need ;		/* minimum Alen, as a double to avoid overflow */
+    RowInfo *Row ;		/* pointer into A of Row [0..n_row] array */
+    ColInfo *Col ;		/* pointer into A of Col [0..n_col] array */
+    int n_col2 ;		/* number of non-dense, non-empty columns */
+    int n_row2 ;		/* number of non-dense, non-empty rows */
+    int ngarbage ;		/* number of garbage collections performed */
+    int max_deg ;		/* maximum row degree */
+    double default_knobs [COLAMD_KNOBS] ;	/* default knobs array */
+    int init_result ;		/* return code from initialization */
+
+#ifndef NDEBUG
+    debug_colamd = 0 ;		/* no debug printing */
+    /* get "D" environment variable, which gives the debug printing level */
+    if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ;
+    DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ;
+#endif
+
+    /* === Check the input arguments ======================================== */
+
+    if (n_row < 0 || n_col < 0 || !A || !p)
+    {
+	/* n_row and n_col must be non-negative, A and p must be present */
+	DEBUG0 (("colamd error! %d %d %d\n", n_row, n_col, Alen)) ;
+	return (FALSE) ;
+    }
+    nnz = p [n_col] ;
+    if (nnz < 0 || p [0] != 0)
+    {
+	/* nnz must be non-negative, and p [0] must be zero */
+	DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ;
+	return (FALSE) ;
+    }
+
+    /* === If no knobs, set default parameters ============================== */
+
+    if (!knobs)
+    {
+	knobs = default_knobs ;
+	colamd_set_defaults (knobs) ;
+    }
+
+    /* === Allocate the Row and Col arrays from array A ===================== */
+    /* check Alen in floating point, since the sum below can exceed INT_MAX */
+    need = 2.0 * nnz + n_col + COLAMD_STATS	/* matrix, elbow room, stats */
+	+ ((double) n_col + 1) * sizeof (ColInfo) / sizeof (int)	/* Col */
+	+ ((double) n_row + 1) * sizeof (RowInfo) / sizeof (int) ;	/* Row */
+    if (need > (double) Alen)	/* array A is too small for the ordering */
+    {
+	DEBUG0 (("colamd error! Alen %d, n_col %d\n", Alen, n_col)) ;
+	return (FALSE) ;
+    }
+    Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ;
+    Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ;
+    Alen -= Col_size + Row_size ;	/* Col and Row occupy the tail of A */
+    Col  = (ColInfo *) &A [Alen] ;
+    Row  = (RowInfo *) &A [Alen + Col_size] ;
+
+    /* === Construct the row and column data structures ===================== */
+
+    init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ;
+    if (init_result == -1)
+    {
+	/* input matrix is invalid */
+	DEBUG0 (("colamd error! matrix invalid\n")) ;
+	return (FALSE) ;
+    }
+
+    /* === Initialize scores, kill dense rows/columns ======================= */
+
+    init_scoring (n_row, n_col, Row, Col, A, p, knobs,
+	&n_row2, &n_col2, &max_deg) ;
+
+    /* === Order the supercolumns =========================================== */
+
+    ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
+	n_col2, max_deg, 2*nnz) ;
+
+    /* === Order the non-principal columns ================================== */
+
+    order_children (n_col, Col, p) ;
+
+    /* === Return statistics in A =========================================== */
+
+    for (i = 0 ; i < COLAMD_STATS ; i++)
+    {
+	A [i] = 0 ;
+    }
+    A [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+    A [COLAMD_DENSE_COL] = n_col - n_col2 ;
+    A [COLAMD_DEFRAG_COUNT] = ngarbage ;
+    A [COLAMD_JUMBLED_COLS] = init_result ;
+
+    return (TRUE) ;
+}
+
+
+/* ========================================================================== */
+/* === NON-USER-CALLABLE ROUTINES: ========================================== */
+/* ========================================================================== */
+
+/* There are no user-callable routines beyond this point in the file */
+
+
+/* ========================================================================== */
+/* === init_rows_cols ======================================================= */
+/* ========================================================================== */
+
+/*
+    Takes the column form of the matrix in A and creates the row form of the
+    matrix.  Also, row and column attributes are stored in the Col and Row
+    structs.  If the columns are un-sorted or contain duplicate row indices,
+    this routine will also sort and remove duplicate row indices from the
+    column form of the matrix.  Returns -1 on error, 1 if columns jumbled,
+    or 0 if columns not jumbled.  Not user-callable.
+*/
+
+PRIVATE int init_rows_cols	/* returns status code */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows of A */
+    int n_col,			/* number of columns of A */
+    RowInfo Row [],		/* of size n_row+1 */
+    ColInfo Col [],		/* of size n_col+1 */
+    int A [],			/* row indices of A, of size Alen */
+    int p []			/* pointers to columns in A, of size n_col+1 */
+)
+{
+    /* === Local variables ================================================== */
+
+    int col ;			/* a column index */
+    int row ;			/* a row index */
+    int *cp ;			/* a column pointer */
+    int *cp_end ;		/* a pointer to the end of a column */
+    int *rp ;			/* a row pointer */
+    int *rp_end ;		/* a pointer to the end of a row */
+    int last_start ;		/* start index of previous column in A */
+    int start ;			/* start index of column in A */
+    int last_row ;		/* previous row */
+    int jumbled_columns ;	/* indicates if columns are jumbled */
+
+    /* === Initialize columns, and check column pointers ==================== */
+
+    last_start = 0 ;
+    for (col = 0 ; col < n_col ; col++)
+    {
+	start = p [col] ;
+	if (start < last_start)
+	{
+	    /* column pointers must be non-decreasing */
+	    DEBUG0 (("colamd error!  last p %d p [col] %d\n",last_start,start));
+	    return (-1) ;
+	}
+	Col [col].start = start ;
+	Col [col].length = p [col+1] - start ;
+	Col [col].shared1.thickness = 1 ;
+	Col [col].shared2.score = 0 ;
+	Col [col].shared3.prev = EMPTY ;
+	Col [col].shared4.degree_next = EMPTY ;
+	last_start = start ;
+    }
+    /* must check the end pointer for last column */
+    if (p [n_col] < last_start)
+    {
+	/* column pointers must be non-decreasing */
+	DEBUG0 (("colamd error!  last p %d p [n_col] %d\n",p[col],last_start)) ;
+	return (-1) ;
+    }
+
+    /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
+
+    /* === Scan columns, compute row degrees, and check row indices ========= */
+
+    jumbled_columns = FALSE ;
+
+    for (row = 0 ; row < n_row ; row++)
+    {
+	Row [row].length = 0 ;
+	Row [row].shared2.mark = -1 ;
+    }
+
+    for (col = 0 ; col < n_col ; col++)
+    {
+	last_row = -1 ;
+
+	cp = &A [p [col]] ;
+	cp_end = &A [p [col+1]] ;
+
+	while (cp < cp_end)
+	{
+	    row = *cp++ ;
+
+	    /* make sure row indices within range */
+	    if (row < 0 || row >= n_row)
+	    {
+		DEBUG0 (("colamd error!  col %d row %d last_row %d\n",
+			 col, row, last_row)) ;
+		return (-1) ;
+	    }
+	    else if (row <= last_row)
+	    {
+		/* row indices are not sorted or repeated, thus cols */
+		/* are jumbled */
+		jumbled_columns = TRUE ;
+	    }
+	    /* prevent repeated row from being counted */
+	    if (Row [row].shared2.mark != col)
+	    {
+		Row [row].length++ ;
+		Row [row].shared2.mark = col ;
+		last_row = row ;
+	    }
+	    else
+	    {
+		/* this is a repeated entry in the column, */
+		/* it will be removed */
+		Col [col].length-- ;
+	    }
+	}
+    }
+
+    /* === Compute row pointers ============================================= */
+
+    /* row form of the matrix starts directly after the column */
+    /* form of matrix in A */
+    Row [0].start = p [n_col] ;
+    Row [0].shared1.p = Row [0].start ;
+    Row [0].shared2.mark = -1 ;
+    for (row = 1 ; row < n_row ; row++)
+    {
+	Row [row].start = Row [row-1].start + Row [row-1].length ;
+	Row [row].shared1.p = Row [row].start ;
+	Row [row].shared2.mark = -1 ;
+    }
+
+    /* === Create row form ================================================== */
+
+    if (jumbled_columns)
+    {
+	/* if cols jumbled, watch for repeated row indices */
+	for (col = 0 ; col < n_col ; col++)
+	{
+	    cp = &A [p [col]] ;
+	    cp_end = &A [p [col+1]] ;
+	    while (cp < cp_end)
+	    {
+		row = *cp++ ;
+		if (Row [row].shared2.mark != col)
+		{
+		    A [(Row [row].shared1.p)++] = col ;
+		    Row [row].shared2.mark = col ;
+		}
+	    }
+	}
+    }
+    else
+    {
+	/* if cols not jumbled, we don't need the mark (this is faster) */
+	for (col = 0 ; col < n_col ; col++)
+	{
+	    cp = &A [p [col]] ;
+	    cp_end = &A [p [col+1]] ;
+	    while (cp < cp_end)
+	    {
+		A [(Row [*cp++].shared1.p)++] = col ;
+	    }
+	}
+    }
+
+    /* === Clear the row marks and set row degrees ========================== */
+
+    for (row = 0 ; row < n_row ; row++)
+    {
+	Row [row].shared2.mark = 0 ;
+	Row [row].shared1.degree = Row [row].length ;
+    }
+
+    /* === See if we need to re-create columns ============================== */
+
+    if (jumbled_columns)
+    {
+
+#ifndef NDEBUG
+	/* make sure column lengths are correct */
+	for (col = 0 ; col < n_col ; col++)
+	{
+	    p [col] = Col [col].length ;
+	}
+	for (row = 0 ; row < n_row ; row++)
+	{
+	    rp = &A [Row [row].start] ;
+	    rp_end = rp + Row [row].length ;
+	    while (rp < rp_end)
+	    {
+		p [*rp++]-- ;
+	    }
+	}
+	for (col = 0 ; col < n_col ; col++)
+	{
+	    assert (p [col] == 0) ;
+	}
+	/* now p is all zero (different than when debugging is turned off) */
+#endif
+
+	/* === Compute col pointers ========================================= */
+
+	/* col form of the matrix starts at A [0]. */
+	/* Note, we may have a gap between the col form and the row */
+	/* form if there were duplicate entries, if so, it will be */
+	/* removed upon the first garbage collection */
+	Col [0].start = 0 ;
+	p [0] = Col [0].start ;
+	for (col = 1 ; col < n_col ; col++)
+	{
+	    /* note that the lengths here are for pruned columns, i.e. */
+	    /* no duplicate row indices will exist for these columns */
+	    Col [col].start = Col [col-1].start + Col [col-1].length ;
+	    p [col] = Col [col].start ;
+	}
+
+	/* === Re-create col form =========================================== */
+
+	for (row = 0 ; row < n_row ; row++)
+	{
+	    rp = &A [Row [row].start] ;
+	    rp_end = rp + Row [row].length ;
+	    while (rp < rp_end)
+	    {
+		A [(p [*rp++])++] = row ;
+	    }
+	}
+	return (1) ;
+    }
+    else
+    {
+	/* no columns jumbled (this is faster) */
+	return (0) ;
+    }
+}
+
+
+/* ========================================================================== */
+/* === init_scoring ========================================================= */
+/* ========================================================================== */
+
+/*
+    Kills dense or empty columns and rows, computes an initial score for each
+    live column, and puts live columns in the degree lists.  Not user-callable.
+*/
+
+PRIVATE void init_scoring
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows of A */
+    int n_col,			/* number of columns of A */
+    RowInfo Row [],		/* of size n_row+1 */
+    ColInfo Col [],		/* of size n_col+1 */
+    int A [],			/* column form and row form of A */
+    int head [],		/* of size n_col+1 */
+    double knobs [COLAMD_KNOBS],/* dense row/column control parameters */
+    int *p_n_row2,		/* number of non-dense, non-empty rows */
+    int *p_n_col2,		/* number of non-dense, non-empty columns */
+    int *p_max_deg		/* maximum row degree */
+)
+{
+    /* === Local variables ================================================== */
+
+    int c ;			/* a column index */
+    int r, row ;		/* a row index */
+    int *cp ;			/* a column pointer */
+    int deg ;			/* degree (# entries) of a row or column */
+    int *cp_end ;		/* a pointer to the end of a column */
+    int *new_cp ;		/* new column pointer */
+    int col_length ;		/* length of pruned column */
+    int score ;			/* current column score */
+    int n_col2 ;		/* number of non-dense, non-empty columns */
+    int n_row2 ;		/* number of non-dense, non-empty rows */
+    int dense_row_count ;	/* remove rows with more entries than this */
+    int dense_col_count ;	/* remove cols with more entries than this */
+    int min_score ;		/* smallest column score */
+    int max_deg ;		/* maximum row degree */
+    int next_col ;		/* Used to add to degree list.*/
+#ifndef NDEBUG
+    int debug_count ;		/* debug only. */
+#endif
+
+    /* === Extract knobs ==================================================== */
+
+    dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
+    dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+    DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+    max_deg = 0 ;
+    n_col2 = n_col ;
+    n_row2 = n_row ;
+
+    /* === Kill empty columns =============================================== */
+
+    /* Put the empty columns at the end in their natural order, so that LU */
+    /* factorization can proceed as far as possible. */
+    for (c = n_col-1 ; c >= 0 ; c--)
+    {
+	deg = Col [c].length ;
+	if (deg == 0)
+	{
+	    /* this is an empty column, kill and order it last */
+	    Col [c].shared2.order = --n_col2 ;
+	    KILL_PRINCIPAL_COL (c) ;
+	}
+    }
+    DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ;
+
+    /* === Kill dense columns =============================================== */
+
+    /* Put the dense columns at the end, in their natural order */
+    for (c = n_col-1 ; c >= 0 ; c--)
+    {
+	/* skip any dead columns */
+	if (COL_IS_DEAD (c))
+	{
+	    continue ;
+	}
+	deg = Col [c].length ;
+	if (deg > dense_col_count)
+	{
+	    /* this is a dense column, kill and order it last */
+	    Col [c].shared2.order = --n_col2 ;
+	    /* decrement the row degrees */
+	    cp = &A [Col [c].start] ;
+	    cp_end = cp + Col [c].length ;
+	    while (cp < cp_end)
+	    {
+		Row [*cp++].shared1.degree-- ;
+	    }
+	    KILL_PRINCIPAL_COL (c) ;
+	}
+    }
+    DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ;
+
+    /* === Kill dense and empty rows ======================================== */
+
+    for (r = 0 ; r < n_row ; r++)
+    {
+	deg = Row [r].shared1.degree ;
+	assert (deg >= 0 && deg <= n_col) ;
+	if (deg > dense_row_count || deg == 0)
+	{
+	    /* kill a dense or empty row */
+	    KILL_ROW (r) ;
+	    --n_row2 ;
+	}
+	else
+	{
+	    /* keep track of max degree of remaining rows */
+	    max_deg = MAX (max_deg, deg) ;
+	}
+    }
+    DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ;
+
+    /* === Compute initial column scores ==================================== */
+
+    /* At this point the row degrees are accurate.  They reflect the number */
+    /* of "live" (non-dense) columns in each row.  No empty rows exist. */
+    /* Some "live" columns may contain only dead rows, however.  These are */
+    /* pruned in the code below. */
+
+    /* now find the initial matlab score for each column */
+    for (c = n_col-1 ; c >= 0 ; c--)
+    {
+	/* skip dead column */
+	if (COL_IS_DEAD (c))
+	{
+	    continue ;
+	}
+	score = 0 ;
+	cp = &A [Col [c].start] ;
+	new_cp = cp ;
+	cp_end = cp + Col [c].length ;
+	while (cp < cp_end)
+	{
+	    /* get a row */
+	    row = *cp++ ;
+	    /* skip if dead */
+	    if (ROW_IS_DEAD (row))
+	    {
+		continue ;
+	    }
+	    /* compact the column */
+	    *new_cp++ = row ;
+	    /* add row's external degree */
+	    score += Row [row].shared1.degree - 1 ;
+	    /* guard against integer overflow */
+	    score = MIN (score, n_col) ;
+	}
+	/* determine pruned column length */
+	col_length = (int) (new_cp - &A [Col [c].start]) ;
+	if (col_length == 0)
+	{
+	    /* a newly-made null column (all rows in this col are "dense" */
+	    /* and have already been killed) */
+	    DEBUG0 (("Newly null killed: %d\n", c)) ;
+	    Col [c].shared2.order = --n_col2 ;
+	    KILL_PRINCIPAL_COL (c) ;
+	}
+	else
+	{
+	    /* set column length and set score */
+	    assert (score >= 0) ;
+	    assert (score <= n_col) ;
+	    Col [c].length = col_length ;
+	    Col [c].shared2.score = score ;
+	}
+    }
+    DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ;
+
+    /* At this point, all empty rows and columns are dead.  All live columns */
+    /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
+    /* yet).  Rows may contain dead columns, but all live rows contain at */
+    /* least one live column. */
+
+#ifndef NDEBUG
+    debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
+#endif
+
+    /* === Initialize degree lists ========================================== */
+
+#ifndef NDEBUG
+    debug_count = 0 ;
+#endif
+
+    /* clear the heads of the degree lists (head [0..n_col]) */
+    for (c = 0 ; c <= n_col ; c++)
+    {
+	head [c] = EMPTY ;
+    }
+    min_score = n_col ;
+    /* place in reverse order, so low column indices are at the front */
+    /* of the lists.  This is to encourage natural tie-breaking */
+    for (c = n_col-1 ; c >= 0 ; c--)
+    {
+	/* only add principal columns to degree lists */
+	if (COL_IS_ALIVE (c))
+	{
+	    DEBUG4 (("place %d score %d minscore %d ncol %d\n",
+		c, Col [c].shared2.score, min_score, n_col)) ;
+
+	    /* === Add columns score to DList =============================== */
+
+	    score = Col [c].shared2.score ;
+
+	    assert (min_score >= 0) ;
+	    assert (min_score <= n_col) ;
+	    assert (score >= 0) ;
+	    assert (score <= n_col) ;
+	    assert (head [score] >= EMPTY) ;
+
+	    /* now add this column to dList at proper score location */
+	    next_col = head [score] ;
+	    Col [c].shared3.prev = EMPTY ;
+	    Col [c].shared4.degree_next = next_col ;
+
+	    /* if there already was a column with the same score, set its */
+	    /* previous pointer to this new column */
+	    if (next_col != EMPTY)
+	    {
+		Col [next_col].shared3.prev = c ;
+	    }
+	    head [score] = c ;
+
+	    /* see if this score is less than current min */
+	    min_score = MIN (min_score, score) ;
+
+#ifndef NDEBUG
+	    debug_count++ ;
+#endif
+	}
+    }
+
+#ifndef NDEBUG
+    DEBUG0 (("Live cols %d out of %d, non-princ: %d\n",
+	debug_count, n_col, n_col-debug_count)) ;
+    assert (debug_count == n_col2) ;
+    debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ;
+#endif
+
+    /* === Return number of remaining columns, and max row degree =========== */
+
+    *p_n_col2 = n_col2 ;
+    *p_n_row2 = n_row2 ;
+    *p_max_deg = max_deg ;
+}
+
+
+/* ========================================================================== */
+/* === find_ordering ======================================================== */
+/* ========================================================================== */
+
+/*
+    Order the principal columns of the supercolumn form of the matrix
+    (no supercolumns on input).  Uses an approximate column minimum degree
+    ordering method.  Returns the garbage-collection count.  Not user-callable.
+*/
+
+PRIVATE int find_ordering	/* return the number of garbage collections */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows of A */
+    int n_col,			/* number of columns of A */
+    int Alen,			/* size of A, 2*nnz + elbow_room or larger */
+    RowInfo Row [],		/* of size n_row+1 */
+    ColInfo Col [],		/* of size n_col+1 */
+    int A [],			/* column form and row form of A */
+    int head [],		/* of size n_col+1 */
+    int n_col2,			/* Remaining columns to order */
+    int max_deg,		/* Maximum row degree */
+    int pfree			/* index of first free slot (2*nnz on entry) */
+)
+{
+    /* === Local variables ================================================== */
+
+    int k ;			/* current pivot ordering step */
+    int pivot_col ;		/* current pivot column */
+    int *cp ;			/* a column pointer */
+    int *rp ;			/* a row pointer */
+    int pivot_row ;		/* current pivot row */
+    int *new_cp ;		/* modified column pointer */
+    int *new_rp ;		/* modified row pointer */
+    int pivot_row_start ;	/* pointer to start of pivot row */
+    int pivot_row_degree ;	/* # of columns in pivot row */
+    int pivot_row_length ;	/* # of supercolumns in pivot row */
+    int pivot_col_score ;	/* score of pivot column */
+    int needed_memory ;		/* free space needed for pivot row */
+    int *cp_end ;		/* pointer to the end of a column */
+    int *rp_end ;		/* pointer to the end of a row */
+    int row ;			/* a row index */
+    int col ;			/* a column index */
+    int max_score ;		/* maximum possible score */
+    int cur_score ;		/* score of current column */
+    unsigned int hash ;		/* hash value for supernode detection */
+    int head_column ;		/* head of hash bucket */
+    int first_col ;		/* first column in hash bucket */
+    int tag_mark ;		/* marker value for mark array */
+    int row_mark ;		/* Row [row].shared2.mark */
+    int set_difference ;	/* set difference size of row with pivot row */
+    int min_score ;		/* smallest column score */
+    int col_thickness ;		/* "thickness" (# of columns in a supercol) */
+    int max_mark ;		/* maximum value of tag_mark */
+    int pivot_col_thickness ;	/* number of columns represented by pivot col */
+    int prev_col ;		/* Used by Dlist operations. */
+    int next_col ;		/* Used by Dlist operations. */
+    int ngarbage ;		/* number of garbage collections performed */
+#ifndef NDEBUG
+    int debug_d ;		/* debug loop counter */
+    int debug_step = 0 ;	/* debug loop counter */
+#endif
+
+    /* === Initialization and clear mark ==================================== */
+
+    max_mark = INT_MAX - n_col ;	/* INT_MAX defined in <limits.h> */
+    tag_mark = clear_mark (n_row, Row) ;
+    min_score = 0 ;
+    ngarbage = 0 ;
+    DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ;
+
+    /* === Order the columns ================================================ */
+
+    for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
+    {
+
+#ifndef NDEBUG
+	if (debug_step % 100 == 0)
+	{
+	    DEBUG0 (("\n...       Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+	}
+	else
+	{
+	    DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+	}
+	debug_step++ ;
+	debug_deg_lists (n_row, n_col, Row, Col, head,
+		min_score, n_col2-k, max_deg) ;
+	debug_matrix (n_row, n_col, Row, Col, A) ;
+#endif
+
+	/* === Select pivot column, and order it ============================ */
+
+	/* make sure degree list isn't empty */
+	assert (min_score >= 0) ;
+	assert (min_score <= n_col) ;
+	assert (head [min_score] >= EMPTY) ;
+
+#ifndef NDEBUG
+	for (debug_d = 0 ; debug_d < min_score ; debug_d++)
+	{
+	    assert (head [debug_d] == EMPTY) ;
+	}
+#endif
+
+	/* get pivot column from head of minimum degree list */
+	while (head [min_score] == EMPTY && min_score < n_col)
+	{
+	    min_score++ ;
+	}
+	pivot_col = head [min_score] ;
+	assert (pivot_col >= 0 && pivot_col <= n_col) ;
+	next_col = Col [pivot_col].shared4.degree_next ;
+	head [min_score] = next_col ;
+	if (next_col != EMPTY)
+	{
+	    Col [next_col].shared3.prev = EMPTY ;
+	}
+
+	assert (COL_IS_ALIVE (pivot_col)) ;
+	DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
+
+	/* remember score for defrag check */
+	pivot_col_score = Col [pivot_col].shared2.score ;
+
+	/* the pivot column is the kth column in the pivot order */
+	Col [pivot_col].shared2.order = k ;
+
+	/* increment order count by column thickness */
+	pivot_col_thickness = Col [pivot_col].shared1.thickness ;
+	k += pivot_col_thickness ;
+	assert (pivot_col_thickness > 0) ;
+
+	/* === Garbage_collection, if necessary ============================= */
+
+	needed_memory = MIN (pivot_col_score, n_col - k) ;
+	if (pfree + needed_memory >= Alen)
+	{
+	    pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
+	    ngarbage++ ;
+	    /* after garbage collection we will have enough */
+	    assert (pfree + needed_memory < Alen) ;
+	    /* garbage collection has wiped out the Row[].shared2.mark array */
+	    tag_mark = clear_mark (n_row, Row) ;
+#ifndef NDEBUG
+	    debug_matrix (n_row, n_col, Row, Col, A) ;
+#endif
+	}
+
+	/* === Compute pivot row pattern ==================================== */
+
+	/* get starting location for this new merged row */
+	pivot_row_start = pfree ;
+
+	/* initialize new row counts to zero */
+	pivot_row_degree = 0 ;
+
+	/* tag pivot column as having been visited so it isn't included */
+	/* in merged pivot row */
+	Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
+
+	/* pivot row is the union of all rows in the pivot column pattern */
+	cp = &A [Col [pivot_col].start] ;
+	cp_end = cp + Col [pivot_col].length ;
+	while (cp < cp_end)
+	{
+	    /* get a row */
+	    row = *cp++ ;
+	    DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
+	    /* skip if row is dead */
+	    if (ROW_IS_DEAD (row))
+	    {
+		continue ;
+	    }
+	    rp = &A [Row [row].start] ;
+	    rp_end = rp + Row [row].length ;
+	    while (rp < rp_end)
+	    {
+		/* get a column */
+		col = *rp++ ;
+		/* add the column, if alive and untagged */
+		col_thickness = Col [col].shared1.thickness ;
+		if (col_thickness > 0 && COL_IS_ALIVE (col))
+		{
+		    /* tag column in pivot row */
+		    Col [col].shared1.thickness = -col_thickness ;
+		    assert (pfree < Alen) ;
+		    /* place column in pivot row */
+		    A [pfree++] = col ;
+		    pivot_row_degree += col_thickness ;
+		}
+	    }
+	}
+
+	/* clear tag on pivot column */
+	Col [pivot_col].shared1.thickness = pivot_col_thickness ;
+	max_deg = MAX (max_deg, pivot_row_degree) ;
+
+#ifndef NDEBUG
+	DEBUG3 (("check2\n")) ;
+	debug_mark (n_row, Row, tag_mark, max_mark) ;
+#endif
+
+	/* === Kill all rows used to construct pivot row ==================== */
+
+	/* also kill pivot row, temporarily */
+	cp = &A [Col [pivot_col].start] ;
+	cp_end = cp + Col [pivot_col].length ;
+	while (cp < cp_end)
+	{
+	    /* may be killing an already dead row */
+	    row = *cp++ ;
+	    DEBUG2 (("Kill row in pivot col: %d\n", row)) ;
+	    KILL_ROW (row) ;
+	}
+
+	/* === Select a row index to use as the new pivot row =============== */
+
+	pivot_row_length = pfree - pivot_row_start ;
+	if (pivot_row_length > 0)
+	{
+	    /* pick the "pivot" row arbitrarily (first row in col) */
+	    pivot_row = A [Col [pivot_col].start] ;
+	    DEBUG2 (("Pivotal row is %d\n", pivot_row)) ;
+	}
+	else
+	{
+	    /* there is no pivot row, since it is of zero length */
+	    pivot_row = EMPTY ;
+	    assert (pivot_row_length == 0) ;
+	}
+	assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+
+	/* === Approximate degree computation =============================== */
+
+	/* Here begins the computation of the approximate degree.  The column */
+	/* score is the sum of the pivot row "length", plus the size of the */
+	/* set differences of each row in the column minus the pattern of the */
+	/* pivot row itself.  The column ("thickness") itself is also */
+	/* excluded from the column score (we thus use an approximate */
+	/* external degree). */
+
+	/* The time taken by the following code (compute set differences, and */
+	/* add them up) is proportional to the size of the data structure */
+	/* being scanned - that is, the sum of the sizes of each column in */
+	/* the pivot row.  Thus, the amortized time to compute a column score */
+	/* is proportional to the size of that column (where size, in this */
+	/* context, is the column "length", or the number of row indices */
+	/* in that column).  The number of row indices in a column is */
+	/* monotonically non-decreasing, from the length of the original */
+	/* column on input to colamd. */
+
+	/* === Compute set differences ====================================== */
+
+	DEBUG1 (("** Computing set differences phase. **\n")) ;
+
+	/* pivot row is currently dead - it will be revived later. */
+
+	DEBUG2 (("Pivot row: ")) ;
+	/* for each column in pivot row */
+	rp = &A [pivot_row_start] ;
+	rp_end = rp + pivot_row_length ;
+	while (rp < rp_end)
+	{
+	    col = *rp++ ;
+	    assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+	    DEBUG2 (("Col: %d\n", col)) ;
+
+	    /* clear tags used to construct pivot row pattern */
+	    col_thickness = -Col [col].shared1.thickness ;
+	    assert (col_thickness > 0) ;
+	    Col [col].shared1.thickness = col_thickness ;
+
+	    /* === Remove column from degree list =========================== */
+
+	    cur_score = Col [col].shared2.score ;
+	    prev_col = Col [col].shared3.prev ;
+	    next_col = Col [col].shared4.degree_next ;
+	    assert (cur_score >= 0) ;
+	    assert (cur_score <= n_col) ;
+	    assert (cur_score >= EMPTY) ;
+	    if (prev_col == EMPTY)
+	    {
+		head [cur_score] = next_col ;
+	    }
+	    else
+	    {
+		Col [prev_col].shared4.degree_next = next_col ;
+	    }
+	    if (next_col != EMPTY)
+	    {
+		Col [next_col].shared3.prev = prev_col ;
+	    }
+
+	    /* === Scan the column ========================================== */
+
+	    cp = &A [Col [col].start] ;
+	    cp_end = cp + Col [col].length ;
+	    while (cp < cp_end)
+	    {
+		/* get a row */
+		row = *cp++ ;
+		row_mark = Row [row].shared2.mark ;
+		/* skip if dead */
+		if (ROW_IS_MARKED_DEAD (row_mark))
+		{
+		    continue ;
+		}
+		assert (row != pivot_row) ;
+		set_difference = row_mark - tag_mark ;
+		/* check if the row has been seen yet */
+		if (set_difference < 0)
+		{
+		    assert (Row [row].shared1.degree <= max_deg) ;
+		    set_difference = Row [row].shared1.degree ;
+		}
+		/* subtract column thickness from this row's set difference */
+		set_difference -= col_thickness ;
+		assert (set_difference >= 0) ;
+		/* absorb this row if the set difference becomes zero */
+		if (set_difference == 0)
+		{
+		    DEBUG1 (("aggressive absorption. Row: %d\n", row)) ;
+		    KILL_ROW (row) ;
+		}
+		else
+		{
+		    /* save the new mark */
+		    Row [row].shared2.mark = set_difference + tag_mark ;
+		}
+	    }
+	}
+
+#ifndef NDEBUG
+	debug_deg_lists (n_row, n_col, Row, Col, head,
+		min_score, n_col2-k-pivot_row_degree, max_deg) ;
+#endif
+
+	/* === Add up set differences for each column ======================= */
+
+	DEBUG1 (("** Adding set differences phase. **\n")) ;
+
+	/* for each column in pivot row */
+	rp = &A [pivot_row_start] ;
+	rp_end = rp + pivot_row_length ;
+	while (rp < rp_end)
+	{
+	    /* get a column */
+	    col = *rp++ ;
+	    assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+	    hash = 0 ;
+	    cur_score = 0 ;
+	    cp = &A [Col [col].start] ;
+	    /* compact the column */
+	    new_cp = cp ;
+	    cp_end = cp + Col [col].length ;
+
+	    DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ;
+
+	    while (cp < cp_end)
+	    {
+		/* get a row */
+		row = *cp++ ;
+		assert(row >= 0 && row < n_row) ;
+		row_mark = Row [row].shared2.mark ;
+		/* skip if dead */
+		if (ROW_IS_MARKED_DEAD (row_mark))
+		{
+		    continue ;
+		}
+		assert (row_mark > tag_mark) ;
+		/* compact the column */
+		*new_cp++ = row ;
+		/* compute hash function */
+		hash += row ;
+		/* add set difference */
+		cur_score += row_mark - tag_mark ;
+		/* guard against integer overflow */
+		cur_score = MIN (cur_score, n_col) ;
+	    }
+
+	    /* recompute the column's length */
+	    Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
+
+	    /* === Further mass elimination ================================= */
+
+	    if (Col [col].length == 0)
+	    {
+		DEBUG1 (("further mass elimination. Col: %d\n", col)) ;
+		/* nothing left but the pivot row in this column */
+		KILL_PRINCIPAL_COL (col) ;
+		pivot_row_degree -= Col [col].shared1.thickness ;
+		assert (pivot_row_degree >= 0) ;
+		/* order it */
+		Col [col].shared2.order = k ;
+		/* increment order count by column thickness */
+		k += Col [col].shared1.thickness ;
+	    }
+	    else
+	    {
+		/* === Prepare for supercolumn detection ==================== */
+
+		DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ;
+
+		/* save score so far */
+		Col [col].shared2.score = cur_score ;
+
+		/* add column to hash table, for supercolumn detection */
+		hash %= n_col + 1 ;
+
+		DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+		assert (hash <= n_col) ;
+
+		head_column = head [hash] ;
+		if (head_column > EMPTY)
+		{
+		    /* degree list "hash" is non-empty, use prev (shared3) of */
+		    /* first column in degree list as head of hash bucket */
+		    first_col = Col [head_column].shared3.headhash ;
+		    Col [head_column].shared3.headhash = col ;
+		}
+		else
+		{
+		    /* degree list "hash" is empty, use head as hash bucket */
+		    first_col = - (head_column + 2) ;
+		    head [hash] = - (col + 2) ;
+		}
+		Col [col].shared4.hash_next = first_col ;
+
+		/* save hash function in Col [col].shared3.hash */
+		Col [col].shared3.hash = (int) hash ;
+		assert (COL_IS_ALIVE (col)) ;
+	    }
+	}
+
+	/* The approximate external column degree is now computed.  */
+
+	/* === Supercolumn detection ======================================== */
+
+	DEBUG1 (("** Supercolumn detection phase. **\n")) ;
+
+	detect_super_cols (
+#ifndef NDEBUG
+		n_col, Row,
+#endif
+		Col, A, head, pivot_row_start, pivot_row_length) ;
+
+	/* === Kill the pivotal column ====================================== */
+
+	KILL_PRINCIPAL_COL (pivot_col) ;
+
+	/* === Clear mark =================================================== */
+
+	tag_mark += (max_deg + 1) ;
+	if (tag_mark >= max_mark)
+	{
+	    DEBUG1 (("clearing tag_mark\n")) ;
+	    tag_mark = clear_mark (n_row, Row) ;
+	}
+#ifndef NDEBUG
+	DEBUG3 (("check3\n")) ;
+	debug_mark (n_row, Row, tag_mark, max_mark) ;
+#endif
+
+	/* === Finalize the new pivot row, and column scores ================ */
+
+	DEBUG1 (("** Finalize scores phase. **\n")) ;
+
+	/* for each column in pivot row */
+	rp = &A [pivot_row_start] ;
+	/* compact the pivot row */
+	new_rp = rp ;
+	rp_end = rp + pivot_row_length ;
+	while (rp < rp_end)
+	{
+	    col = *rp++ ;
+	    /* skip dead columns */
+	    if (COL_IS_DEAD (col))
+	    {
+		continue ;
+	    }
+	    *new_rp++ = col ;
+	    /* add new pivot row to column */
+	    A [Col [col].start + (Col [col].length++)] = pivot_row ;
+
+	    /* retrieve score so far and add on pivot row's degree. */
+	    /* (we wait until here for this in case the pivot */
+	    /* row's degree was reduced due to mass elimination). */
+	    cur_score = Col [col].shared2.score + pivot_row_degree ;
+
+	    /* calculate the max possible score as the number of */
+	    /* external columns minus the 'k' value minus the */
+	    /* column's thickness */
+	    max_score = n_col - k - Col [col].shared1.thickness ;
+
+	    /* make the score the external degree of the union-of-rows */
+	    cur_score -= Col [col].shared1.thickness ;
+
+	    /* make sure score is less than or equal to the max score */
+	    cur_score = MIN (cur_score, max_score) ;
+	    assert (cur_score >= 0) ;
+
+	    /* store updated score */
+	    Col [col].shared2.score = cur_score ;
+
+	    /* === Place column back in degree list ========================= */
+
+	    assert (min_score >= 0) ;
+	    assert (min_score <= n_col) ;
+	    assert (cur_score >= 0) ;
+	    assert (cur_score <= n_col) ;
+	    assert (head [cur_score] >= EMPTY) ;
+	    next_col = head [cur_score] ;
+	    Col [col].shared4.degree_next = next_col ;
+	    Col [col].shared3.prev = EMPTY ;
+	    if (next_col != EMPTY)
+	    {
+		Col [next_col].shared3.prev = col ;
+	    }
+	    head [cur_score] = col ;
+
+	    /* see if this score is less than current min */
+	    min_score = MIN (min_score, cur_score) ;
+
+	}
+
+#ifndef NDEBUG
+	debug_deg_lists (n_row, n_col, Row, Col, head,
+		min_score, n_col2-k, max_deg) ;
+#endif
+
+	/* === Resurrect the new pivot row ================================== */
+
+	if (pivot_row_degree > 0)
+	{
+	    /* update pivot row length to reflect any cols that were killed */
+	    /* during super-col detection and mass elimination */
+	    Row [pivot_row].start  = pivot_row_start ;
+	    Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
+	    Row [pivot_row].shared1.degree = pivot_row_degree ;
+	    Row [pivot_row].shared2.mark = 0 ;
+	    /* pivot row is no longer dead */
+	}
+    }
+
+    /* === All principal columns have now been ordered ====================== */
+
+    return (ngarbage) ;
+}
+
+
+/* ========================================================================== */
+/* === order_children ======================================================= */
+/* ========================================================================== */
+
+/*
+    The find_ordering routine has ordered all of the principal columns (the
+    representatives of the supercolumns).  The non-principal columns have not
+    yet been ordered.  This routine orders those columns by walking up the
+    parent tree (a column is a child of the column which absorbed it).  The
+    final permutation vector is then placed in p [0 ... n_col-1], with p [0]
+    being the first column, and p [n_col-1] being the last.  Each walk also
+    collapses the tree, pointing the visited columns directly at their
+    principal parent.  Although not immediately obvious, the time taken by
+    this routine is O (n_col), that is, linear in the number of columns.
+    Not user-callable.
+*/
+
+PRIVATE void order_children
+(
+    /* === Parameters ======================================================= */
+
+    int n_col,			/* number of columns of A */
+    ColInfo Col [],		/* of size n_col+1 */
+    int p []			/* p [0 ... n_col-1] is the column permutation*/
+)
+{
+    /* === Local variables ================================================== */
+
+    int i ;			/* loop counter for all columns */
+    int c ;			/* column index */
+    int parent ;		/* index of column's parent */
+    int order ;			/* column's order */
+
+    /* === Order each non-principal column ================================== */
+
+    for (i = 0 ; i < n_col ; i++)
+    {
+	/* find an un-ordered non-principal column */
+	assert (COL_IS_DEAD (i)) ;
+	if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
+	{
+	    parent = i ;
+	    /* once found, find its principal parent */
+	    do
+	    {
+		parent = Col [parent].shared1.parent ;
+	    } while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
+
+	    /* now, order all un-ordered non-principal columns along path */
+	    /* to this parent.  collapse tree at the same time */
+	    c = i ;
+	    /* get order of parent */
+	    order = Col [parent].shared2.order ;
+
+	    do
+	    {
+		assert (Col [c].shared2.order == EMPTY) ;
+
+		/* order this column */
+		Col [c].shared2.order = order++ ;
+		/* collapse tree */
+		Col [c].shared1.parent = parent ;
+
+		/* get immediate parent of this column */
+		c = Col [c].shared1.parent ;
+
+		/* continue until we hit an ordered column.  There are */
+		/* guaranteed not to be any more unordered columns */
+		/* above an ordered column */
+	    } while (Col [c].shared2.order == EMPTY) ;
+
+	    /* re-order the super_col parent to largest order for this group */
+	    Col [parent].shared2.order = order ;
+	}
+    }
+
+    /* === Generate the permutation ========================================= */
+
+    for (c = 0 ; c < n_col ; c++)
+    {
+	p [Col [c].shared2.order] = c ;
+    }
+}
+
+
+/* ========================================================================== */
+/* === detect_super_cols ==================================================== */
+/* ========================================================================== */
+
+/*
+    Detects supercolumns by finding matches between columns in the hash buckets.
+    Check amongst columns in the set A [row_start ... row_start + row_length-1].
+    The columns under consideration are currently *not* in the degree lists,
+    and have already been placed in the hash buckets.
+
+    The hash bucket for columns whose hash function is equal to h is stored
+    as follows:
+
+	if head [h] is >= 0, then head [h] contains a degree list, so:
+
+		head [h] is the first column in degree bucket h.
+		Col [head [h]].shared3.headhash is the first column in hash bucket h.
+
+	otherwise, the degree list is empty, and:
+
+		-(head [h] + 2) is the first column in hash bucket h.
+
+    For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
+    column" pointer.  Col [c].shared3.hash is used instead as the hash number
+    for that column.  The value of Col [c].shared4.hash_next is the next column
+    in the same hash bucket.
+
+    Assuming no, or "few" hash collisions, the time taken by this routine is
+    linear in the sum of the sizes (lengths) of each column whose score has
+    just been computed in the approximate degree computation.
+    Not user-callable.
+*/
+
+PRIVATE void detect_super_cols	/* merges identical columns found in A */
+(
+    /* === Parameters ======================================================= */
+
+#ifndef NDEBUG
+    /* these two parameters are only needed when debugging is enabled: */
+    int n_col,			/* number of columns of A */
+    RowInfo Row [],		/* of size n_row+1 */
+#endif
+    ColInfo Col [],		/* of size n_col+1 */
+    int A [],			/* row indices of A */
+    int head [],		/* head of degree lists and hash buckets */
+    int row_start,		/* index in A of the set of columns to check */
+    int row_length		/* number of columns to check */
+)
+{
+    /* === Local variables ================================================== */
+
+    int hash ;			/* hash # for a column */
+    int *rp ;			/* pointer to a row */
+    int c ;			/* a column index */
+    int super_c ;		/* column index of the column to absorb into */
+    int *cp1 ;			/* column pointer for column super_c */
+    int *cp2 ;			/* column pointer for column c */
+    int length ;		/* length of column super_c */
+    int prev_c ;		/* column preceding c in hash bucket */
+    int i ;			/* loop counter */
+    int *rp_end ;		/* pointer to the end of the row */
+    int col ;			/* a column index in the row to check */
+    int head_column ;		/* first column in hash bucket or degree list */
+    int first_col ;		/* first column in hash bucket */
+
+    /* === Consider each column in the row ================================== */
+
+    rp = &A [row_start] ;
+    rp_end = rp + row_length ;
+    while (rp < rp_end)
+    {
+	col = *rp++ ;
+	if (COL_IS_DEAD (col))
+	{
+	    continue ;
+	}
+
+	/* get hash number for this column */
+	hash = Col [col].shared3.hash ;
+	assert (hash <= n_col) ;
+
+	/* === Get the first column in this hash bucket ===================== */
+
+	head_column = head [hash] ;
+	if (head_column > EMPTY)
+	{
+	    /* a degree list is present; the bucket head is kept in its head */
+	    first_col = Col [head_column].shared3.headhash ;
+	}
+	else
+	{
+	    /* no degree list: head [hash] holds the bucket head, encoded */
+	    first_col = - (head_column + 2) ;
+	}
+
+	/* === Consider each column in the hash bucket ====================== */
+
+	for (super_c = first_col ; super_c != EMPTY ;
+	    super_c = Col [super_c].shared4.hash_next)
+	{
+	    assert (COL_IS_ALIVE (super_c)) ;
+	    assert (Col [super_c].shared3.hash == hash) ;
+	    length = Col [super_c].length ;
+
+	    /* prev_c is the column preceding column c in the hash bucket */
+	    prev_c = super_c ;
+
+	    /* === Compare super_c with all columns after it ================ */
+
+	    for (c = Col [super_c].shared4.hash_next ;
+		 c != EMPTY ; c = Col [c].shared4.hash_next)
+	    {
+		assert (c != super_c) ;
+		assert (COL_IS_ALIVE (c)) ;
+		assert (Col [c].shared3.hash == hash) ;
+
+		/* not identical if lengths or scores are different */
+		if (Col [c].length != length ||
+		    Col [c].shared2.score != Col [super_c].shared2.score)
+		{
+		    prev_c = c ;
+		    continue ;
+		}
+
+		/* compare the two columns */
+		cp1 = &A [Col [super_c].start] ;
+		cp2 = &A [Col [c].start] ;
+
+		for (i = 0 ; i < length ; i++)
+		{
+		    /* the columns are "clean" (no dead rows) */
+		    assert (ROW_IS_ALIVE (*cp1))  ;
+		    assert (ROW_IS_ALIVE (*cp2))  ;
+		    /* row indices are in the same order for both supercols, */
+		    /* so no gather/scatter is necessary */
+		    if (*cp1++ != *cp2++)
+		    {
+			break ;
+		    }
+		}
+
+		/* the two columns are different if the for-loop "broke" */
+		if (i != length)
+		{
+		    prev_c = c ;
+		    continue ;
+		}
+
+		/* === Got it!  two columns are identical =================== */
+
+		assert (Col [c].shared2.score == Col [super_c].shared2.score) ;
+
+		Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
+		Col [c].shared1.parent = super_c ;
+		KILL_NON_PRINCIPAL_COL (c) ;
+		/* order c later, in order_children() */
+		Col [c].shared2.order = EMPTY ;
+		/* remove c from hash bucket; prev_c stays put, c is unlinked */
+		Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
+	    }
+	}
+
+	/* === Empty this hash bucket ======================================= */
+
+	if (head_column > EMPTY)
+	{
+	    /* corresponding degree list "hash" is not empty */
+	    Col [head_column].shared3.headhash = EMPTY ;
+	}
+	else
+	{
+	    /* corresponding degree list "hash" is empty */
+	    head [hash] = EMPTY ;
+	}
+    }
+}
+
+
+/* ========================================================================== */
+/* === garbage_collection =================================================== */
+/* ========================================================================== */
+
+/*
+    Defragments and compacts columns and rows in the workspace A.  Used when
+    all available memory has been used while performing row merging.  Returns
+    the index of the first free position in A, after garbage collection.  The
+    time taken by this routine is linear in the size of the array A, which is
+    itself linear in the number of nonzeros in the input matrix.
+    Not user-callable.
+*/
+
+PRIVATE int garbage_collection  /* returns the new value of pfree */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows */
+    int n_col,			/* number of columns */
+    RowInfo Row [],		/* row info */
+    ColInfo Col [],		/* column info */
+    int A [],			/* A [0 ... Alen-1] holds the matrix */
+    int *pfree			/* &A [0] ... pfree-1 is in use */
+)
+{
+    /* === Local variables ================================================== */
+
+    int *psrc ;			/* source pointer */
+    int *pdest ;		/* destination pointer */
+    int j ;			/* counter */
+    int r ;			/* a row index */
+    int c ;			/* a column index */
+    int length ;		/* length of a row or column */
+
+#ifndef NDEBUG
+    int debug_rows ;		/* rows flagged below but not yet compacted */
+    DEBUG0 (("Defrag..\n")) ;
+    for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ;
+    debug_rows = 0 ;
+#endif
+
+    /* === Defragment the columns =========================================== */
+
+    pdest = &A[0] ;
+    for (c = 0 ; c < n_col ; c++)
+    {
+	if (COL_IS_ALIVE (c))
+	{
+	    psrc = &A [Col [c].start] ;
+
+	    /* move and compact the column, dropping entries for dead rows */
+	    assert (pdest <= psrc) ;
+	    Col [c].start = (int) (pdest - &A [0]) ;
+	    length = Col [c].length ;
+	    for (j = 0 ; j < length ; j++)
+	    {
+		r = *psrc++ ;
+		if (ROW_IS_ALIVE (r))
+		{
+		    *pdest++ = r ;
+		}
+	    }
+	    Col [c].length = (int) (pdest - &A [Col [c].start]) ;
+	}
+    }
+
+    /* === Prepare to defragment the rows =================================== */
+
+    for (r = 0 ; r < n_row ; r++)
+    {
+	if (ROW_IS_ALIVE (r))
+	{
+	    if (Row [r].length == 0)
+	    {
+		/* this row is of zero length.  cannot compact it, so kill it */
+		DEBUG0 (("Defrag row kill\n")) ;
+		KILL_ROW (r) ;
+	    }
+	    else
+	    {
+		/* save first column index in Row [r].shared2.first_column */
+		psrc = &A [Row [r].start] ;
+		Row [r].shared2.first_column = *psrc ;
+		assert (ROW_IS_ALIVE (r)) ;
+		/* flag the start of the row with the one's complement of row */
+		*psrc = ONES_COMPLEMENT (r) ;
+#ifndef NDEBUG
+		debug_rows++ ;
+#endif
+	    }
+	}
+    }
+
+    /* === Defragment the rows ============================================== */
+
+    psrc = pdest ;		/* rows are scanned from the end of the columns */
+    while (psrc < pfree)
+    {
+	/* find a negative number ... the start of a row */
+	if (*psrc++ < 0)
+	{
+	    psrc-- ;		/* step back onto the flagged entry */
+	    /* get the row index */
+	    r = ONES_COMPLEMENT (*psrc) ;
+	    assert (r >= 0 && r < n_row) ;
+	    /* restore first column index */
+	    *psrc = Row [r].shared2.first_column ;
+	    assert (ROW_IS_ALIVE (r)) ;
+
+	    /* move and compact the row, dropping entries for dead columns */
+	    assert (pdest <= psrc) ;
+	    Row [r].start = (int) (pdest - &A [0]) ;
+	    length = Row [r].length ;
+	    for (j = 0 ; j < length ; j++)
+	    {
+		c = *psrc++ ;
+		if (COL_IS_ALIVE (c))
+		{
+		    *pdest++ = c ;
+		}
+	    }
+	    Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+#ifndef NDEBUG
+	    debug_rows-- ;
+#endif
+	}
+    }
+    /* ensure we found all the rows */
+    assert (debug_rows == 0) ;
+
+    /* === Return the new value of pfree ==================================== */
+
+    return ((int) (pdest - &A [0])) ;
+}
+
+
+/* ========================================================================== */
+/* === clear_mark =========================================================== */
+/* ========================================================================== */
+
+/*
+    Clears the Row [].shared2.mark array, and returns the new tag_mark.
+    Return value is the new tag_mark.  Not user-callable.
+*/
+
+PRIVATE int clear_mark	/* always returns 1, the restarted tag_mark */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,		/* number of rows in A */
+    RowInfo Row []	/* live rows in Row [0 ... n_row-1] get mark zero */
+)
+{
+    /* === Local variables ================================================== */
+
+    int row = 0 ;	/* next row to examine */
+    DEBUG0 (("Clear mark\n")) ;
+    while (row < n_row)
+    {
+	if (ROW_IS_ALIVE (row))
+	{
+	    Row [row].shared2.mark = 0 ;	/* dead rows are left untouched */
+	}
+	row++ ;
+    }
+    /* marks have been reset, so tagging starts over from one */
+    return (1) ;
+}
+
+
+/* ========================================================================== */
+/* === debugging routines =================================================== */
+/* ========================================================================== */
+
+/* When debugging is disabled, the remainder of this file is ignored. */
+
+#ifndef NDEBUG
+
+
+/* ========================================================================== */
+/* === debug_structures ===================================================== */
+/* ========================================================================== */
+
+/*
+    At this point, all empty rows and columns are dead.  All live columns
+    are "clean" (containing no dead rows) and simplicial (no supercolumns
+    yet).  Rows may contain dead columns, but all live rows contain at
+    least one live column.
+*/
+
+PRIVATE void debug_structures	/* asserts consistency of A, Row and Col */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows to check */
+    int n_col,			/* number of columns to check */
+    RowInfo Row [],		/* row info, indexed 0 ... n_row-1 here */
+    ColInfo Col [],		/* column info, indexed 0 ... n_col-1 here */
+    int A [],			/* row and column index lists */
+    int n_col2			/* dead columns must have order >= n_col2 */
+)
+{
+    /* === Local variables ================================================== */
+
+    int i ;			/* order of a dead column, or live-column count */
+    int c ;			/* a column index */
+    int *cp ;			/* pointer into a column */
+    int *cp_end ;		/* pointer to the end of the column */
+    int len ;			/* length of a row or column */
+    int score ;			/* score of a live column */
+    int r ;			/* a row index */
+    int *rp ;			/* pointer into a row */
+    int *rp_end ;		/* pointer to the end of the row */
+    int deg ;			/* degree of a live row */
+
+    /* === Check A, Row, and Col ============================================ */
+
+    for (c = 0 ; c < n_col ; c++)
+    {
+	if (COL_IS_ALIVE (c))
+	{
+	    len = Col [c].length ;
+	    score = Col [c].shared2.score ;
+	    DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
+	    assert (len > 0) ;
+	    assert (score >= 0) ;
+	    assert (Col [c].shared1.thickness == 1) ;
+	    cp = &A [Col [c].start] ;
+	    cp_end = cp + len ;
+	    while (cp < cp_end)		/* a live column holds only live rows */
+	    {
+		r = *cp++ ;
+		assert (ROW_IS_ALIVE (r)) ;
+	    }
+	}
+	else
+	{
+	    i = Col [c].shared2.order ;	/* dead column: check its order */
+	    assert (i >= n_col2 && i < n_col) ;
+	}
+    }
+
+    for (r = 0 ; r < n_row ; r++)
+    {
+	if (ROW_IS_ALIVE (r))
+	{
+	    i = 0 ;			/* counts live columns in this row */
+	    len = Row [r].length ;
+	    deg = Row [r].shared1.degree ;
+	    assert (len > 0) ;
+	    assert (deg > 0) ;
+	    rp = &A [Row [r].start] ;
+	    rp_end = rp + len ;
+	    while (rp < rp_end)
+	    {
+		c = *rp++ ;
+		if (COL_IS_ALIVE (c))
+		{
+		    i++ ;
+		}
+	    }
+	    assert (i > 0) ;		/* a live row needs a live column */
+	}
+    }
+}
+
+
+/* ========================================================================== */
+/* === debug_deg_lists ====================================================== */
+/* ========================================================================== */
+
+/*
+    Prints the contents of the degree lists.  Counts the number of columns
+    in the degree list and compares it to the total it should have.  Also
+    checks the row degrees.
+*/
+
+PRIVATE void debug_deg_lists	/* prints and checks the degree lists */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows whose degree is checked */
+    int n_col,			/* degree lists head [0 ... n_col] are walked */
+    RowInfo Row [],		/* row info */
+    ColInfo Col [],		/* column info */
+    int head [],		/* head [deg] is first column of list deg */
+    int min_score,		/* only printed, not checked */
+    int should,			/* expected total thickness over all lists */
+    int max_deg			/* upper bound asserted on live row degrees */
+)
+{
+    /* === Local variables ================================================== */
+
+    int deg ;			/* degree list being walked */
+    int col ;			/* column in the current degree list */
+    int have ;			/* total thickness actually found */
+    int row ;			/* a row index */
+
+    /* === Check the degree lists =========================================== */
+
+    if (n_col > 10000 && debug_colamd <= 0)	/* skip for big problems */
+    {
+	return ;
+    }
+    have = 0 ;
+    DEBUG4 (("Degree lists: %d\n", min_score)) ;
+    for (deg = 0 ; deg <= n_col ; deg++)
+    {
+	col = head [deg] ;
+	if (col == EMPTY)
+	{
+	    continue ;
+	}
+	DEBUG4 (("%d:", deg)) ;
+	while (col != EMPTY)
+	{
+	    DEBUG4 ((" %d", col)) ;
+	    have += Col [col].shared1.thickness ;
+	    assert (COL_IS_ALIVE (col)) ;
+	    col = Col [col].shared4.degree_next ;
+	}
+	DEBUG4 (("\n")) ;
+    }
+    DEBUG4 (("should %d have %d\n", should, have)) ;
+    assert (should == have) ;
+
+    /* === Check the row degrees ============================================ */
+
+    if (n_row > 10000 && debug_colamd <= 0)	/* skip for big problems */
+    {
+	return ;
+    }
+    for (row = 0 ; row < n_row ; row++)
+    {
+	if (ROW_IS_ALIVE (row))
+	{
+	    assert (Row [row].shared1.degree <= max_deg) ;
+	}
+    }
+}
+
+
+/* ========================================================================== */
+/* === debug_mark =========================================================== */
+/* ========================================================================== */
+
+/*
+    Ensures that tag_mark is positive and no greater than the maximum, and
+    also that each entry in the mark array is less than the tag mark.
+*/
+
+PRIVATE void debug_mark	/* sanity-checks tag_mark against the row marks */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,		/* number of rows whose marks are checked */
+    RowInfo Row [],	/* Row [0 ... n_row-1].shared2.mark is inspected */
+    int tag_mark,	/* current tag; must exceed every row mark */
+    int max_mark	/* largest value tag_mark may take */
+)
+{
+    /* === Local variables ================================================== */
+
+    int r ;		/* a row index */
+
+    /* === Check the Row marks ============================================== */
+
+    assert (tag_mark > 0 && tag_mark <= max_mark) ;
+    if (n_row <= 10000 || debug_colamd > 0)
+    {
+	/* small problem, or checking explicitly enabled: scan every row */
+	for (r = 0 ; r < n_row ; r++)
+	{
+	    assert (Row [r].shared2.mark < tag_mark) ;
+	}
+    }
+}
+
+
+/* ========================================================================== */
+/* === debug_matrix ========================================================= */
+/* ========================================================================== */
+
+/*
+    Prints out the contents of the columns and the rows.
+*/
+
+PRIVATE void debug_matrix	/* dumps rows and columns via DEBUG3 */
+(
+    /* === Parameters ======================================================= */
+
+    int n_row,			/* number of rows to dump */
+    int n_col,			/* number of columns to dump */
+    RowInfo Row [],		/* row info */
+    ColInfo Col [],		/* column info */
+    int A []			/* row and column index lists */
+)
+{
+    /* === Local variables ================================================== */
+
+    int r ;			/* a row index */
+    int c ;			/* a column index */
+    int *rp ;			/* pointer into a row */
+    int *rp_end ;		/* pointer to the end of the row */
+    int *cp ;			/* pointer into a column */
+    int *cp_end ;		/* pointer to the end of the column */
+
+    /* === Dump the rows and columns of the matrix ========================== */
+
+    if (debug_colamd < 3)	/* nothing is dumped below debug level 3 */
+    {
+	return ;
+    }
+    DEBUG3 (("DUMP MATRIX:\n")) ;
+    for (r = 0 ; r < n_row ; r++)
+    {
+	DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ;
+	if (ROW_IS_DEAD (r))
+	{
+	    continue ;		/* contents of dead rows are not printed */
+	}
+	DEBUG3 (("start %d length %d degree %d\n",
+		Row [r].start, Row [r].length, Row [r].shared1.degree)) ;
+	rp = &A [Row [r].start] ;
+	rp_end = rp + Row [r].length ;
+	while (rp < rp_end)
+	{
+	    c = *rp++ ;
+	    DEBUG3 (("	%d col %d\n", COL_IS_ALIVE (c), c)) ;
+	}
+    }
+
+    for (c = 0 ; c < n_col ; c++)
+    {
+	DEBUG3 (("Col %d alive? %d\n", c, COL_IS_ALIVE (c))) ;
+	if (COL_IS_DEAD (c))
+	{
+	    continue ;		/* contents of dead columns are not printed */
+	}
+	DEBUG3 (("start %d length %d shared1 %d shared2 %d\n",
+		Col [c].start, Col [c].length,
+		Col [c].shared1.thickness, Col [c].shared2.score)) ;
+	cp = &A [Col [c].start] ;
+	cp_end = cp + Col [c].length ;
+	while (cp < cp_end)
+	{
+	    r = *cp++ ;
+	    DEBUG3 (("	%d row %d\n", ROW_IS_ALIVE (r), r)) ;
+	}
+    }
+}
+
+#endif
+
diff --git a/contrib/taucs/external/src/colamd.h b/contrib/taucs/external/src/colamd.h
new file mode 100644
index 0000000000000000000000000000000000000000..00783983b27cbf8a744e4b64c8e291fb5469fd3b
--- /dev/null
+++ b/contrib/taucs/external/src/colamd.h
@@ -0,0 +1,67 @@
+/* ========================================================================== */
+/* === colamd prototypes and definitions ==================================== */
+/* ========================================================================== */
+
+/*
+    This is the colamd include file,
+
+	http://www.cise.ufl.edu/~davis/colamd/colamd.h
+
+    for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+
+	http://www.cise.ufl.edu/~davis/colamd/
+
+    See those files for a description of colamd and symamd, and for the
+    copyright notice, which also applies to this file.
+
+    August 3, 1998.  Version 1.0.
+*/
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+/* size of the knobs [ ] array.  Only knobs [0..1] are given meanings below. */
+#define COLAMD_KNOBS 20
+
+/* number of output statistics.  Only A [0..3] are given meanings below. */
+#define COLAMD_STATS 20
+
+/* knobs [0] and A [0]: dense row knob and output statistic. */
+#define COLAMD_DENSE_ROW 0
+
+/* knobs [1] and A [1]: dense column knob and output statistic. */
+#define COLAMD_DENSE_COL 1
+
+/* A [2]: memory defragmentation count output statistic */
+#define COLAMD_DEFRAG_COUNT 2
+
+/* A [3]: whether or not the input columns were jumbled or had duplicates */
+#define COLAMD_JUMBLED_COLS 3
+
+/* ========================================================================== */
+/* === Prototypes of user-callable routines ================================= */
+/* ========================================================================== */
+
+int colamd_recommended		/* returns recommended value of Alen */
+(
+    int nnz,			/* nonzeros in A */
+    int n_row,			/* number of rows in A */
+    int n_col			/* number of columns in A */
+) ;
+
+void colamd_set_defaults	/* sets default parameters */
+(				/* knobs argument is modified on output */
+    double knobs [COLAMD_KNOBS]	/* parameter settings for colamd */
+) ;
+
+int colamd			/* returns TRUE if successful, FALSE otherwise */
+(				/* A and p arguments are modified on output */
+    int n_row,			/* number of rows in A */
+    int n_col,			/* number of columns in A */
+    int Alen,			/* size of the array A */
+    int A [],			/* row indices of A, of size Alen */
+    int p [],			/* column pointers of A, of size n_col+1 */
+    double knobs [COLAMD_KNOBS]	/* parameter settings for colamd */
+) ;
+
diff --git a/contrib/taucs/external/src/f2c.h b/contrib/taucs/external/src/f2c.h
new file mode 100644
index 0000000000000000000000000000000000000000..9397f984c9e69df3dbcc4c7d5c3ec38f09ca37d9
--- /dev/null
+++ b/contrib/taucs/external/src/f2c.h
@@ -0,0 +1,217 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+typedef long int integer;	/* NOTE(review): may be wider than int -- check callers */
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;		/* r = real, i = imaginary */
+typedef struct { doublereal r, i; } doublecomplex;
+typedef long int logical;	/* same underlying type as integer */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+#ifdef f2c_i2
+/* for -i2 */
+typedef short flag;
+typedef short ftnlen;
+typedef short ftnint;
+#else
+typedef long flag;
+typedef long ftnlen;
+typedef long ftnint;
+#endif
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	shortint h;
+	integer i;
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+typedef long Long;	/* No longer used; formerly in Namelist */
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* macro: x is evaluated twice */
+#define dabs(x) (doublereal)abs(x)
+#define min(a,b) ((a) <= (b) ? (a) : (b))	/* one argument is evaluated twice */
+#define max(a,b) ((a) >= (b) ? (a) : (b))	/* one argument is evaluated twice */
+#define dmin(a,b) (doublereal)min(a,b)
+#define dmax(a,b) (doublereal)max(a,b)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef int /* Unknown procedure type */ (*U_fp)(...);
+typedef shortint (*J_fp)(...);
+typedef integer (*I_fp)(...);
+typedef real (*R_fp)(...);
+typedef doublereal (*D_fp)(...), (*E_fp)(...);
+typedef /* Complex */ VOID (*C_fp)(...);
+typedef /* Double Complex */ VOID (*Z_fp)(...);
+typedef logical (*L_fp)(...);
+typedef shortlogical (*K_fp)(...);
+typedef /* Character */ VOID (*H_fp)(...);
+typedef /* Subroutine */ int (*S_fp)(...);
+#else
+typedef int /* Unknown procedure type */ (*U_fp)();
+typedef shortint (*J_fp)();
+typedef integer (*I_fp)();
+typedef real (*R_fp)();
+typedef doublereal (*D_fp)(), (*E_fp)();
+typedef /* Complex */ VOID (*C_fp)();
+typedef /* Double Complex */ VOID (*Z_fp)();
+typedef logical (*L_fp)();
+typedef shortlogical (*K_fp)();
+typedef /* Character */ VOID (*H_fp)();
+typedef /* Subroutine */ int (*S_fp)();
+#endif
+/* E_fp is for real functions when -R is not specified */
+typedef VOID C_f;	/* complex function */
+typedef VOID H_f;	/* character function */
+typedef VOID Z_f;	/* double complex function */
+typedef doublereal E_f;	/* real function with -R not specified */
+
+/* undef any lower-case symbols that your C compiler predefines, e.g.: */
+
+#ifndef Skip_f2c_Undefs
+#undef cray
+#undef gcos
+#undef mc68010
+#undef mc68020
+#undef mips
+#undef pdp11
+#undef sgi
+#undef sparc
+#undef sun
+#undef sun2
+#undef sun3
+#undef sun4
+#undef u370
+#undef u3b
+#undef u3b2
+#undef u3b5
+#undef unix
+#undef vax
+#endif
+#endif
+
+/* KMS: rename huge/near on Windows (presumably reserved there -- confirm) */
+#ifdef _WIN32
+#define huge huged
+#define near neard
+#endif
diff --git a/contrib/taucs/external/src/genmmd.c b/contrib/taucs/external/src/genmmd.c
new file mode 100644
index 0000000000000000000000000000000000000000..3b372b8a7d0aa435cb0a421de0e66a233b6a586f
--- /dev/null
+++ b/contrib/taucs/external/src/genmmd.c
@@ -0,0 +1,1021 @@
+/* genmmd.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Sivan: I modified INTEGER*2 -> INTEGER*4 */
+/* *************************************************************** */
+/* *************************************************************** */
+/* ****     GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE     **** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/*     AUTHOR - JOSEPH W.H. LIU */
+/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/*     PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE */
+/*        ALGORITHM.  IT MAKES USE OF THE IMPLICIT REPRESENTATION */
+/*        OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE */
+/*        NOTION OF INDISTINGUISHABLE NODES.  IT ALSO IMPLEMENTS */
+/*        THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM */
+/*        EXTERNAL DEGREE. */
+/*        --------------------------------------------- */
+/*        CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE */
+/*        DESTROYED. */
+/*        --------------------------------------------- */
+
+/*     INPUT PARAMETERS - */
+/*        NEQNS  - NUMBER OF EQUATIONS. */
+/*        (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE. */
+/*        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
+/*        MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) INTEGER */
+/*                 (ANY SMALLER ESTIMATE WILL DO) FOR MARKING */
+/*                 NODES. */
+
+/*     OUTPUT PARAMETERS - */
+/*        PERM   - THE MINIMUM DEGREE ORDERING. */
+/*        INVP   - THE INVERSE OF PERM. */
+/*        NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO */
+/*                 SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME. */
+
+/*     WORKING PARAMETERS - */
+/*        DHEAD  - VECTOR FOR HEAD OF DEGREE LISTS. */
+/*        INVP   - USED TEMPORARILY FOR DEGREE FORWARD LINK. */
+/*        PERM   - USED TEMPORARILY FOR DEGREE BACKWARD LINK. */
+/*        QSIZE  - VECTOR FOR SIZE OF SUPERNODES. */
+/*        LLIST  - VECTOR FOR TEMPORARY LINKED LISTS. */
+/*        MARKER - A TEMPORARY MARKER VECTOR. */
+
+/*     PROGRAM SUBROUTINES - */
+/*        MMDELM, MMDINT, MMDNUM, MMDUPD. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int genmmd_(neqns, xadj, adjncy, invp, perm, delta, dhead, 
+	qsize, llist, marker, maxint, nofsub)
+integer *neqns, *xadj, *adjncy, *invp, *perm, *delta, *dhead, *qsize, *llist, 
+	*marker, *maxint, *nofsub;
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables (declared static by f2c, so they occupy static storage shared by every call) */
+    static integer mdeg, ehead, i, mdlmt, mdnode;
+    extern /* Subroutine */ int mmdelm_(), mmdupd_(), mmdint_(), mmdnum_();
+    static integer nextmd, tag, num;
+/* Overview: initialise with mmdint_, number the nodes found in degree list 1, then loop: take nodes from the */
+/* lowest non-empty degree lists, eliminate each via mmdelm_, refresh degrees via mmdupd_; finish with mmdnum_. Always returns 0. */
+/* *************************************************************** */
+
+/*         INTEGER*2  ADJNCY(1), DHEAD(1) , INVP(1)  , LLIST(1) , */
+/*     1              MARKER(1), PERM(1)  , QSIZE(1) */
+
+/* *************************************************************** */
+
+    /* Parameter adjustments: step every array pointer back by one so the body can index from 1 */
+    --marker;
+    --llist;
+    --qsize;
+    --dhead;
+    --perm;
+    --invp;
+    --adjncy;
+    --xadj;
+
+    /* Function Body: with *neqns <= 0 nothing is done and *nofsub is left untouched */
+    if (*neqns <= 0) {
+	return 0;
+    }
+
+/*        ------------------------------------------------ */
+/*        INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM. */
+/*        ------------------------------------------------ */
+    *nofsub = 0;
+    mmdint_(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
+	    qsize[1], &llist[1], &marker[1]);
+
+/*        ---------------------------------------------- */
+/*        NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1. */
+/*        ---------------------------------------------- */
+    num = 1;
+
+/*        ----------------------------- */
+/*        ELIMINATE ALL ISOLATED NODES. */
+/*        (THEY ARE THE MEMBERS OF DEGREE LIST 1, CHAINED THROUGH INVP.) */
+    nextmd = dhead[1];
+L100:
+    if (nextmd <= 0) {
+	goto L200;
+    }
+    mdnode = nextmd;
+    nextmd = invp[mdnode];
+    marker[mdnode] = *maxint; /* the tag-reset loop below only clears markers that are < *maxint */
+    invp[mdnode] = -num; /* the assigned number is stored negated */
+    ++num;
+    goto L100;
+
+L200:
+/*        ---------------------------------------- */
+/*        SEARCH FOR NODE OF THE MINIMUM DEGREE. */
+/*        MDEG IS THE CURRENT MINIMUM DEGREE; */
+/*        TAG IS USED TO FACILITATE MARKING NODES. */
+/*        ---------------------------------------- */
+    if (num > *neqns) {
+	goto L1000;
+    }
+    tag = 1;
+    dhead[1] = 0;
+    mdeg = 2; /* list 1 was just emptied, so the scan starts at list 2 */
+L300:
+    if (dhead[mdeg] > 0) {
+	goto L400;
+    }
+    ++mdeg; /* NOTE(review): this scan has no upper bound on mdeg; it appears to rely on some degree list being non-empty whenever it is reached -- confirm */
+    goto L300;
+L400:
+/*            ------------------------------------------------- */
+/*            USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS */
+/*            WHEN A DEGREE UPDATE IS TO BE PERFORMED. */
+/*            ------------------------------------------------- */
+    mdlmt = mdeg + *delta;
+    ehead = 0; /* head of the llist chain of nodes eliminated in this pass */
+
+L500:
+    mdnode = dhead[mdeg];
+    if (mdnode > 0) {
+	goto L600;
+    }
+    ++mdeg;
+    if (mdeg > mdlmt) {
+	goto L900;
+    }
+    goto L500;
+L600:
+/*                ---------------------------------------- */
+/*                REMOVE MDNODE FROM THE DEGREE STRUCTURE. */
+/*                ---------------------------------------- */
+    nextmd = invp[mdnode];
+    dhead[mdeg] = nextmd;
+    if (nextmd > 0) {
+	perm[nextmd] = -mdeg; /* the new list head gets minus the list index as its back link */
+    }
+    invp[mdnode] = -num;
+    *nofsub = *nofsub + mdeg + qsize[mdnode] - 2;
+    if (num + qsize[mdnode] > *neqns) { /* counting this supernode completes the ordering: skip the elimination and finish */
+	goto L1000;
+    }
+/*                ---------------------------------------------- */
+/*                ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH */
+/*                TRANSFORMATION.  RESET TAG VALUE IF NECESSARY. */
+/*                ---------------------------------------------- */
+    ++tag;
+    if (tag < *maxint) {
+	goto L800;
+    }
+    tag = 1;
+    i__1 = *neqns;
+    for (i = 1; i <= i__1; ++i) {
+	if (marker[i] < *maxint) {
+	    marker[i] = 0;
+	}
+/* L700: */
+    }
+L800:
+    mmdelm_(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
+	    qsize[1], &llist[1], &marker[1], maxint, &tag);
+    num += qsize[mdnode];
+    llist[mdnode] = ehead;
+    ehead = mdnode;
+    if (*delta >= 0) { /* delta >= 0: keep eliminating before updating degrees; delta < 0: fall through and update after every node */
+	goto L500;
+    }
+L900:
+/*            ------------------------------------------- */
+/*            UPDATE DEGREES OF THE NODES INVOLVED IN THE */
+/*            MINIMUM DEGREE NODES ELIMINATION. */
+/*            ------------------------------------------- */
+    if (num > *neqns) {
+	goto L1000;
+    }
+    mmdupd_(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1], &
+	    invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint, &tag)
+	    ;
+    goto L300;
+/* Every node is accounted for: let mmdnum_ turn the recorded numbering into the final perm/invp. */
+L1000:
+    mmdnum_(neqns, &perm[1], &invp[1], &qsize[1]);
+    return 0;
+
+} /* genmmd_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ***     MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION     *** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/*     AUTHOR - JOSEPH W.H. LIU */
+/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/*     PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE */
+/*        MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE */
+/*        ALGORITHM. */
+
+/*     INPUT PARAMETERS - */
+/*        NEQNS  - NUMBER OF EQUATIONS. */
+/*        (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
+
+/*     OUTPUT PARAMETERS - */
+/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/*        QSIZE  - SIZE OF SUPERNODE (INITIALIZED TO ONE). */
+/*        LLIST  - LINKED LIST. */
+/*        MARKER - MARKER VECTOR. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdint_(neqns, xadj, adjncy, dhead, dforw, dbakw, qsize, 
+	llist, marker)
+integer *neqns, *xadj, *adjncy, *dhead, *dforw, *dbakw, *qsize, *llist, *
+	marker;
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables (declared static by f2c, so they occupy static storage shared by every call) */
+    static integer ndeg, node, fnode;
+/* Overview: clears dhead/marker/llist and sets qsize to 1 for nodes 1..*neqns, then pushes every node onto the */
+/* front of degree list ndeg = (number of adjacency entries of the node) + 1.  Always returns 0. */
+/* *************************************************************** */
+
+/*         INTEGER*2  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) , */
+/*     1              LLIST(1) , MARKER(1), QSIZE(1) */
+
+/* *************************************************************** */
+
+    /* Parameter adjustments: step every array pointer back by one so the body can index from 1 */
+    --marker;
+    --llist;
+    --qsize;
+    --dbakw;
+    --dforw;
+    --dhead;
+    --adjncy;
+    --xadj;
+
+    /* Function Body */
+    i__1 = *neqns;
+    for (node = 1; node <= i__1; ++node) {
+	dhead[node] = 0;
+	qsize[node] = 1;
+	marker[node] = 0;
+	llist[node] = 0;
+/* L100: */
+    }
+/*        ------------------------------------------ */
+/*        INITIALIZE THE DEGREE DOUBLY LINKED LISTS. */
+/*        ------------------------------------------ */
+    i__1 = *neqns;
+    for (node = 1; node <= i__1; ++node) {
+	ndeg = xadj[node + 1] - xadj[node] + 1; /* adjacency count plus one, so list 1 holds the nodes with no neighbours */
+	fnode = dhead[ndeg];
+	dforw[node] = fnode;
+	dhead[ndeg] = node;
+	if (fnode > 0) {
+	    dbakw[fnode] = node;
+	}
+	dbakw[node] = -ndeg; /* a list head stores minus its list index as its back link */
+/* L200: */
+    }
+    return 0;
+
+} /* mmdint_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* **     MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION     *** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/*     AUTHOR - JOSEPH W.H. LIU */
+/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/*     PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF */
+/*        MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH */
+/*        IS STORED IN THE QUOTIENT GRAPH FORMAT.  IT ALSO */
+/*        TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE */
+/*        ELIMINATION GRAPH. */
+
+/*     INPUT PARAMETERS - */
+/*        MDNODE - NODE OF MINIMUM DEGREE. */
+/*        MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE (SHORT) */
+/*                 INTEGER. */
+/*        TAG    - TAG VALUE. */
+
+/*     UPDATED PARAMETERS - */
+/*        (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE. */
+/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/*        QSIZE  - SIZE OF SUPERNODE. */
+/*        MARKER - MARKER VECTOR. */
+/*        LLIST  - TEMPORARY LINKED LIST OF ELIMINATED NABORS. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdelm_(mdnode, xadj, adjncy, dhead, dforw, dbakw, qsize,
+	 llist, marker, maxint, tag)
+integer *mdnode, *xadj, *adjncy, *dhead, *dforw, *dbakw, *qsize, *llist, *
+	marker, *maxint, *tag;
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Local variables (declared static by f2c, so they occupy static storage shared by every call) */
+    static integer node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr, 
+	    istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs, npv;
+/* Overview: rewrites *mdnode's adjacency storage in place to hold its reachable set, then for every reachable node */
+/* unlinks it from the degree lists, drops its tagged neighbours, and either merges it into *mdnode or flags it for update.  Always returns 0. */
+/* *************************************************************** */
+
+/*         INTEGER*2  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) , */
+/*     1              LLIST(1) , MARKER(1), QSIZE(1) */
+
+/* *************************************************************** */
+
+/*        ----------------------------------------------- */
+/*        FIND REACHABLE SET AND PLACE IN DATA STRUCTURE. */
+/*        ----------------------------------------------- */
+    /* Parameter adjustments: step every array pointer back by one so the body can index from 1 */
+    --marker;
+    --llist;
+    --qsize;
+    --dbakw;
+    --dforw;
+    --dhead;
+    --adjncy;
+    --xadj;
+
+    /* Function Body */
+    marker[*mdnode] = *tag;
+    istrt = xadj[*mdnode];
+    istop = xadj[*mdnode + 1] - 1;
+/*        ------------------------------------------------------- */
+/*        ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED */
+/*        NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION */
+/*        FOR THE NEXT REACHABLE NODE. */
+/*        ------------------------------------------------------- */
+    elmnt = 0;
+    rloc = istrt;
+    rlmt = istop;
+    i__1 = istop;
+    for (i = istrt; i <= i__1; ++i) {
+	nabor = adjncy[i];
+	if (nabor == 0) { /* a zero entry ends the list before istop */
+	    goto L300;
+	}
+	if (marker[nabor] >= *tag) {
+	    goto L200;
+	}
+	marker[nabor] = *tag;
+	if (dforw[nabor] < 0) { /* negative dforw: treated as an eliminated neighbour and chained through llist at L100 */
+	    goto L100;
+	}
+	adjncy[rloc] = nabor;
+	++rloc;
+	goto L200;
+L100:
+	llist[nabor] = elmnt;
+	elmnt = nabor;
+L200:
+	;
+    }
+L300:
+/*            ----------------------------------------------------- */
+/*            MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS. */
+/*            ----------------------------------------------------- */
+    if (elmnt <= 0) {
+	goto L1000;
+    }
+    adjncy[rlmt] = -elmnt; /* a negative entry is a link: scanning continues in that node's adjacency storage */
+    link = elmnt;
+L400:
+    jstrt = xadj[link];
+    jstop = xadj[link + 1] - 1;
+    i__1 = jstop;
+    for (j = jstrt; j <= i__1; ++j) {
+	node = adjncy[j];
+	link = -node;
+	if (node < 0) {
+	    goto L400;
+	} else if (node == 0) {
+	    goto L900;
+	} else {
+	    goto L500;
+	}
+L500:
+	if (marker[node] >= *tag || dforw[node] < 0) {
+	    goto L800;
+	}
+	marker[node] = *tag;
+/*                            --------------------------------- */
+/*                            USE STORAGE FROM ELIMINATED NODES */
+/*                            IF NECESSARY. */
+/*                            --------------------------------- */
+L600:
+	if (rloc < rlmt) {
+	    goto L700;
+	}
+	link = -adjncy[rlmt];
+	rloc = xadj[link];
+	rlmt = xadj[link + 1] - 1;
+	goto L600;
+L700:
+	adjncy[rloc] = node;
+	++rloc;
+L800:
+	;
+    }
+L900:
+    elmnt = llist[elmnt];
+    goto L300;
+L1000:
+    if (rloc <= rlmt) {
+	adjncy[rloc] = 0; /* zero-terminate the reachable set when it does not fill the available storage */
+    }
+/*        -------------------------------------------------------- */
+/*        FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ... */
+/*        -------------------------------------------------------- */
+    link = *mdnode;
+L1100:
+    istrt = xadj[link];
+    istop = xadj[link + 1] - 1;
+    i__1 = istop;
+    for (i = istrt; i <= i__1; ++i) {
+	rnode = adjncy[i];
+	link = -rnode;
+	if (rnode < 0) {
+	    goto L1100;
+	} else if (rnode == 0) {
+	    goto L1800;
+	} else {
+	    goto L1200;
+	}
+L1200:
+/*                -------------------------------------------- */
+/*                IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */
+/*                -------------------------------------------- */
+	pvnode = dbakw[rnode];
+	if (pvnode == 0 || pvnode == -(*maxint)) { /* these two back-link values mean rnode is not in a degree list */
+	    goto L1300;
+	}
+/*                    ------------------------------------- */
+/*                    THEN REMOVE RNODE FROM THE STRUCTURE. */
+/*                    ------------------------------------- */
+	nxnode = dforw[rnode];
+	if (nxnode > 0) {
+	    dbakw[nxnode] = pvnode;
+	}
+	if (pvnode > 0) {
+	    dforw[pvnode] = nxnode;
+	}
+	npv = -pvnode;
+	if (pvnode < 0) { /* rnode was a list head: its back link is minus the list index */
+	    dhead[npv] = nxnode;
+	}
+L1300:
+/*                ---------------------------------------- */
+/*                PURGE INACTIVE QUOTIENT NABORS OF RNODE. */
+/*                ---------------------------------------- */
+	jstrt = xadj[rnode];
+	jstop = xadj[rnode + 1] - 1;
+	xqnbr = jstrt;
+	i__2 = jstop;
+	for (j = jstrt; j <= i__2; ++j) {
+	    nabor = adjncy[j];
+	    if (nabor == 0) {
+		goto L1500;
+	    }
+	    if (marker[nabor] >= *tag) {
+		goto L1400;
+	    }
+	    adjncy[xqnbr] = nabor;
+	    ++xqnbr;
+L1400:
+	    ;
+	}
+L1500:
+/*                ---------------------------------------- */
+/*                IF NO ACTIVE NABOR AFTER THE PURGING ... */
+/*                ---------------------------------------- */
+	nqnbrs = xqnbr - jstrt;
+	if (nqnbrs > 0) {
+	    goto L1600;
+	}
+/*                    ----------------------------- */
+/*                    THEN MERGE RNODE WITH MDNODE. */
+/*                    ----------------------------- */
+	qsize[*mdnode] += qsize[rnode];
+	qsize[rnode] = 0;
+	marker[rnode] = *maxint;
+	dforw[rnode] = -(*mdnode); /* records, negated, the node that rnode was merged into */
+	dbakw[rnode] = -(*maxint);
+	goto L1700;
+L1600:
+/*                -------------------------------------- */
+/*                ELSE FLAG RNODE FOR DEGREE UPDATE, AND */
+/*                ADD MDNODE AS A NABOR OF RNODE. */
+/*                -------------------------------------- */
+	dforw[rnode] = nqnbrs + 1; /* number of entries kept, counting the mdnode entry appended below */
+	dbakw[rnode] = 0; /* 0 is the "needs degree update" flag */
+	adjncy[xqnbr] = *mdnode;
+	++xqnbr;
+	if (xqnbr <= jstop) {
+	    adjncy[xqnbr] = 0;
+	}
+
+L1700:
+	;
+    }
+L1800:
+    return 0;
+
+} /* mmdelm_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* *****     MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE     ***** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/*     AUTHOR - JOSEPH W.H. LIU */
+/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/*     PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES */
+/*        AFTER A MULTIPLE ELIMINATION STEP. */
+
+/*     INPUT PARAMETERS - */
+/*        EHEAD  - THE BEGINNING OF THE LIST OF ELIMINATED */
+/*                 NODES (I.E., NEWLY FORMED ELEMENTS). */
+/*        NEQNS  - NUMBER OF EQUATIONS. */
+/*        (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
+/*        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
+/*        MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) */
+/*                 INTEGER. */
+
+/*     UPDATED PARAMETERS - */
+/*        MDEG   - NEW MINIMUM DEGREE AFTER DEGREE UPDATE. */
+/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/*        QSIZE  - SIZE OF SUPERNODE. */
+/*        LLIST  - WORKING LINKED LIST. */
+/*        MARKER - MARKER VECTOR FOR DEGREE UPDATE. */
+/*        TAG    - TAG VALUE. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdupd_(ehead, neqns, xadj, adjncy, delta, mdeg, dhead, 
+	dforw, dbakw, qsize, llist, marker, maxint, tag)
+integer *ehead, *neqns, *xadj, *adjncy, *delta, *mdeg, *dhead, *dforw, *dbakw,
+	 *qsize, *llist, *marker, *maxint, *tag;
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Local variables (declared static by f2c, so they occupy static storage shared by every call) */
+    static integer node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt, 
+	    istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0;
+/* Overview: for each element on the llist chain starting at *ehead, splits its flagged nodes into the Q2 and QX lists, */
+/* recomputes each one's degree, reinserts it into the degree lists and lowers *mdeg if needed.  Returns 0 once the chain is exhausted. */
+/* *************************************************************** */
+
+/*         INTEGER*2  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) , */
+/*     1              LLIST(1) , MARKER(1), QSIZE(1) */
+
+/* *************************************************************** */
+
+    /* Parameter adjustments: step every array pointer back by one so the body can index from 1 */
+    --marker;
+    --llist;
+    --qsize;
+    --dbakw;
+    --dforw;
+    --dhead;
+    --adjncy;
+    --xadj;
+
+    /* Function Body */
+    mdeg0 = *mdeg + *delta;
+    elmnt = *ehead;
+L100:
+/*            ------------------------------------------------------- */
+/*            FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING. */
+/*            (RESET TAG VALUE IF NECESSARY.) */
+/*            ------------------------------------------------------- */
+    if (elmnt <= 0) {
+	return 0;
+    }
+    mtag = *tag + mdeg0;
+    if (mtag < *maxint) {
+	goto L300;
+    }
+    *tag = 1;
+    i__1 = *neqns;
+    for (i = 1; i <= i__1; ++i) {
+	if (marker[i] < *maxint) {
+	    marker[i] = 0;
+	}
+/* L200: */
+    }
+    mtag = *tag + mdeg0;
+L300:
+/*            --------------------------------------------- */
+/*            CREATE TWO LINKED LISTS FROM NODES ASSOCIATED */
+/*            WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN */
+/*            ADJACENCY STRUCTURE, AND THE OTHER WITH MORE */
+/*            THAN TWO NABORS (QXHEAD).  ALSO COMPUTE DEG0, */
+/*            NUMBER OF NODES IN THIS ELEMENT. */
+/*            --------------------------------------------- */
+    q2head = 0;
+    qxhead = 0;
+    deg0 = 0;
+    link = elmnt;
+L400:
+    istrt = xadj[link];
+    istop = xadj[link + 1] - 1;
+    i__1 = istop;
+    for (i = istrt; i <= i__1; ++i) {
+	enode = adjncy[i];
+	link = -enode; /* only used when enode is negative, i.e. a link to further storage */
+	if (enode < 0) {
+	    goto L400;
+	} else if (enode == 0) {
+	    goto L800;
+	} else {
+	    goto L500;
+	}
+
+L500:
+	if (qsize[enode] == 0) {
+	    goto L700;
+	}
+	deg0 += qsize[enode];
+	marker[enode] = mtag;
+/*                        ---------------------------------- */
+/*                        IF ENODE REQUIRES A DEGREE UPDATE, */
+/*                        THEN DO THE FOLLOWING. */
+/*                        ---------------------------------- */
+	if (dbakw[enode] != 0) {
+	    goto L700;
+	}
+/*                            --------------------------------------- 
+*/
+/*                            PLACE EITHER IN QXHEAD OR Q2HEAD LISTS. 
+*/
+/*                            --------------------------------------- 
+*/
+	if (dforw[enode] == 2) {
+	    goto L600;
+	}
+	llist[enode] = qxhead;
+	qxhead = enode;
+	goto L700;
+L600:
+	llist[enode] = q2head;
+	q2head = enode;
+L700:
+	;
+    }
+L800:
+/*            -------------------------------------------- */
+/*            FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING. */
+/*            -------------------------------------------- */
+    enode = q2head;
+    iq2 = 1; /* iq2 tells L2200 which loop head to return to: 1 = Q2 list (L900), 0 = QX list (L1600) */
+L900:
+    if (enode <= 0) {
+	goto L1500;
+    }
+    if (dbakw[enode] != 0) { /* flag no longer 0 (changed since enode was queued): skip it */
+	goto L2200;
+    }
+    ++(*tag);
+    deg = deg0;
+/*                    ------------------------------------------ */
+/*                    IDENTIFY THE OTHER ADJACENT ELEMENT NABOR. */
+/*                    ------------------------------------------ */
+    istrt = xadj[enode];
+    nabor = adjncy[istrt];
+    if (nabor == elmnt) {
+	nabor = adjncy[istrt + 1];
+    }
+/*                    ------------------------------------------------ */
+/*                    IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT. */
+/*                    ------------------------------------------------ */
+    link = nabor;
+    if (dforw[nabor] < 0) {
+	goto L1000;
+    }
+    deg += qsize[nabor];
+    goto L2100;
+L1000:
+/*                        -------------------------------------------- */
+/*                        OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */
+/*                        DO THE FOLLOWING. */
+/*                        -------------------------------------------- */
+    istrt = xadj[link];
+    istop = xadj[link + 1] - 1;
+    i__1 = istop;
+    for (i = istrt; i <= i__1; ++i) {
+	node = adjncy[i];
+	link = -node;
+	if (node == enode) {
+	    goto L1400;
+	}
+	if (node < 0) {
+	    goto L1000;
+	} else if (node == 0) {
+	    goto L2100;
+	} else {
+	    goto L1100;
+	}
+
+L1100:
+	if (qsize[node] == 0) {
+	    goto L1400;
+	}
+	if (marker[node] >= *tag) {
+	    goto L1200;
+	}
+/*                                -----------------------------------
+-- */
+/*                                CASE WHEN NODE IS NOT YET CONSIDERED
+. */
+/*                                -----------------------------------
+-- */
+	marker[node] = *tag;
+	deg += qsize[node];
+	goto L1400;
+L1200:
+/*                            ----------------------------------------
+ */
+/*                            CASE WHEN NODE IS INDISTINGUISHABLE FROM
+ */
+/*                            ENODE.  MERGE THEM INTO A NEW SUPERNODE.
+ */
+/*                            ----------------------------------------
+ */
+	if (dbakw[node] != 0) {
+	    goto L1400;
+	}
+	if (dforw[node] != 2) {
+	    goto L1300;
+	}
+	qsize[enode] += qsize[node];
+	qsize[node] = 0;
+	marker[node] = *maxint;
+	dforw[node] = -enode; /* records, negated, the node that absorbed this one */
+	dbakw[node] = -(*maxint);
+	goto L1400;
+L1300:
+/*                            -------------------------------------- 
+*/
+/*                            CASE WHEN NODE IS OUTMATCHED BY ENODE. 
+*/
+/*                            -------------------------------------- 
+*/
+	if (dbakw[node] == 0) {
+	    dbakw[node] = -(*maxint);
+	}
+L1400:
+	;
+    }
+    goto L2100;
+L1500:
+/*                ------------------------------------------------ */
+/*                FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING. */
+/*                ------------------------------------------------ */
+    enode = qxhead;
+    iq2 = 0;
+L1600:
+    if (enode <= 0) {
+	goto L2300;
+    }
+    if (dbakw[enode] != 0) { /* flag no longer 0 (changed since enode was queued): skip it */
+	goto L2200;
+    }
+    ++(*tag);
+    deg = deg0;
+/*                        --------------------------------- */
+/*                        FOR EACH UNMARKED NABOR OF ENODE, */
+/*                        DO THE FOLLOWING. */
+/*                        --------------------------------- */
+    istrt = xadj[enode];
+    istop = xadj[enode + 1] - 1;
+    i__1 = istop;
+    for (i = istrt; i <= i__1; ++i) {
+	nabor = adjncy[i];
+	if (nabor == 0) {
+	    goto L2100;
+	}
+	if (marker[nabor] >= *tag) {
+	    goto L2000;
+	}
+	marker[nabor] = *tag;
+	link = nabor;
+/*                                ------------------------------ */
+/*                                IF UNELIMINATED, INCLUDE IT IN */
+/*                                DEG COUNT. */
+/*                                ------------------------------ */
+	if (dforw[nabor] < 0) {
+	    goto L1700;
+	}
+	deg += qsize[nabor];
+	goto L2000;
+L1700:
+/*                                    ------------------------------- 
+*/
+/*                                    IF ELIMINATED, INCLUDE UNMARKED 
+*/
+/*                                    NODES IN THIS ELEMENT INTO THE 
+*/
+/*                                    DEGREE COUNT. */
+/*                                    ------------------------------- 
+*/
+	jstrt = xadj[link];
+	jstop = xadj[link + 1] - 1;
+	i__2 = jstop;
+	for (j = jstrt; j <= i__2; ++j) {
+	    node = adjncy[j];
+	    link = -node;
+	    if (node < 0) {
+		goto L1700;
+	    } else if (node == 0) {
+		goto L2000;
+	    } else {
+		goto L1800;
+	    }
+
+L1800:
+	    if (marker[node] >= *tag) {
+		goto L1900;
+	    }
+	    marker[node] = *tag;
+	    deg += qsize[node];
+L1900:
+	    ;
+	}
+L2000:
+	;
+    }
+L2100:
+/*                    ------------------------------------------- */
+/*                    UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE */
+/*                    STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */
+/*                    ------------------------------------------- */
+    deg = deg - qsize[enode] + 1; /* remove enode's own size; the result is used directly as the degree-list index */
+    fnode = dhead[deg];
+    dforw[enode] = fnode;
+    dbakw[enode] = -deg; /* enode becomes the list head, so its back link is minus the list index */
+    if (fnode > 0) {
+	dbakw[fnode] = enode;
+    }
+    dhead[deg] = enode;
+    if (deg < *mdeg) {
+	*mdeg = deg;
+    }
+L2200:
+/*                    ---------------------------------- */
+/*                    GET NEXT ENODE IN CURRENT ELEMENT. */
+/*                    ---------------------------------- */
+    enode = llist[enode];
+    if (iq2 == 1) {
+	goto L900;
+    }
+    goto L1600;
+L2300:
+/*            ----------------------------- */
+/*            GET NEXT ELEMENT IN THE LIST. */
+/*            ----------------------------- */
+    *tag = mtag;
+    elmnt = llist[elmnt];
+    goto L100;
+
+} /* mmdupd_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* *****     MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING     ***** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/*     AUTHOR - JOSEPH W.H. LIU */
+/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/*     PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN */
+/*        PRODUCING THE PERMUTATION AND INVERSE PERMUTATION */
+/*        VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE */
+/*        MINIMUM DEGREE ORDERING ALGORITHM. */
+
+/*     INPUT PARAMETERS - */
+/*        NEQNS  - NUMBER OF EQUATIONS. */
+/*        QSIZE  - SIZE OF SUPERNODES AT ELIMINATION. */
+
+/*     UPDATED PARAMETERS - */
+/*        INVP   - INVERSE PERMUTATION VECTOR.  ON INPUT, */
+/*                 IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */
+/*                 INTO THE NODE -INVP(NODE); OTHERWISE, */
+/*                 -INVP(NODE) IS ITS INVERSE LABELLING. */
+
+/*     OUTPUT PARAMETERS - */
+/*        PERM   - THE PERMUTATION VECTOR. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdnum_(neqns, perm, invp, qsize)
+integer *neqns, *perm, *invp, *qsize;
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables (declared static by f2c, so they occupy static storage shared by every call) */
+    static integer node, root, nextf, father, nqsize, num;
+/* Overview: pass 1 seeds perm from invp (sign chosen by qsize); pass 2 numbers every merged node right after its root and */
+/* shortens the merge chains; pass 3 negates invp and fills perm as its inverse.  Always returns 0. */
+/* *************************************************************** */
+
+/*         INTEGER*2  INVP(1)  , PERM(1)  , QSIZE(1) */
+
+/* *************************************************************** */
+
+    /* Parameter adjustments: step every array pointer back by one so the body can index from 1 */
+    --qsize;
+    --invp;
+    --perm;
+
+    /* Function Body */
+    i__1 = *neqns;
+    for (node = 1; node <= i__1; ++node) {
+	nqsize = qsize[node];
+	if (nqsize <= 0) { /* merged node: perm keeps invp unchanged (per the routine header, minus the node it was merged into) */
+	    perm[node] = invp[node];
+	}
+	if (nqsize > 0) { /* unmerged node: perm gets invp negated (per the routine header, its number) */
+	    perm[node] = -invp[node];
+	}
+/* L100: */
+    }
+/*        ------------------------------------------------------ */
+/*        FOR EACH NODE WHICH HAS BEEN MERGED, DO THE FOLLOWING. */
+/*        ------------------------------------------------------ */
+    i__1 = *neqns;
+    for (node = 1; node <= i__1; ++node) {
+	if (perm[node] > 0) {
+	    goto L500;
+	}
+/*                ----------------------------------------- */
+/*                TRACE THE MERGED TREE UNTIL ONE WHICH HAS */
+/*                NOT BEEN MERGED, CALL IT ROOT. */
+/*                ----------------------------------------- */
+	father = node;
+L200:
+	if (perm[father] > 0) {
+	    goto L300;
+	}
+	father = -perm[father];
+	goto L200;
+L300:
+/*                ----------------------- */
+/*                NUMBER NODE AFTER ROOT. */
+/*                ----------------------- */
+	root = father;
+	num = perm[root] + 1; /* perm[root] holds the last number handed out in this root's tree */
+	invp[node] = -num;
+	perm[root] = num;
+/*                ------------------------ */
+/*                SHORTEN THE MERGED TREE. */
+/*                ------------------------ */
+	father = node;
+L400:
+	nextf = -perm[father];
+	if (nextf <= 0) { /* perm[father] is positive, so father is the root: stop */
+	    goto L500;
+	}
+	perm[father] = -root; /* repoint this chain member straight at the root */
+	father = nextf;
+	goto L400;
+L500:
+	;
+    }
+/*        ---------------------- */
+/*        READY TO COMPUTE PERM. */
+/*        ---------------------- */
+    i__1 = *neqns;
+    for (node = 1; node <= i__1; ++node) {
+	num = -invp[node];
+	invp[node] = num;
+	perm[num] = node;
+/* L600: */
+    }
+    return 0;
+
+} /* mmdnum_ */
+
diff --git a/contrib/taucs/external/src/genmmd.f b/contrib/taucs/external/src/genmmd.f
new file mode 100644
index 0000000000000000000000000000000000000000..362bfdbd185e6a9fad96753050e9db1fba61a86e
--- /dev/null
+++ b/contrib/taucs/external/src/genmmd.f
@@ -0,0 +1,750 @@
+C Sivan: I modified INTEGER*2 -> INTEGER*4
+
+C***************************************************************
+C***************************************************************
+C****     GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE     ****
+C***************************************************************
+C***************************************************************
+C
+C     AUTHOR - JOSEPH W.H. LIU
+C              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY.
+C
+C     PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE
+C        ALGORITHM.  IT MAKES USE OF THE IMPLICIT REPRESENTATION
+C        OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE
+C        NOTION OF INDISTINGUISHABLE NODES.  IT ALSO IMPLEMENTS
+C        THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM
+C        EXTERNAL DEGREE.
+C        ---------------------------------------------
+C        CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE
+C        DESTROYED.
+C        ---------------------------------------------
+C
+C     INPUT PARAMETERS -
+C        NEQNS  - NUMBER OF EQUATIONS.
+C        (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE.
+C        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION.
+C        MAXINT - MAXIMUM MACHINE REPRESENTABLE INTEGER*4 VALUE
+C                 (ANY SMALLER ESTIMATE WILL DO) FOR MARKING
+C                 NODES.
+C
+C     OUTPUT PARAMETERS -
+C        PERM   - THE MINIMUM DEGREE ORDERING.
+C        INVP   - THE INVERSE OF PERM.
+C        NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO
+C                 SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME.
+C
+C     WORKING PARAMETERS -
+C        DHEAD  - VECTOR FOR HEAD OF DEGREE LISTS.
+C        INVP   - USED TEMPORARILY FOR DEGREE FORWARD LINK.
+C        PERM   - USED TEMPORARILY FOR DEGREE BACKWARD LINK.
+C        QSIZE  - VECTOR FOR SIZE OF SUPERNODES.
+C        LLIST  - VECTOR FOR TEMPORARY LINKED LISTS.
+C        MARKER - A TEMPORARY MARKER VECTOR.
+C
+C     PROGRAM SUBROUTINES -
+C        MMDELM, MMDINT, MMDNUM, MMDUPD.
+C
+C***************************************************************
+C
+      SUBROUTINE  GENMMD ( NEQNS, XADJ, ADJNCY, INVP, PERM,
+     1                     DELTA, DHEAD, QSIZE, LLIST, MARKER,
+     1                     MAXINT, NOFSUB )
+C     DRIVER: INITIALIZE (MMDINT), THEN ALTERNATE ELIMINATION (MMDELM)
+C     AND DEGREE UPDATE (MMDUPD) UNTIL THE ORDERED COUNT COVERS ALL
+C     NEQNS NODES, AND FINISH WITH THE NUMBERING PASS (MMDNUM).
+C         INTEGER*2  ADJNCY(1), DHEAD(1) , INVP(1)  , LLIST(1) ,
+C     1              MARKER(1), PERM(1)  , QSIZE(1)
+         INTEGER*4  ADJNCY(1), DHEAD(1) , INVP(1)  , LLIST(1) ,
+     1              MARKER(1), PERM(1)  , QSIZE(1)
+         INTEGER*4  XADJ(1)
+         INTEGER*4  DELTA , EHEAD , I     , MAXINT, MDEG  ,
+     1              MDLMT , MDNODE, NEQNS , NEXTMD, NOFSUB,
+     1              NUM, TAG
+C     DELTA < 0 FORCES A DEGREE UPDATE AFTER EVERY ELIMINATION (SEE
+C     THE TEST JUST BEFORE LABEL 900).  WHEN NEQNS <= 0 THE ROUTINE
+C     RETURNS AT ONCE AND LEAVES EVERY ARGUMENT, NOFSUB TOO, UNTOUCHED.
+         IF  ( NEQNS .LE. 0 )  RETURN
+C
+C        ------------------------------------------------
+C        INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM.
+C        ------------------------------------------------
+         NOFSUB = 0
+         CALL  MMDINT ( NEQNS, XADJ, ADJNCY, DHEAD, INVP, PERM,
+     1                  QSIZE, LLIST, MARKER )
+C
+C        ----------------------------------------------
+C        NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1.
+C        ----------------------------------------------
+         NUM = 1
+C        (DHEAD(1) HOLDS THE NODES WITH NO NABORS; INVP LINKS THEM.)
+C        -----------------------------
+C        ELIMINATE ALL ISOLATED NODES.
+C        -----------------------------
+         NEXTMD = DHEAD(1)
+  100    CONTINUE
+             IF  ( NEXTMD .LE. 0 )  GO TO 200
+                 MDNODE = NEXTMD
+                 NEXTMD = INVP(MDNODE)
+                 MARKER(MDNODE) = MAXINT
+                 INVP(MDNODE) = - NUM
+                 NUM = NUM + 1
+                 GO TO 100
+C
+  200    CONTINUE
+C        ----------------------------------------
+C        SEARCH FOR NODE OF THE MINIMUM DEGREE.
+C        MDEG IS THE CURRENT MINIMUM DEGREE;
+C        TAG IS USED TO FACILITATE MARKING NODES.
+C        ----------------------------------------
+         IF  ( NUM .GT. NEQNS )  GO TO 1000
+         TAG = 1
+         DHEAD(1) = 0
+         MDEG = 2
+  300    CONTINUE
+             IF  ( DHEAD(MDEG) .GT. 0 )  GO TO 400
+                 MDEG = MDEG + 1
+                 GO TO 300
+  400        CONTINUE
+C            -------------------------------------------------
+C            USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS
+C            WHEN A DEGREE UPDATE IS TO BE PERFORMED.
+C            -------------------------------------------------
+             MDLMT = MDEG + DELTA
+             EHEAD = 0
+C            EHEAD HEADS THE LLIST CHAIN OF NODES ELIMINATED THIS PASS.
+  500        CONTINUE
+                 MDNODE = DHEAD(MDEG)
+                 IF  ( MDNODE .GT. 0 )  GO TO 600
+                     MDEG = MDEG + 1
+                     IF  ( MDEG .GT. MDLMT )  GO TO 900
+                         GO TO 500
+  600            CONTINUE
+C                ----------------------------------------
+C                REMOVE MDNODE FROM THE DEGREE STRUCTURE.
+C                ----------------------------------------
+                 NEXTMD = INVP(MDNODE)
+                 DHEAD(MDEG) = NEXTMD
+                 IF  ( NEXTMD .GT. 0 )  PERM(NEXTMD) = - MDEG
+                 INVP(MDNODE) = - NUM
+                 NOFSUB = NOFSUB + MDEG + QSIZE(MDNODE) - 2
+                 IF  ( NUM+QSIZE(MDNODE) .GT. NEQNS )  GO TO 1000
+C                ----------------------------------------------
+C                ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH
+C                TRANSFORMATION.  RESET TAG VALUE IF NECESSARY.
+C                ----------------------------------------------
+                 TAG = TAG + 1
+                 IF  ( TAG .LT. MAXINT )  GO TO 800
+                     TAG = 1
+                     DO  700  I = 1, NEQNS
+                         IF  ( MARKER(I) .LT. MAXINT )  MARKER(I) = 0
+  700                CONTINUE
+  800            CONTINUE
+                 CALL  MMDELM ( MDNODE, XADJ, ADJNCY, DHEAD, INVP,
+     1                          PERM, QSIZE, LLIST, MARKER, MAXINT,
+     1                          TAG )
+                 NUM = NUM + QSIZE(MDNODE)
+                 LLIST(MDNODE) = EHEAD
+                 EHEAD = MDNODE
+                 IF  ( DELTA .GE. 0 )  GO TO 500
+  900        CONTINUE
+C            -------------------------------------------
+C            UPDATE DEGREES OF THE NODES INVOLVED IN THE
+C            MINIMUM DEGREE NODES ELIMINATION.
+C            -------------------------------------------
+             IF  ( NUM .GT. NEQNS )  GO TO 1000
+             CALL  MMDUPD ( EHEAD, NEQNS, XADJ, ADJNCY, DELTA, MDEG,
+     1                      DHEAD, INVP, PERM, QSIZE, LLIST, MARKER,
+     1                      MAXINT, TAG )
+             GO TO 300
+C        ORDERING DONE: LET MMDNUM BUILD THE FINAL PERM AND INVP.
+ 1000    CONTINUE
+         CALL  MMDNUM ( NEQNS, PERM, INVP, QSIZE )
+         RETURN
+C
+      END
+C***************************************************************
+C***************************************************************
+C***     MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION     ***
+C***************************************************************
+C***************************************************************
+C
+C     AUTHOR - JOSEPH W.H. LIU
+C              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY.
+C
+C     PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE
+C        MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE
+C        ALGORITHM.
+C
+C     INPUT PARAMETERS -
+C        NEQNS  - NUMBER OF EQUATIONS.
+C        (XADJ,ADJNCY) - ADJACENCY STRUCTURE.
+C
+C     OUTPUT PARAMETERS -
+C        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE.
+C        QSIZE  - SIZE OF SUPERNODE (INITIALIZED TO ONE).
+C        LLIST  - LINKED LIST.
+C        MARKER - MARKER VECTOR.
+C
+C***************************************************************
+C
+      SUBROUTINE  MMDINT ( NEQNS, XADJ, ADJNCY, DHEAD, DFORW,
+     1                     DBAKW, QSIZE, LLIST, MARKER )
+C
+C     SETS UP THE WORK VECTORS AND FILES EVERY NODE UNDER THE DEGREE
+C     LIST DHEAD(D), WHERE D = XADJ(NODE+1) - XADJ(NODE) + 1.
+C     ADJNCY IS ACCEPTED BUT NOT REFERENCED IN THIS ROUTINE.
+C
+         INTEGER*4  NEQNS , K     , OLDHD , SLOT
+         INTEGER*4  XADJ(1)
+         INTEGER*4  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) ,
+     1              LLIST(1) , MARKER(1), QSIZE(1)
+C        ----------------------------------------------------
+C        EMPTY LISTS, UNIT SUPERNODES, CLEAR MARKS AND LINKS.
+C        ----------------------------------------------------
+         DO  10  K = 1, NEQNS
+             LLIST(K) = 0
+             MARKER(K) = 0
+             QSIZE(K) = 1
+             DHEAD(K) = 0
+   10    CONTINUE
+C        --------------------------------------------------------
+C        PUSH EACH NODE ONTO THE FRONT OF ITS DEGREE LIST.  THE
+C        HEAD NODE KEEPS -SLOT IN DBAKW, OTHERS THEIR PREDECESSOR.
+C        --------------------------------------------------------
+         DO  20  K = 1, NEQNS
+             SLOT = XADJ(K+1) - XADJ(K) + 1
+             OLDHD = DHEAD(SLOT)
+             DHEAD(SLOT) = K
+             DFORW(K) = OLDHD
+             IF  ( OLDHD .GT. 0 )  DBAKW(OLDHD) = K
+             DBAKW(K) = - SLOT
+   20    CONTINUE
+         RETURN
+      END
+C***************************************************************
+C***************************************************************
+C**     MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION     ***
+C***************************************************************
+C***************************************************************
+C
+C     AUTHOR - JOSEPH W.H. LIU
+C              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY.
+C
+C     PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF
+C        MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH
+C        IS STORED IN THE QUOTIENT GRAPH FORMAT.  IT ALSO
+C        TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE
+C        ELIMINATION GRAPH.
+C
+C     INPUT PARAMETERS -
+C        MDNODE - NODE OF MINIMUM DEGREE.
+C        MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE INTEGER*4
+C                 VALUE (THE VECTORS WERE ORIGINALLY INTEGER*2).
+C        TAG    - TAG VALUE.
+C
+C     UPDATED PARAMETERS -
+C        (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE.
+C        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE.
+C        QSIZE  - SIZE OF SUPERNODE.
+C        MARKER - MARKER VECTOR.
+C        LLIST  - TEMPORARY LINKED LIST OF ELIMINATED NABORS.
+C
+C***************************************************************
+C
+      SUBROUTINE  MMDELM ( MDNODE, XADJ, ADJNCY, DHEAD, DFORW,
+     1                     DBAKW, QSIZE, LLIST, MARKER, MAXINT,
+     1                     TAG )
+C     AN ADJACENCY LIST ENDS AT ITS LAST SLOT OR AT THE FIRST 0; A
+C     NEGATIVE ENTRY -K CONTINUES IT IN THE SLOTS OF NODE K.  A NODE
+C     WITH DFORW < 0 IS HANDLED AS AN ELIMINATED NABOR (AN ELEMENT).
+C         INTEGER*2  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) ,
+C     1              LLIST(1) , MARKER(1), QSIZE(1)
+         INTEGER*4  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) ,
+     1              LLIST(1) , MARKER(1), QSIZE(1)
+         INTEGER*4  XADJ(1)
+         INTEGER*4  ELMNT , I     , ISTOP , ISTRT , J     ,
+     1              JSTOP , JSTRT , LINK  , MAXINT, MDNODE,
+     1              NABOR , NODE  , NPV   , NQNBRS, NXNODE,
+     1              PVNODE, RLMT  , RLOC  , RNODE , TAG   ,
+     1              XQNBR
+C     FOR EACH REACHED NODE RNODE: DBAKW = 0 WITH DFORW = NQNBRS + 1
+C     FLAGS IT FOR A DEGREE UPDATE; DBAKW = -MAXINT WITH DFORW =
+C     -MDNODE AND QSIZE = 0 RECORDS THAT IT WAS MERGED INTO MDNODE.
+C        -----------------------------------------------
+C        FIND REACHABLE SET AND PLACE IN DATA STRUCTURE.
+C        -----------------------------------------------
+         MARKER(MDNODE) = TAG
+         ISTRT = XADJ(MDNODE)
+         ISTOP = XADJ(MDNODE+1) - 1
+C        -------------------------------------------------------
+C        ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED
+C        NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION
+C        FOR THE NEXT REACHABLE NODE.
+C        -------------------------------------------------------
+         ELMNT = 0
+         RLOC = ISTRT
+         RLMT = ISTOP
+         DO  200  I = ISTRT, ISTOP
+             NABOR = ADJNCY(I)
+             IF  ( NABOR .EQ. 0 )  GO TO 300
+                 IF  ( MARKER(NABOR) .GE. TAG )  GO TO 200
+                     MARKER(NABOR) = TAG
+                     IF  ( DFORW(NABOR) .LT. 0 )  GO TO 100
+                         ADJNCY(RLOC) = NABOR
+                         RLOC = RLOC + 1
+                         GO TO 200
+  100                CONTINUE
+                     LLIST(NABOR) = ELMNT
+                     ELMNT = NABOR
+  200    CONTINUE
+  300    CONTINUE
+C            -----------------------------------------------------
+C            MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS.
+C            -----------------------------------------------------
+             IF  ( ELMNT .LE. 0 )  GO TO 1000
+                 ADJNCY(RLMT) = - ELMNT
+                 LINK = ELMNT
+  400            CONTINUE
+                     JSTRT = XADJ(LINK)
+                     JSTOP = XADJ(LINK+1) - 1
+                     DO  800  J = JSTRT, JSTOP
+                         NODE = ADJNCY(J)
+                         LINK = - NODE
+                         IF  ( NODE )  400, 900, 500
+  500                    CONTINUE
+                         IF  ( MARKER(NODE) .GE. TAG  .OR.
+     1                         DFORW(NODE) .LT. 0 )  GO TO 800
+                             MARKER(NODE) = TAG
+C                            ---------------------------------
+C                            USE STORAGE FROM ELIMINATED NODES
+C                            IF NECESSARY.
+C                            ---------------------------------
+  600                        CONTINUE
+                                 IF  ( RLOC .LT. RLMT )  GO TO 700
+                                     LINK = - ADJNCY(RLMT)
+                                     RLOC = XADJ(LINK)
+                                     RLMT = XADJ(LINK+1) - 1
+                                     GO TO 600
+  700                        CONTINUE
+                             ADJNCY(RLOC) = NODE
+                             RLOC = RLOC + 1
+  800                CONTINUE
+  900            CONTINUE
+                 ELMNT = LLIST(ELMNT)
+                 GO TO 300
+ 1000    CONTINUE
+         IF  ( RLOC .LE. RLMT )  ADJNCY(RLOC) = 0
+C        --------------------------------------------------------
+C        FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ...
+C        --------------------------------------------------------
+         LINK = MDNODE
+ 1100    CONTINUE
+             ISTRT = XADJ(LINK)
+             ISTOP = XADJ(LINK+1) - 1
+             DO  1700  I = ISTRT, ISTOP
+                 RNODE = ADJNCY(I)
+                 LINK = - RNODE
+                 IF  ( RNODE )  1100, 1800, 1200
+ 1200            CONTINUE
+C                --------------------------------------------
+C                IF RNODE IS IN THE DEGREE LIST STRUCTURE ...
+C                --------------------------------------------
+                 PVNODE = DBAKW(RNODE)
+                 IF  ( PVNODE .EQ. 0  .OR.
+     1                 PVNODE .EQ. (-MAXINT) )  GO TO 1300
+C                    -------------------------------------
+C                    THEN REMOVE RNODE FROM THE STRUCTURE.
+C                    -------------------------------------
+                     NXNODE = DFORW(RNODE)
+                     IF  ( NXNODE .GT. 0 )  DBAKW(NXNODE) = PVNODE
+                     IF  ( PVNODE .GT. 0 )  DFORW(PVNODE) = NXNODE
+                     NPV = - PVNODE
+                     IF  ( PVNODE .LT. 0 )  DHEAD(NPV) = NXNODE
+ 1300            CONTINUE
+C                ----------------------------------------
+C                PURGE INACTIVE QUOTIENT NABORS OF RNODE.
+C                ----------------------------------------
+                 JSTRT = XADJ(RNODE)
+                 JSTOP = XADJ(RNODE+1) - 1
+                 XQNBR = JSTRT
+                 DO  1400  J = JSTRT, JSTOP
+                     NABOR = ADJNCY(J)
+                     IF  ( NABOR .EQ. 0 )  GO TO 1500
+                         IF  ( MARKER(NABOR) .GE. TAG )  GO TO 1400
+                             ADJNCY(XQNBR) = NABOR
+                             XQNBR = XQNBR + 1
+ 1400            CONTINUE
+ 1500            CONTINUE
+C                ----------------------------------------
+C                IF NO ACTIVE NABOR AFTER THE PURGING ...
+C                ----------------------------------------
+                 NQNBRS = XQNBR - JSTRT
+                 IF  ( NQNBRS .GT. 0 )  GO TO 1600
+C                    -----------------------------
+C                    THEN MERGE RNODE WITH MDNODE.
+C                    -----------------------------
+                     QSIZE(MDNODE) = QSIZE(MDNODE) + QSIZE(RNODE)
+                     QSIZE(RNODE) = 0
+                     MARKER(RNODE) = MAXINT
+                     DFORW(RNODE) = - MDNODE
+                     DBAKW(RNODE) = - MAXINT
+                     GO TO 1700
+ 1600            CONTINUE
+C                --------------------------------------
+C                ELSE FLAG RNODE FOR DEGREE UPDATE, AND
+C                ADD MDNODE AS A NABOR OF RNODE.
+C                --------------------------------------
+                 DFORW(RNODE) = NQNBRS + 1
+                 DBAKW(RNODE) = 0
+                 ADJNCY(XQNBR) = MDNODE
+                 XQNBR = XQNBR + 1
+                 IF  ( XQNBR .LE. JSTOP )  ADJNCY(XQNBR) = 0
+C
+ 1700        CONTINUE
+ 1800    CONTINUE
+         RETURN
+C
+      END
+C***************************************************************
+C***************************************************************
+C*****     MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE     *****
+C***************************************************************
+C***************************************************************
+C
+C     AUTHOR - JOSEPH W.H. LIU
+C              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY.
+C
+C     PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES
+C        AFTER A MULTIPLE ELIMINATION STEP.
+C
+C     INPUT PARAMETERS -
+C        EHEAD  - THE BEGINNING OF THE LIST OF ELIMINATED
+C                 NODES (I.E., NEWLY FORMED ELEMENTS).
+C        NEQNS  - NUMBER OF EQUATIONS.
+C        (XADJ,ADJNCY) - ADJACENCY STRUCTURE.
+C        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION.
+C        MAXINT - MAXIMUM MACHINE REPRESENTABLE INTEGER*4 VALUE
+C                 (THE VECTORS WERE ORIGINALLY INTEGER*2).
+C
+C     UPDATED PARAMETERS -
+C        MDEG   - NEW MINIMUM DEGREE AFTER DEGREE UPDATE.
+C        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE.
+C        QSIZE  - SIZE OF SUPERNODE.
+C        LLIST  - WORKING LINKED LIST.
+C        MARKER - MARKER VECTOR FOR DEGREE UPDATE.
+C        TAG    - TAG VALUE.
+C
+C***************************************************************
+C
+      SUBROUTINE  MMDUPD ( EHEAD, NEQNS, XADJ, ADJNCY, DELTA,
+     1                     MDEG, DHEAD, DFORW, DBAKW, QSIZE,
+     1                     LLIST, MARKER, MAXINT, TAG )
+C     ONLY NODES WITH DBAKW = 0 ARE UPDATED.  EACH ONE HAS ITS DEGREE
+C     DEG RECOMPUTED AND IS PUT AT THE HEAD OF THE LIST DHEAD(DEG);
+C     MDEG IS LOWERED WHENEVER A SMALLER DEG TURNS UP.
+C         INTEGER*2  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) ,
+C     1              LLIST(1) , MARKER(1), QSIZE(1)
+         INTEGER*4  ADJNCY(1), DBAKW(1) , DFORW(1) , DHEAD(1) ,
+     1              LLIST(1) , MARKER(1), QSIZE(1)
+         INTEGER*4  XADJ(1)
+         INTEGER*4  DEG   , DEG0  , DELTA , EHEAD , ELMNT ,
+     1              ENODE , FNODE , I     , IQ2   , ISTOP ,
+     1              ISTRT , J     , JSTOP , JSTRT , LINK  ,
+     1              MAXINT, MDEG  , MDEG0 , MTAG  , NABOR ,
+     1              NEQNS , NODE  , Q2HEAD, QXHEAD, TAG
+C     IQ2 RECORDS WHICH LIST IS BEING WALKED (1 = Q2, 0 = QX) SO THE
+C     SHARED CODE AT LABELS 2100 AND 2200 CAN RETURN TO THE RIGHT
+C     LOOP HEAD (900 OR 1600).
+         MDEG0 = MDEG + DELTA
+         ELMNT = EHEAD
+  100    CONTINUE
+C            -------------------------------------------------------
+C            FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING.
+C            (RESET TAG VALUE IF NECESSARY.)
+C            -------------------------------------------------------
+             IF  ( ELMNT .LE. 0 )  RETURN
+             MTAG = TAG + MDEG0
+             IF  ( MTAG .LT. MAXINT )  GO TO 300
+                 TAG = 1
+                 DO  200  I = 1, NEQNS
+                     IF  ( MARKER(I) .LT. MAXINT )  MARKER(I) = 0
+  200            CONTINUE
+                 MTAG = TAG + MDEG0
+  300        CONTINUE
+C            ---------------------------------------------
+C            CREATE TWO LINKED LISTS FROM NODES ASSOCIATED
+C            WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN
+C            ADJACENCY STRUCTURE, AND THE OTHER WITH MORE
+C            THAN TWO NABORS (QXHEAD).  ALSO COMPUTE DEG0,
+C            NUMBER OF NODES IN THIS ELEMENT.
+C            ---------------------------------------------
+             Q2HEAD = 0
+             QXHEAD = 0
+             DEG0 = 0
+             LINK = ELMNT
+  400        CONTINUE
+                 ISTRT = XADJ(LINK)
+                 ISTOP = XADJ(LINK+1) - 1
+                 DO  700  I = ISTRT, ISTOP
+                     ENODE = ADJNCY(I)
+                     LINK = - ENODE
+                     IF  ( ENODE )  400, 800, 500
+C                    ENODE < 0 FOLLOWS A LINK, ENODE = 0 ENDS THE LIST.
+  500                CONTINUE
+                     IF  ( QSIZE(ENODE) .EQ. 0 )  GO TO 700
+                         DEG0 = DEG0 + QSIZE(ENODE)
+                         MARKER(ENODE) = MTAG
+C                        ----------------------------------
+C                        IF ENODE REQUIRES A DEGREE UPDATE,
+C                        THEN DO THE FOLLOWING.
+C                        ----------------------------------
+                         IF  ( DBAKW(ENODE) .NE. 0 )  GO TO 700
+C                            ---------------------------------------
+C                            PLACE EITHER IN QXHEAD OR Q2HEAD LISTS.
+C                            ---------------------------------------
+                             IF  ( DFORW(ENODE) .EQ. 2 )  GO TO 600
+                                 LLIST(ENODE) = QXHEAD
+                                 QXHEAD = ENODE
+                                 GO TO 700
+  600                        CONTINUE
+                             LLIST(ENODE) = Q2HEAD
+                             Q2HEAD = ENODE
+  700            CONTINUE
+  800        CONTINUE
+C            --------------------------------------------
+C            FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING.
+C            --------------------------------------------
+             ENODE = Q2HEAD
+             IQ2 = 1
+  900        CONTINUE
+                 IF  ( ENODE .LE. 0 )  GO TO 1500
+                 IF  ( DBAKW(ENODE) .NE. 0 )  GO TO 2200
+                     TAG = TAG + 1
+                     DEG = DEG0
+C                    ------------------------------------------
+C                    IDENTIFY THE OTHER ADJACENT ELEMENT NABOR.
+C                    ------------------------------------------
+                     ISTRT = XADJ(ENODE)
+                     NABOR = ADJNCY(ISTRT)
+                     IF  ( NABOR .EQ. ELMNT )  NABOR = ADJNCY(ISTRT+1)
+C                    ------------------------------------------------
+C                    IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT.
+C                    ------------------------------------------------
+                     LINK = NABOR
+                     IF  ( DFORW(NABOR) .LT. 0 )  GO TO 1000
+                         DEG = DEG + QSIZE(NABOR)
+                         GO TO 2100
+ 1000                CONTINUE
+C                        --------------------------------------------
+C                        OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT,
+C                        DO THE FOLLOWING.
+C                        --------------------------------------------
+                         ISTRT = XADJ(LINK)
+                         ISTOP = XADJ(LINK+1) - 1
+                         DO  1400  I = ISTRT, ISTOP
+                             NODE = ADJNCY(I)
+                             LINK = - NODE
+                             IF  ( NODE .EQ. ENODE )  GO TO 1400
+                             IF  ( NODE )  1000, 2100, 1100
+C                            (NODE < 0: FOLLOW LINK; NODE = 0: DONE.)
+ 1100                        CONTINUE
+                             IF  ( QSIZE(NODE) .EQ. 0 )  GO TO 1400
+                             IF  ( MARKER(NODE) .GE. TAG )  GO TO 1200
+C                                -------------------------------------
+C                                CASE WHEN NODE IS NOT YET CONSIDERED.
+C                                -------------------------------------
+                                 MARKER(NODE) = TAG
+                                 DEG = DEG + QSIZE(NODE)
+                                 GO TO 1400
+ 1200                        CONTINUE
+C                            ----------------------------------------
+C                            CASE WHEN NODE IS INDISTINGUISHABLE FROM
+C                            ENODE.  MERGE THEM INTO A NEW SUPERNODE.
+C                            ----------------------------------------
+                             IF  ( DBAKW(NODE) .NE. 0 )  GO TO 1400
+                             IF  ( DFORW(NODE) .NE. 2 )  GO TO 1300
+                                 QSIZE(ENODE) = QSIZE(ENODE) +
+     1                                          QSIZE(NODE)
+                                 QSIZE(NODE) = 0
+                                 MARKER(NODE) = MAXINT
+                                 DFORW(NODE) = - ENODE
+                                 DBAKW(NODE) = - MAXINT
+                                 GO TO 1400
+ 1300                        CONTINUE
+C                            --------------------------------------
+C                            CASE WHEN NODE IS OUTMATCHED BY ENODE.
+C                            --------------------------------------
+                             IF  ( DBAKW(NODE) .EQ.0 )
+     1                             DBAKW(NODE) = - MAXINT
+ 1400                    CONTINUE
+                         GO TO 2100
+ 1500            CONTINUE
+C                ------------------------------------------------
+C                FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING.
+C                ------------------------------------------------
+                 ENODE = QXHEAD
+                 IQ2 = 0
+ 1600            CONTINUE
+                     IF  ( ENODE .LE. 0 )  GO TO 2300
+                     IF  ( DBAKW(ENODE) .NE. 0 )  GO TO 2200
+                         TAG = TAG + 1
+                         DEG = DEG0
+C                        ---------------------------------
+C                        FOR EACH UNMARKED NABOR OF ENODE,
+C                        DO THE FOLLOWING.
+C                        ---------------------------------
+                         ISTRT = XADJ(ENODE)
+                         ISTOP = XADJ(ENODE+1) - 1
+                         DO  2000  I = ISTRT, ISTOP
+                             NABOR = ADJNCY(I)
+                             IF  ( NABOR .EQ. 0 )  GO TO 2100
+                             IF  ( MARKER(NABOR) .GE. TAG )  GO TO 2000
+                                 MARKER(NABOR) = TAG
+                                 LINK = NABOR
+C                                ------------------------------
+C                                IF UNELIMINATED, INCLUDE IT IN
+C                                DEG COUNT.
+C                                ------------------------------
+                                 IF  ( DFORW(NABOR) .LT. 0 )  GO TO 1700
+                                     DEG = DEG + QSIZE(NABOR)
+                                     GO TO 2000
+ 1700                            CONTINUE
+C                                    -------------------------------
+C                                    IF ELIMINATED, INCLUDE UNMARKED
+C                                    NODES IN THIS ELEMENT INTO THE
+C                                    DEGREE COUNT.
+C                                    -------------------------------
+                                     JSTRT = XADJ(LINK)
+                                     JSTOP = XADJ(LINK+1) - 1
+                                     DO  1900  J = JSTRT, JSTOP
+                                         NODE = ADJNCY(J)
+                                         LINK = - NODE
+                                         IF  ( NODE )  1700, 2000, 1800
+C                                        (<0: LINK, 0: END OF ELEMENT.)
+ 1800                                    CONTINUE
+                                         IF  ( MARKER(NODE) .GE. TAG )
+     1                                         GO TO 1900
+                                             MARKER(NODE) = TAG
+                                             DEG = DEG + QSIZE(NODE)
+ 1900                                CONTINUE
+ 2000                    CONTINUE
+ 2100                CONTINUE
+C                    -------------------------------------------
+C                    UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE
+C                    STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY.
+C                    -------------------------------------------
+                     DEG = DEG - QSIZE(ENODE) + 1
+                     FNODE = DHEAD(DEG)
+                     DFORW(ENODE) = FNODE
+                     DBAKW(ENODE) = - DEG
+                     IF  ( FNODE .GT. 0 )  DBAKW(FNODE) = ENODE
+                     DHEAD(DEG) = ENODE
+                     IF  ( DEG .LT. MDEG )  MDEG = DEG
+ 2200                CONTINUE
+C                    ----------------------------------
+C                    GET NEXT ENODE IN CURRENT ELEMENT.
+C                    ----------------------------------
+                     ENODE = LLIST(ENODE)
+                     IF  ( IQ2 .EQ. 1 )  GO TO 900
+                         GO TO 1600
+ 2300        CONTINUE
+C            -----------------------------
+C            GET NEXT ELEMENT IN THE LIST.
+C            -----------------------------
+             TAG = MTAG
+             ELMNT = LLIST(ELMNT)
+             GO TO 100
+C
+      END
+C***************************************************************
+C***************************************************************
+C*****     MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING     *****
+C***************************************************************
+C***************************************************************
+C
+C     AUTHOR - JOSEPH W.H. LIU
+C              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY.
+C
+C     PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN
+C        PRODUCING THE PERMUTATION AND INVERSE PERMUTATION
+C        VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE
+C        MINIMUM DEGREE ORDERING ALGORITHM.
+C
+C     INPUT PARAMETERS -
+C        NEQNS  - NUMBER OF EQUATIONS.
+C        QSIZE  - SIZE OF SUPERNODES AT ELIMINATION.
+C
+C     UPDATED PARAMETERS -
+C        INVP   - INVERSE PERMUTATION VECTOR.  ON INPUT,
+C                 IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED
+C                 INTO THE NODE -INVP(NODE); OTHERWISE,
+C                 -INVP(NODE) IS ITS INVERSE LABELLING.
+C
+C     OUTPUT PARAMETERS -
+C        PERM   - THE PERMUTATION VECTOR.
+C
+C***************************************************************
+C
+      SUBROUTINE  MMDNUM ( NEQNS, PERM, INVP, QSIZE )
+C
+C     LAST STEP OF THE ORDERING: REWRITES THE NEGATED LABELS AND
+C     MERGE LINKS FOUND IN INVP AS THE FINAL INVP, AND FILLS PERM
+C     SO THAT PERM(INVP(K)) = K FOR K = 1, ..., NEQNS.  QSIZE IS
+C     ONLY READ.
+C
+         INTEGER*4  NEQNS
+         INTEGER*4  INVP(1)  , PERM(1)  , QSIZE(1)
+         INTEGER*4  CUR   , K     , LABEL , NXT   , TOP
+C
+C        ------------------------------------------------------
+C        SEED PERM FROM INVP: NODES WITH QSIZE > 0 GET -INVP (A
+C        POSITIVE LABEL), THE REST KEEP INVP (A NEGATED LINK).
+C        ------------------------------------------------------
+         DO  10  K = 1, NEQNS
+             IF  ( QSIZE(K) .GT. 0 )  THEN
+                 PERM(K) = - INVP(K)
+             ELSE
+                 PERM(K) = INVP(K)
+             END IF
+   10    CONTINUE
+C        ---------------------------------------------------------
+C        GIVE EVERY NODE WHOSE PERM ENTRY IS NOT POSITIVE A LABEL.
+C        ---------------------------------------------------------
+         DO  40  K = 1, NEQNS
+             IF  ( PERM(K) .LE. 0 )  THEN
+C                CLIMB THE NEGATED LINKS TO A POSITIVE ENTRY (TOP).
+                 TOP = K
+   20            CONTINUE
+                 IF  ( PERM(TOP) .LE. 0 )  THEN
+                     TOP = - PERM(TOP)
+                     GO TO 20
+                 END IF
+C                K TAKES THE LABEL AFTER THE LAST ONE USED AT TOP.
+                 LABEL = PERM(TOP) + 1
+                 INVP(K) = - LABEL
+                 PERM(TOP) = LABEL
+C                MAKE EACH NODE ON THE CLIMBED PATH POINT AT TOP.
+                 CUR = K
+   30            CONTINUE
+                 NXT = - PERM(CUR)
+                 IF  ( NXT .GT. 0 )  THEN
+                     PERM(CUR) = - TOP
+                     CUR = NXT
+                     GO TO 30
+                 END IF
+             END IF
+   40    CONTINUE
+C        ----------------------------------------------------------
+C        INVP STILL HOLDS NEGATED LABELS: FLIP THE SIGN AND INVERT.
+C        ----------------------------------------------------------
+         DO  50  K = 1, NEQNS
+             LABEL = - INVP(K)
+             INVP(K) = LABEL
+             PERM(LABEL) = K
+   50    CONTINUE
+         RETURN
+C
+      END
+
+
diff --git a/contrib/taucs/external/src/readhb.c b/contrib/taucs/external/src/readhb.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4cf7f04b27a40c6acf06002749e507744280ef6
--- /dev/null
+++ b/contrib/taucs/external/src/readhb.c
@@ -0,0 +1,1525 @@
+/* readhb.f -- translated by f2c (version of 23 April 1993  18:34:30).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+#include "f2c.h"
+
+/* Common Block Declarations: mrand_ carries the state of the myrand_
+   generator (its current seed) from one call to the next */
+
+struct {
+    doublereal seed;
+} mrand_;
+
+#define mrand_1 mrand_
+
+/* Table of constant values: the generated code passes literals by
+   address, so each constant used as an argument gets a named static */
+
+static integer c__1 = 1;	/* item count in most do_fio / do_lio calls */
+static integer c__9 = 9;	/* do_lio selector used with character strings */
+static integer c__8 = 8;	/* do_lio selector used with the logical sym */
+static integer c__5 = 5;	/* do_lio selector used with a doublereal */
+static integer c__0 = 0;	/* myrand_ argument: restart the sequence */
+static integer c_n1 = -1;	/* myrand_ argument: negative, selects the scaled result */
+static integer c__4 = 4;	/* do_lio selector used with a real */
+static integer c__7 = 7;	/* do_lio selector used with a doublecomplex */
+static integer c__2 = 2;	/* item count when one complex value is read as two doublereals */
+static integer c__6 = 6;	/* not referenced by ireadhb_/dreadhb_/sreadhb_/zreadhb_; presumably used further down the file -- TODO confirm */
+static doublereal c_b349 = 4294967296.;	/* modulus handed to d_mod in myrand_ */
+
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* === Myrand ============================================================ */
+
+/*  Derived from the FA01 routines in the MUPS package (CERFACS and/or */
+/*  Harwell).  CERFACS and/or Harwell copyrights may apply.  Permission */
+/*  granted to use this routine in the DEMO PROGRAM only. */
+
+/*  DEMO PROGRAM. */
+
+/*  random number generator */
+/*  i = 0:  reinitialize the sequence */
+/*  i >=0:  return 0 < x < 1 */
+/*  i < 0:  return -1 < x < 1 */
+doublereal myrand_(i)
+integer *i;
+{
+    /* Remainder helper supplied by the f2c runtime */
+    double d_mod();
+
+    /* Scratch product, handed to d_mod by address */
+    doublereal product;
+
+    /* A zero selector restarts the generator from its fixed seed */
+    if (*i == 0)
+	mrand_1.seed = 1431655765.;
+
+    /* Advance the state: seed <- d_mod(seed * 9228907, c_b349) */
+    product = mrand_1.seed * 9228907.;
+    mrand_1.seed = d_mod(&product, &c_b349);
+
+    /* Negative selector: rescale the unit-interval value to span (-1,1) */
+    if (*i < 0)
+	return mrand_1.seed / 4294967296. * 2 - 1;
+
+    /* Zero or positive selector: value scaled into the unit interval */
+    return mrand_1.seed / 4294967296.;
+} /* myrand_ */
+
+
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* Subroutine: ireadhb_ -- read only the header of a Harwell/Boeing file.
+ *
+ * fname      file name handed to f_open; the open request fixes the name
+ *            length at 256 characters and does not consult fname_len
+ * type       receives the 3-character matrix type field
+ * nrows      receives the header field echoed as "nrow"
+ * ncols      receives the header field echoed as "ncol"
+ * nnz        receives the header field echoed as "nz"
+ * fname_len, type_len
+ *            Fortran string lengths; neither is referenced in the body
+ *
+ * The title, key, the five card counts and the element count are read
+ * into locals and only echoed.  Returns 0 after closing unit 99.  Any
+ * non-zero status from the open or from a read call jumps to L999, which
+ * prints "Read error: Harwell/Boeing matrix" and calls s_stop.
+ */ int ireadhb_(fname, type, nrows, ncols, nnz, fname_len, 
+	type_len)
+char *fname, *type;
+integer *nrows, *ncols, *nnz;
+ftnlen fname_len;
+ftnlen type_len;
+{
+    /* Format strings: fmt_10 drives the header read, fmt_30 the echo */
+    static char fmt_10[] = "(a72,a8/5i14/a3,11x,4i14)";
+    static char fmt_30[] = "(\002 title: \002,a72/\002 key: \002,a8/\002 Lin\
+es: tot: \002,i14,\002 ptr: \002,i14,\002 ind: \002,i14/\002        val: \
+\002,i14,\002 rhs: \002,i14/\002 type: \002,a3,\002 nrow: \002,i14,\002 ncol\
+: \002,i14/\002 nz: \002,i14,\002 elements: \002,i14)";
+
+    /* System generated locals */
+    integer i__1;
+    olist o__1;
+    cllist cl__1;
+
+    /* Builtin functions */
+    integer f_open(), s_rsfe(), do_fio(), e_rsfe(), s_wsfe(), e_wsfe(), 
+	    f_clos(), s_wsle(), do_lio(), e_wsle();
+    /* Subroutine */ int s_stop();
+
+    /* Local variables (all static, so they keep their values between calls) */
+    static char title[72];
+    static integer indcrd, valcrd, rhscrd, ptrcrd, totcrd, nel;
+    static char key[30];
+
+    /* Fortran I/O blocks: io___1 is used for the formatted header read,
+       io___10 for the formatted echo, io___11 for the error message */
+    static cilist io___1 = { 1, 99, 0, fmt_10, 0 };
+    static cilist io___10 = { 0, 0, 0, fmt_30, 0 };
+    static cilist io___11 = { 0, 0, 0, 0, 0 };
+
+
+/* -----------------------------------------------------------------------
+ */
+/*       read header information from Harwell/Boeing matrix: open the file
+         on unit 99 with status OLD, then read the fields in the order
+         fmt_10 lays them out; every status is checked */
+    o__1.oerr = 1;
+    o__1.ounit = 99;
+    o__1.ofnmlen = 256;	/* fixed name length; fname_len is ignored */
+    o__1.ofnm = fname;
+    o__1.orl = 0;
+    o__1.osta = "OLD";
+    o__1.oacc = 0;
+    o__1.ofm = 0;
+    o__1.oblnk = 0;
+    i__1 = f_open(&o__1);
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = s_rsfe(&io___1);
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, title, 72L);
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, key, 30L);	/* 30-char buffer; fmt_10 gives this item a8 */
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&totcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&ptrcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&indcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&valcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&rhscrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, type, 3L);
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nrows), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*ncols), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nnz), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = do_fio(&c__1, (char *)&nel, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L999;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L999;
+    }
+    s_wsfe(&io___10);	/* echo the header just read; write statuses are not checked */
+    do_fio(&c__1, title, 72L);
+    do_fio(&c__1, key, 30L);
+    do_fio(&c__1, (char *)&totcrd, (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&ptrcrd, (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&indcrd, (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&valcrd, (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&rhscrd, (ftnlen)sizeof(integer));
+    do_fio(&c__1, type, 3L);
+    do_fio(&c__1, (char *)&(*nrows), (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&(*ncols), (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&(*nnz), (ftnlen)sizeof(integer));
+    do_fio(&c__1, (char *)&nel, (ftnlen)sizeof(integer));
+    e_wsfe();
+    cl__1.cerr = 0;	/* close unit 99 and report success */
+    cl__1.cunit = 99;
+    cl__1.csta = 0;
+    f_clos(&cl__1);
+    return 0;
+L999:	/* every failed open/read ends up here */
+    s_wsle(&io___11);
+    do_lio(&c__9, &c__1, "Read error: Harwell/Boeing matrix", 33L);
+    e_wsle();
+    s_stop("", 0L);	/* no return statement follows; s_stop is presumably expected not to come back -- TODO confirm */
+} /* ireadhb_ */
+
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* Subroutine: dreadhb_ -- read a Harwell/Boeing matrix with doublereal
+ * values.
+ *
+ * fname      file name handed to f_open; the open request fixes the name
+ *            length at 256 characters and does not consult fname_len
+ * nrows, ncols, nnz
+ *            receive the header fields read after the type field
+ * ptr        receives *ncols + 1 column pointers
+ * index      receives *nnz row indices
+ * value      receives *nnz values: read from the file when valcrd > 0,
+ *            otherwise generated (see the end of the function body)
+ * fname_len  Fortran string length; not referenced in the body
+ *
+ * Pointers and indices are stored as read; the value-generation loop
+ * walks them as 1-based positions.  Returns 0 after closing unit 99.  Any
+ * non-zero status from the open or from a read call jumps to L198, which
+ * prints "Read error: Harwell/Boeing matrix" and calls s_stop.
+ */ int dreadhb_(fname, nrows, ncols, nnz, ptr, index, value, 
+	fname_len)
+char *fname;
+integer *nrows, *ncols, *nnz, *ptr, *index;
+doublereal *value;
+ftnlen fname_len;
+{
+    /* Format strings: fmt_105/110/120 drive the three header reads,
+       fmt_130/140 the corresponding echoes */
+    static char fmt_105[] = "(a72,a8/5i14/a3,11x,4i14)";
+    static char fmt_110[] = "(2a16,2a20)";
+    static char fmt_120[] = "(a3,11x,2i14)";
+    static char fmt_130[] = "(\002 ptrfmt: \002,a20,\002 rowfmt: \002,a20,\
+/\002 valfmt: \002,a20,\002 rhsfmt: \002,a20)";
+    static char fmt_140[] = "(\002 rhstyp: \002,a3,\002 nrhs: \002,i14,\002 \
+nzrhs: \002,i14)";
+
+    /* System generated locals */
+    integer i__1, i__2;
+    olist o__1;
+    cllist cl__1;
+
+    /* Builtin functions */
+    integer f_open(), s_rsfe(), do_fio(), e_rsfe(), s_wsfe(), e_wsfe(), 
+	    s_wsle(), do_lio(), e_wsle(), f_clos();
+    /* Subroutine */ int s_stop();
+
+    /* Local variables (all static, so they keep their values between calls) */
+    static doublereal skew;
+    static integer nrhs;
+    static char type[3];
+    static integer p;
+    static char title[72];
+    static integer nzrhs, indcrd, valcrd;
+    static char indfmt[16];
+    static integer rhscrd;
+    static char valfmt[20];
+    extern doublereal myrand_();
+    static integer ptrcrd, totcrd;
+    static char rhsfmt[20], ptrfmt[16], rhstyp[3];
+    static integer col, nel;
+    static char key[30];
+    static integer row;
+    static logical sym;
+
+    /* Fortran I/O blocks: io___37, io___40 and io___43 take their format
+       from ptrfmt, indfmt and valfmt, i.e. from text read out of the file */
+    static cilist io___12 = { 1, 99, 0, fmt_105, 0 };
+    static cilist io___22 = { 1, 99, 0, fmt_110, 0 };
+    static cilist io___27 = { 1, 99, 0, fmt_120, 0 };
+    static cilist io___33 = { 0, 0, 0, fmt_130, 0 };
+    static cilist io___34 = { 0, 0, 0, fmt_140, 0 };
+    static cilist io___35 = { 0, 0, 0, 0, 0 };
+    static cilist io___36 = { 0, 6, 0, 0, 0 };
+    static cilist io___37 = { 1, 99, 0, ptrfmt, 0 };
+    static cilist io___39 = { 0, 6, 0, 0, 0 };
+    static cilist io___40 = { 1, 99, 0, indfmt, 0 };
+    static cilist io___42 = { 0, 6, 0, 0, 0 };
+    static cilist io___43 = { 1, 99, 0, valfmt, 0 };
+    static cilist io___45 = { 0, 0, 0, 0, 0 };
+
+
+/* -----------------------------------------------------------------------
+ */
+/*       read header information from Harwell/Boeing matrix */
+    /* Parameter adjustments: shift the base pointers so that the arrays
+       can be indexed from 1 below */
+    --value;
+    --index;
+    --ptr;
+
+    /* Function Body */
+    o__1.oerr = 1;
+    o__1.ounit = 99;
+    o__1.ofnmlen = 256;	/* fixed name length; fname_len is ignored */
+    o__1.ofnm = fname;
+    o__1.orl = 0;
+    o__1.osta = "OLD";
+    o__1.oacc = 0;
+    o__1.ofm = 0;
+    o__1.oblnk = 0;
+    i__1 = f_open(&o__1);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = s_rsfe(&io___12);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, title, 72L);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, key, 30L);	/* 30-char buffer; fmt_105 gives this item a8 */
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&totcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&ptrcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&indcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&valcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&rhscrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, type, 3L);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nrows), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*ncols), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nnz), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, (char *)&nel, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = s_rsfe(&io___22);	/* next record: the four format strings */
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, ptrfmt, 16L);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, indfmt, 16L);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, valfmt, 20L);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = do_fio(&c__1, rhsfmt, 20L);
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L198;
+    }
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: an extra record with rhstyp, nrhs
+            and nzrhs is read only when rhscrd is positive */
+	i__1 = s_rsfe(&io___27);
+	if (i__1 != 0) {
+	    goto L198;
+	}
+	i__1 = do_fio(&c__1, rhstyp, 3L);
+	if (i__1 != 0) {
+	    goto L198;
+	}
+	i__1 = do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L198;
+	}
+	i__1 = do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L198;
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L198;
+	}
+    }
+    skew = (float)0.;	/* stays 0 unless the second type character is Z/z (-1) or S/s (+1) */
+    if (type[1] == 'Z' || type[1] == 'z') {
+	skew = (float)-1.;
+    }
+    if (type[1] == 'S' || type[1] == 's') {
+	skew = (float)1.;
+    }
+    sym = skew != 0.;	/* true for both the S and the Z case */
+    s_wsfe(&io___33);	/* echo the format strings; write statuses are not checked */
+    do_fio(&c__1, ptrfmt, 16L);
+    do_fio(&c__1, indfmt, 16L);
+    do_fio(&c__1, valfmt, 20L);
+    do_fio(&c__1, rhsfmt, 20L);
+    e_wsfe();
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: */
+	s_wsfe(&io___34);
+	do_fio(&c__1, rhstyp, 3L);
+	do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	e_wsfe();
+    }
+    s_wsle(&io___35);
+    do_lio(&c__9, &c__1, " sym: ", 6L);
+    do_lio(&c__8, &c__1, (char *)&sym, (ftnlen)sizeof(logical));
+    do_lio(&c__9, &c__1, " skew: ", 7L);
+    do_lio(&c__5, &c__1, (char *)&skew, (ftnlen)sizeof(doublereal));
+    e_wsle();
+    s_wsle(&io___36);
+    do_lio(&c__9, &c__1, "reading colptr", 14L);
+    e_wsle();
+    i__1 = s_rsfe(&io___37);	/* column pointers, using the format read from the file */
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__2 = *ncols + 1;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&ptr[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L198;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L198;
+    }
+    s_wsle(&io___39);
+    do_lio(&c__9, &c__1, "reading rowind", 14L);
+    e_wsle();
+    i__1 = s_rsfe(&io___40);	/* row indices, using the format read from the file */
+    if (i__1 != 0) {
+	goto L198;
+    }
+    i__2 = *nnz;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&index[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L198;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L198;
+    }
+/*      what's this? maybe for rectangular matrices.  As written the loop
+        starts at *ncols + 2 with upper bound *ncols + 1, so its body
+        never runs */
+    i__1 = *ncols + 1;
+    for (col = *ncols + 2; col <= i__1; ++col) {
+	ptr[col] = ptr[*ncols + 1];
+/* L155: */
+    }
+    s_wsle(&io___42);
+    do_lio(&c__9, &c__1, "reading values", 14L);
+    e_wsle();
+/*       read the values, or create random-valued matrix: values are read
+         only when valcrd > 0; otherwise a sym matrix gets *ncols on the
+         diagonal and -1 elsewhere, and any other matrix gets myrand_
+         values */
+    if (valcrd > 0) {
+	i__1 = s_rsfe(&io___43);
+	if (i__1 != 0) {
+	    goto L198;
+	}
+	i__2 = *nnz;
+	for (p = 1; p <= i__2; ++p) {
+	    i__1 = do_fio(&c__1, (char *)&value[p], (ftnlen)sizeof(doublereal)
+		    );
+	    if (i__1 != 0) {
+		goto L198;
+	    }
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L198;
+	}
+    } else {
+	if (sym) {
+	    i__1 = *ncols;
+	    for (col = 1; col <= i__1; ++col) {
+		i__2 = ptr[col + 1] - 1;
+		for (p = ptr[col]; p <= i__2; ++p) {
+		    row = index[p];
+		    if (row == col) {
+			value[p] = (doublereal) (*ncols);
+		    } else {
+			value[p] = (float)-1.;
+		    }
+/* L156: */
+		}
+/* L157: */
+	    }
+	} else {
+	    value[1] = myrand_(&c__0);	/* restarts the sequence; value[1] is overwritten by the loop when *nnz >= 1 */
+	    i__1 = *nnz;
+	    for (p = 1; p <= i__1; ++p) {
+		value[p] = myrand_(&c_n1);
+/* L158: */
+	    }
+	}
+    }
+/*  create the triplet form of the input matrix */
+/*        do 100 col = 1, n */
+/*           do 90 p = Ptr (col), Ptr (col+1) - 1 */
+/*              row = Index (p) */
+/*              write (6, 200) row, col, Value (p) */
+/*              if (sym .and. row .ne. col) then */
+/* 		 write (6, 200) col, row, skew * Value (p) */
+/* 		 endif */
+/* 90            continue */
+/* 100        continue */
+/* 200	format (2i7, e26.16e3) */
+    cl__1.cerr = 0;	/* close unit 99 and report success */
+    cl__1.cunit = 99;
+    cl__1.csta = 0;
+    f_clos(&cl__1);
+    return 0;
+L198:	/* every failed open/read ends up here */
+    s_wsle(&io___45);
+    do_lio(&c__9, &c__1, "Read error: Harwell/Boeing matrix", 33L);
+    e_wsle();
+    s_stop("", 0L);	/* no return statement follows; s_stop is presumably expected not to come back -- TODO confirm */
+} /* dreadhb_ */
+
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* Subroutine: sreadhb_ -- read a Harwell/Boeing matrix with real
+ * (single precision) values.
+ *
+ * fname      file name handed to f_open; the open request fixes the name
+ *            length at 256 characters and does not consult fname_len
+ * nrows, ncols, nnz
+ *            receive the header fields read after the type field
+ * ptr        receives *ncols + 1 column pointers
+ * index      receives *nnz row indices
+ * value      receives *nnz values: read from the file when valcrd > 0,
+ *            otherwise generated (see the end of the function body)
+ * fname_len  Fortran string length; not referenced in the body
+ *
+ * Pointers and indices are stored as read; the value-generation loop
+ * walks them as 1-based positions.  Returns 0 after closing unit 99.  Any
+ * non-zero status from the open or from a read call jumps to L298, which
+ * prints "Read error: Harwell/Boeing matrix" and calls s_stop.
+ */ int sreadhb_(fname, nrows, ncols, nnz, ptr, index, value, 
+	fname_len)
+char *fname;
+integer *nrows, *ncols, *nnz, *ptr, *index;
+real *value;
+ftnlen fname_len;
+{
+    /* Format strings: fmt_205/210/220 drive the three header reads,
+       fmt_230/240 the corresponding echoes */
+    static char fmt_205[] = "(a72,a8/5i14/a3,11x,4i14)";
+    static char fmt_210[] = "(2a16,2a20)";
+    static char fmt_220[] = "(a3,11x,2i14)";
+    static char fmt_230[] = "(\002 ptrfmt: \002,a20,\002 rowfmt: \002,a20,\
+/\002 valfmt: \002,a20,\002 rhsfmt: \002,a20)";
+    static char fmt_240[] = "(\002 rhstyp: \002,a3,\002 nrhs: \002,i14,\002 \
+nzrhs: \002,i14)";
+
+    /* System generated locals */
+    integer i__1, i__2;
+    olist o__1;
+    cllist cl__1;
+
+    /* Builtin functions */
+    integer f_open(), s_rsfe(), do_fio(), e_rsfe(), s_wsfe(), e_wsfe(), 
+	    s_wsle(), do_lio(), e_wsle(), f_clos();
+    /* Subroutine */ int s_stop();
+
+    /* Local variables (all static, so they keep their values between calls) */
+    static real skew;
+    static integer nrhs;
+    static char type[3];
+    static integer p;
+    static char title[72];
+    static integer nzrhs, indcrd, valcrd;
+    static char indfmt[16];
+    static integer rhscrd;
+    static char valfmt[20];
+    extern doublereal myrand_();
+    static integer ptrcrd, totcrd;
+    static char rhsfmt[20], ptrfmt[16], rhstyp[3];
+    static integer col, nel;
+    static char key[30];
+    static integer row;
+    static logical sym;
+
+    /* Fortran I/O blocks: io___71, io___74 and io___77 take their format
+       from ptrfmt, indfmt and valfmt, i.e. from text read out of the file */
+    static cilist io___46 = { 1, 99, 0, fmt_205, 0 };
+    static cilist io___56 = { 1, 99, 0, fmt_210, 0 };
+    static cilist io___61 = { 1, 99, 0, fmt_220, 0 };
+    static cilist io___67 = { 0, 0, 0, fmt_230, 0 };
+    static cilist io___68 = { 0, 0, 0, fmt_240, 0 };
+    static cilist io___69 = { 0, 0, 0, 0, 0 };
+    static cilist io___70 = { 0, 6, 0, 0, 0 };
+    static cilist io___71 = { 1, 99, 0, ptrfmt, 0 };
+    static cilist io___73 = { 0, 6, 0, 0, 0 };
+    static cilist io___74 = { 1, 99, 0, indfmt, 0 };
+    static cilist io___76 = { 0, 6, 0, 0, 0 };
+    static cilist io___77 = { 1, 99, 0, valfmt, 0 };
+    static cilist io___79 = { 0, 0, 0, 0, 0 };
+
+
+/* -----------------------------------------------------------------------
+ */
+/*       read header information from Harwell/Boeing matrix */
+    /* Parameter adjustments: shift the base pointers so that the arrays
+       can be indexed from 1 below */
+    --value;
+    --index;
+    --ptr;
+
+    /* Function Body */
+    o__1.oerr = 1;
+    o__1.ounit = 99;
+    o__1.ofnmlen = 256;	/* fixed name length; fname_len is ignored */
+    o__1.ofnm = fname;
+    o__1.orl = 0;
+    o__1.osta = "OLD";
+    o__1.oacc = 0;
+    o__1.ofm = 0;
+    o__1.oblnk = 0;
+    i__1 = f_open(&o__1);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = s_rsfe(&io___46);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, title, 72L);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, key, 30L);	/* 30-char buffer; fmt_205 gives this item a8 */
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&totcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&ptrcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&indcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&valcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&rhscrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, type, 3L);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nrows), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*ncols), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nnz), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, (char *)&nel, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = s_rsfe(&io___56);	/* next record: the four format strings */
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, ptrfmt, 16L);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, indfmt, 16L);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, valfmt, 20L);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = do_fio(&c__1, rhsfmt, 20L);
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L298;
+    }
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: an extra record with rhstyp, nrhs
+            and nzrhs is read only when rhscrd is positive */
+	i__1 = s_rsfe(&io___61);
+	if (i__1 != 0) {
+	    goto L298;
+	}
+	i__1 = do_fio(&c__1, rhstyp, 3L);
+	if (i__1 != 0) {
+	    goto L298;
+	}
+	i__1 = do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L298;
+	}
+	i__1 = do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L298;
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L298;
+	}
+    }
+    skew = (float)0.;	/* stays 0 unless the second type character is Z/z (-1) or S/s (+1) */
+    if (type[1] == 'Z' || type[1] == 'z') {
+	skew = (float)-1.;
+    }
+    if (type[1] == 'S' || type[1] == 's') {
+	skew = (float)1.;
+    }
+    sym = skew != (float)0.;	/* true for both the S and the Z case */
+    s_wsfe(&io___67);	/* echo the format strings; write statuses are not checked */
+    do_fio(&c__1, ptrfmt, 16L);
+    do_fio(&c__1, indfmt, 16L);
+    do_fio(&c__1, valfmt, 20L);
+    do_fio(&c__1, rhsfmt, 20L);
+    e_wsfe();
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: */
+	s_wsfe(&io___68);
+	do_fio(&c__1, rhstyp, 3L);
+	do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	e_wsfe();
+    }
+    s_wsle(&io___69);
+    do_lio(&c__9, &c__1, " sym: ", 6L);
+    do_lio(&c__8, &c__1, (char *)&sym, (ftnlen)sizeof(logical));
+    do_lio(&c__9, &c__1, " skew: ", 7L);
+    do_lio(&c__4, &c__1, (char *)&skew, (ftnlen)sizeof(real));
+    e_wsle();
+    s_wsle(&io___70);
+    do_lio(&c__9, &c__1, "reading colptr", 14L);
+    e_wsle();
+    i__1 = s_rsfe(&io___71);	/* column pointers, using the format read from the file */
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__2 = *ncols + 1;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&ptr[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L298;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L298;
+    }
+    s_wsle(&io___73);
+    do_lio(&c__9, &c__1, "reading rowind", 14L);
+    e_wsle();
+    i__1 = s_rsfe(&io___74);	/* row indices, using the format read from the file */
+    if (i__1 != 0) {
+	goto L298;
+    }
+    i__2 = *nnz;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&index[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L298;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L298;
+    }
+/*      what's this? maybe for rectangular matrices.  As written the loop
+        starts at *ncols + 2 with upper bound *ncols + 1, so its body
+        never runs */
+    i__1 = *ncols + 1;
+    for (col = *ncols + 2; col <= i__1; ++col) {
+	ptr[col] = ptr[*ncols + 1];
+/* L255: */
+    }
+    s_wsle(&io___76);
+    do_lio(&c__9, &c__1, "reading values", 14L);
+    e_wsle();
+/*       read the values, or create random-valued matrix: values are read
+         only when valcrd > 0; otherwise a sym matrix gets *ncols on the
+         diagonal and -1 elsewhere, and any other matrix gets myrand_
+         values */
+    if (valcrd > 0) {
+	i__1 = s_rsfe(&io___77);
+	if (i__1 != 0) {
+	    goto L298;
+	}
+	i__2 = *nnz;
+	for (p = 1; p <= i__2; ++p) {
+	    i__1 = do_fio(&c__1, (char *)&value[p], (ftnlen)sizeof(real));
+	    if (i__1 != 0) {
+		goto L298;
+	    }
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L298;
+	}
+    } else {
+	if (sym) {
+	    i__1 = *ncols;
+	    for (col = 1; col <= i__1; ++col) {
+		i__2 = ptr[col + 1] - 1;
+		for (p = ptr[col]; p <= i__2; ++p) {
+		    row = index[p];
+		    if (row == col) {
+			value[p] = (real) (*ncols);
+		    } else {
+			value[p] = (float)-1.;
+		    }
+/* L256: */
+		}
+/* L257: */
+	    }
+	} else {
+	    value[1] = myrand_(&c__0);	/* restarts the sequence; value[1] is overwritten by the loop when *nnz >= 1 */
+	    i__1 = *nnz;
+	    for (p = 1; p <= i__1; ++p) {
+		value[p] = myrand_(&c_n1);
+/* L258: */
+	    }
+	}
+    }
+/*  create the triplet form of the input matrix */
+/*        do 100 col = 1, n */
+/*           do 90 p = Ptr (col), Ptr (col+1) - 1 */
+/*              row = Index (p) */
+/*              write (6, 200) row, col, Value (p) */
+/*              if (sym .and. row .ne. col) then */
+/* 		 write (6, 200) col, row, skew * Value (p) */
+/* 		 endif */
+/* 90            continue */
+/* 100        continue */
+/* 200	format (2i7, e26.16e3) */
+    cl__1.cerr = 0;	/* close unit 99 and report success */
+    cl__1.cunit = 99;
+    cl__1.csta = 0;
+    f_clos(&cl__1);
+    return 0;
+L298:	/* every failed open/read ends up here */
+    s_wsle(&io___79);
+    do_lio(&c__9, &c__1, "Read error: Harwell/Boeing matrix", 33L);
+    e_wsle();
+    s_stop("", 0L);	/* no return statement follows; s_stop is presumably expected not to come back -- TODO confirm */
+} /* sreadhb_ */
+
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* Subroutine: zreadhb_ -- read a Harwell/Boeing matrix with doublecomplex
+ * values.
+ *
+ * fname      file name handed to f_open; the open request fixes the name
+ *            length at 256 characters and does not consult fname_len
+ * nrows, ncols, nnz
+ *            receive the header fields read after the type field
+ * ptr        receives *ncols + 1 column pointers
+ * index      receives *nnz row indices
+ * value      receives *nnz values: read from the file when valcrd > 0,
+ *            otherwise generated (see the end of the function body)
+ * fname_len  Fortran string length; not referenced in the body
+ *
+ * Pointers and indices are stored as read; the value-generation loop
+ * walks them as 1-based positions.  Returns 0 after closing unit 99.  Any
+ * non-zero status from the open or from a read call jumps to L398, which
+ * prints "Read error: Harwell/Boeing matrix" and calls s_stop.
+ */
+/* Subroutine */ int zreadhb_(fname, nrows, ncols, nnz, ptr, index, value, 
+	fname_len)
+char *fname;
+integer *nrows, *ncols, *nnz, *ptr, *index;
+doublecomplex *value;
+ftnlen fname_len;
+{
+    /* Format strings: fmt_305/310/320 drive the three header reads,
+       fmt_330/340 the corresponding echoes */
+    static char fmt_305[] = "(a72,a8/5i14/a3,11x,4i14)";
+    static char fmt_310[] = "(2a16,2a20)";
+    static char fmt_320[] = "(a3,11x,2i14)";
+    static char fmt_330[] = "(\002 ptrfmt: \002,a20,\002 rowfmt: \002,a20,\
+/\002 valfmt: \002,a20,\002 rhsfmt: \002,a20)";
+    static char fmt_340[] = "(\002 rhstyp: \002,a3,\002 nrhs: \002,i14,\002 \
+nzrhs: \002,i14)";
+
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    olist o__1;
+    cllist cl__1;
+
+    /* Builtin functions */
+    integer f_open(), s_rsfe(), do_fio(), e_rsfe(), s_wsfe(), e_wsfe(), 
+	    s_wsle(), do_lio(), e_wsle(), f_clos();
+    /* Subroutine */ int s_stop();
+
+    /* Local variables (all static, so they keep their values between calls) */
+    static doublecomplex skew;
+    static integer nrhs;
+    static char type[3];
+    static integer p;
+    static char title[72];
+    static integer nzrhs, indcrd, valcrd;
+    static char indfmt[16];
+    static integer rhscrd;
+    static char valfmt[20];
+    /* myrand_ is the doublereal generator defined in this file: it takes
+       one integer pointer and returns its result by value.  The
+       translator's "double complex" declaration of it was wrong and had
+       been commented out; declare it here the way the real-valued
+       readers do. */
+    extern doublereal myrand_();
+    static integer ptrcrd, totcrd;
+    static char rhsfmt[20], ptrfmt[16], rhstyp[3];
+    static integer col, nel;
+    static char key[30];
+    static integer row;
+    static logical sym;
+
+    /* Fortran I/O blocks: io___105, io___108 and io___111 take their
+       format from ptrfmt, indfmt and valfmt, i.e. from text read out of
+       the file */
+    static cilist io___80 = { 1, 99, 0, fmt_305, 0 };
+    static cilist io___90 = { 1, 99, 0, fmt_310, 0 };
+    static cilist io___95 = { 1, 99, 0, fmt_320, 0 };
+    static cilist io___101 = { 0, 0, 0, fmt_330, 0 };
+    static cilist io___102 = { 0, 0, 0, fmt_340, 0 };
+    static cilist io___103 = { 0, 0, 0, 0, 0 };
+    static cilist io___104 = { 0, 6, 0, 0, 0 };
+    static cilist io___105 = { 1, 99, 0, ptrfmt, 0 };
+    static cilist io___107 = { 0, 6, 0, 0, 0 };
+    static cilist io___108 = { 1, 99, 0, indfmt, 0 };
+    static cilist io___110 = { 0, 6, 0, 0, 0 };
+    static cilist io___111 = { 1, 99, 0, valfmt, 0 };
+    static cilist io___113 = { 0, 0, 0, 0, 0 };
+
+
+/* -----------------------------------------------------------------------
+ */
+/*       read header information from Harwell/Boeing matrix */
+    /* Parameter adjustments: shift the base pointers so that the arrays
+       can be indexed from 1 below */
+    --value;
+    --index;
+    --ptr;
+
+    /* Function Body */
+    o__1.oerr = 1;
+    o__1.ounit = 99;
+    o__1.ofnmlen = 256;	/* fixed name length; fname_len is ignored */
+    o__1.ofnm = fname;
+    o__1.orl = 0;
+    o__1.osta = "OLD";
+    o__1.oacc = 0;
+    o__1.ofm = 0;
+    o__1.oblnk = 0;
+    i__1 = f_open(&o__1);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = s_rsfe(&io___80);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, title, 72L);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, key, 30L);	/* 30-char buffer; fmt_305 gives this item a8 */
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&totcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&ptrcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&indcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&valcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&rhscrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, type, 3L);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nrows), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*ncols), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nnz), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, (char *)&nel, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = s_rsfe(&io___90);	/* next record: the four format strings */
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, ptrfmt, 16L);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, indfmt, 16L);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, valfmt, 20L);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = do_fio(&c__1, rhsfmt, 20L);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L398;
+    }
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: an extra record with rhstyp, nrhs
+            and nzrhs is read only when rhscrd is positive */
+	i__1 = s_rsfe(&io___95);
+	if (i__1 != 0) {
+	    goto L398;
+	}
+	i__1 = do_fio(&c__1, rhstyp, 3L);
+	if (i__1 != 0) {
+	    goto L398;
+	}
+	i__1 = do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L398;
+	}
+	i__1 = do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L398;
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L398;
+	}
+    }
+    /* skew stays 0 unless the second type character is Z/z (-1) or
+       S/s (+1); its imaginary part is always 0 */
+    skew.r = (float)0., skew.i = (float)0.;
+    if (type[1] == 'Z' || type[1] == 'z') {
+	skew.r = (float)-1., skew.i = (float)0.;
+    }
+    if (type[1] == 'S' || type[1] == 's') {
+	skew.r = (float)1., skew.i = (float)0.;
+    }
+    sym = skew.r != 0. || skew.i != 0.;
+    /* echo the format strings; write statuses are not checked */
+    s_wsfe(&io___101);
+    do_fio(&c__1, ptrfmt, 16L);
+    do_fio(&c__1, indfmt, 16L);
+    do_fio(&c__1, valfmt, 20L);
+    do_fio(&c__1, rhsfmt, 20L);
+    e_wsfe();
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: */
+	s_wsfe(&io___102);
+	do_fio(&c__1, rhstyp, 3L);
+	do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	e_wsfe();
+    }
+    s_wsle(&io___103);
+    do_lio(&c__9, &c__1, " sym: ", 6L);
+    do_lio(&c__8, &c__1, (char *)&sym, (ftnlen)sizeof(logical));
+    do_lio(&c__9, &c__1, " skew: ", 7L);
+    do_lio(&c__7, &c__1, (char *)&skew, (ftnlen)sizeof(doublecomplex));
+    e_wsle();
+    s_wsle(&io___104);
+    do_lio(&c__9, &c__1, "reading colptr", 14L);
+    e_wsle();
+    /* column pointers, using the format read from the file */
+    i__1 = s_rsfe(&io___105);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__2 = *ncols + 1;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&ptr[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L398;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L398;
+    }
+    s_wsle(&io___107);
+    do_lio(&c__9, &c__1, "reading rowind", 14L);
+    e_wsle();
+    /* row indices, using the format read from the file */
+    i__1 = s_rsfe(&io___108);
+    if (i__1 != 0) {
+	goto L398;
+    }
+    i__2 = *nnz;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&index[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L398;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L398;
+    }
+/*      what's this? maybe for rectangular matrices.  As written the loop
+        starts at *ncols + 2 with upper bound *ncols + 1, so its body
+        never runs */
+    i__1 = *ncols + 1;
+    for (col = *ncols + 2; col <= i__1; ++col) {
+	ptr[col] = ptr[*ncols + 1];
+/* L355: */
+    }
+    s_wsle(&io___110);
+    do_lio(&c__9, &c__1, "reading values", 14L);
+    e_wsle();
+/*       read the values, or create random-valued matrix: values are read
+         only when valcrd > 0; otherwise a sym matrix gets *ncols on the
+         diagonal and -1 elsewhere, and any other matrix gets myrand_
+         values */
+    if (valcrd > 0) {
+	i__1 = s_rsfe(&io___111);
+	if (i__1 != 0) {
+	    goto L398;
+	}
+	i__2 = *nnz;
+	for (p = 1; p <= i__2; ++p) {
+	    /* each complex entry is transferred as two doublereals */
+	    i__1 = do_fio(&c__2, (char *)&value[p], (ftnlen)sizeof(doublereal)
+		    );
+	    if (i__1 != 0) {
+		goto L398;
+	    }
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L398;
+	}
+    } else {
+	if (sym) {
+	    i__1 = *ncols;
+	    for (col = 1; col <= i__1; ++col) {
+		i__2 = ptr[col + 1] - 1;
+		for (p = ptr[col]; p <= i__2; ++p) {
+		    row = index[p];
+		    if (row == col) {
+			i__3 = p;
+			value[i__3].r = (doublereal) (*ncols), value[i__3].i =
+				 0.;
+		    } else {
+			i__3 = p;
+			value[i__3].r = (float)-1., value[i__3].i = (float)0.;
+		    }
+/* L356: */
+		}
+/* L357: */
+	    }
+	} else {
+	    /* Restart the generator.  The previous code called
+	       myrand_(&z__1, &c__0) as if it returned a complex through
+	       its first argument; myrand_ takes a single argument, so
+	       z__1 was never written and value[] was filled from an
+	       uninitialised local.  The result of this first call is not
+	       stored: the loop below assigns every entry, including
+	       value[1]. */
+	    myrand_(&c__0);
+	    i__1 = *nnz;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = p;
+		/* real random part, zero imaginary part */
+		value[i__2].r = myrand_(&c_n1), value[i__2].i = 0.;
+/* L350: */
+	    }
+	}
+    }
+/*  create the triplet form of the input matrix */
+/*        do 100 col = 1, n */
+/*           do 90 p = Ptr (col), Ptr (col+1) - 1 */
+/*              row = Index (p) */
+/*              write (6, 200) row, col, Value (p) */
+/*              if (sym .and. row .ne. col) then */
+/* 		 write (6, 200) col, row, skew * Value (p) */
+/* 		 endif */
+/* 90            continue */
+/* 100        continue */
+/* 200	format (2i7, e26.16e3) */
+    /* close unit 99 and report success */
+    cl__1.cerr = 0;
+    cl__1.cunit = 99;
+    cl__1.csta = 0;
+    f_clos(&cl__1);
+    return 0;
+L398:
+    /* every failed open/read ends up here; no return statement follows
+       s_stop, which is presumably expected not to come back */
+    s_wsle(&io___113);
+    do_lio(&c__9, &c__1, "Read error: Harwell/Boeing matrix", 33L);
+    e_wsle();
+    s_stop("", 0L);
+} /* zreadhb_ */
+
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------- */
+/*
+ * creadhb_ -- f2c translation of the Fortran subroutine CREADHB.
+ *
+ * Reads a Harwell/Boeing matrix file through Fortran unit 99 and stores
+ * it as column pointers (ptr), row indices (index) and single-precision
+ * complex values (value).
+ *
+ *   fname      file name; the open request always passes a length of
+ *              256, independent of fname_len
+ *   nrows,
+ *   ncols,
+ *   nnz        outputs, filled from the file header
+ *   ptr        output, entries 1..ncols+1 (1-based after adjustment)
+ *   index      output, entries 1..nnz
+ *   value      output, entries 1..nnz
+ *   fname_len  not referenced in the body
+ *
+ * Returns 0 on success. Every failing open/read jumps to L498, which
+ * prints "Read error: Harwell/Boeing matrix" and calls s_stop().
+ *
+ * The constants c__0, c__1, c__2, c__6, c__8, c__9 and c_n1 are defined
+ * outside this block.
+ */
+/* Subroutine */ int creadhb_(fname, nrows, ncols, nnz, ptr, index, value, 
+	fname_len)
+char *fname;
+integer *nrows, *ncols, *nnz, *ptr, *index;
+complex *value;
+ftnlen fname_len;
+{
+    /* Format strings */
+    static char fmt_405[] = "(a72,a8/5i14/a3,11x,4i14)";
+    static char fmt_410[] = "(2a16,2a20)";
+    static char fmt_420[] = "(a3,11x,2i14)";
+    static char fmt_430[] = "(\002 ptrfmt: \002,a20,\002 rowfmt: \002,a20,\
+/\002 valfmt: \002,a20,\002 rhsfmt: \002,a20)";
+    static char fmt_440[] = "(\002 rhstyp: \002,a3,\002 nrhs: \002,i14,\002 \
+nzrhs: \002,i14)";
+
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    complex q__1;
+    olist o__1;
+    cllist cl__1;
+
+    /* Builtin functions */
+    integer f_open(), s_rsfe(), do_fio(), e_rsfe(), s_wsfe(), e_wsfe(), 
+	    s_wsle(), do_lio(), e_wsle(), f_clos();
+    /* Subroutine */ int s_stop();
+
+    /* Local variables (all static, so the routine is not reentrant) */
+    static complex skew;
+    static integer nrhs;
+    static char type[3];
+    static integer p;
+    static char title[72];
+    static integer nzrhs, indcrd, valcrd;
+    static char indfmt[16];
+    static integer rhscrd;
+    static char valfmt[20];
+    /* NOTE(review): the declaration of myrand_ is commented out below, so */
+    /* the calls further down rely on a declaration from elsewhere in the  */
+    /* file or on an implicit one. They use the "complex function" calling */
+    /* convention (result pointer first) -- confirm that this matches the  */
+    /* actual definition of myrand_.                                       */
+    //htl extern /* Complex */ int myrand_();
+    static integer ptrcrd, totcrd;
+    static char rhsfmt[20], ptrfmt[16], rhstyp[3];
+    static integer col, nel;
+    static char key[30];
+    static integer row;
+    static logical sym;
+
+    /* Fortran I/O blocks */
+    /* The blocks built on ptrfmt, indfmt and valfmt use format strings */
+    /* that are read from the file header at run time.                  */
+    static cilist io___114 = { 1, 99, 0, fmt_405, 0 };
+    static cilist io___124 = { 1, 99, 0, fmt_410, 0 };
+    static cilist io___129 = { 1, 99, 0, fmt_420, 0 };
+    static cilist io___135 = { 0, 0, 0, fmt_430, 0 };
+    static cilist io___136 = { 0, 0, 0, fmt_440, 0 };
+    static cilist io___137 = { 0, 0, 0, 0, 0 };
+    static cilist io___138 = { 0, 6, 0, 0, 0 };
+    static cilist io___139 = { 1, 99, 0, ptrfmt, 0 };
+    static cilist io___141 = { 0, 6, 0, 0, 0 };
+    static cilist io___142 = { 1, 99, 0, indfmt, 0 };
+    static cilist io___144 = { 0, 6, 0, 0, 0 };
+    static cilist io___145 = { 1, 99, 0, valfmt, 0 };
+    static cilist io___147 = { 0, 0, 0, 0, 0 };
+
+
+/* -----------------------------------------------------------------------
+ */
+/*       read header information from Harwell/Boeing matrix */
+    /* Parameter adjustments */
+    /* Shift the array pointers so that the Fortran 1-based subscripts */
+    /* used below address the caller's element 0.                      */
+    --value;
+    --index;
+    --ptr;
+
+    /* Function Body */
+    /* Open the existing file on unit 99; the name length is fixed at 256. */
+    o__1.oerr = 1;
+    o__1.ounit = 99;
+    o__1.ofnmlen = 256;
+    o__1.ofnm = fname;
+    o__1.orl = 0;
+    o__1.osta = "OLD";
+    o__1.oacc = 0;
+    o__1.ofm = 0;
+    o__1.oblnk = 0;
+    i__1 = f_open(&o__1);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    /* Header record group 1 (fmt_405): title, key, card counts, type and */
+    /* dimensions. key is a 30-byte buffer read with an a8 edit           */
+    /* descriptor.                                                        */
+    i__1 = s_rsfe(&io___114);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, title, 72L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, key, 30L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&totcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&ptrcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&indcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&valcrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&rhscrd, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, type, 3L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nrows), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*ncols), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&(*nnz), (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, (char *)&nel, (ftnlen)sizeof(integer));
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L498;
+    }
+    /* Header record 2 (fmt_410): the four format strings used for the */
+    /* data sections that follow.                                      */
+    i__1 = s_rsfe(&io___124);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, ptrfmt, 16L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, indfmt, 16L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, valfmt, 20L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = do_fio(&c__1, rhsfmt, 20L);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L498;
+    }
+    /* The right-hand-side header record is only read when rhscrd > 0. */
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: */
+	i__1 = s_rsfe(&io___129);
+	if (i__1 != 0) {
+	    goto L498;
+	}
+	i__1 = do_fio(&c__1, rhstyp, 3L);
+	if (i__1 != 0) {
+	    goto L498;
+	}
+	i__1 = do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L498;
+	}
+	i__1 = do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L498;
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L498;
+	}
+    }
+    /* The second character of the type code selects skew: 'Z'/'z' gives */
+    /* -1, 'S'/'s' gives +1, anything else leaves 0. sym is true exactly */
+    /* when skew is nonzero.                                             */
+    skew.r = (float)0., skew.i = (float)0.;
+    if (type[1] == 'Z' || type[1] == 'z') {
+	skew.r = (float)-1., skew.i = (float)0.;
+    }
+    if (type[1] == 'S' || type[1] == 's') {
+	skew.r = (float)1., skew.i = (float)0.;
+    }
+    sym = skew.r != (float)0. || skew.i != (float)0.;
+    /* Echo the header information that was just read (diagnostics only). */
+    s_wsfe(&io___135);
+    do_fio(&c__1, ptrfmt, 16L);
+    do_fio(&c__1, indfmt, 16L);
+    do_fio(&c__1, valfmt, 20L);
+    do_fio(&c__1, rhsfmt, 20L);
+    e_wsfe();
+    if (rhscrd > 0) {
+/*          new Harwell/Boeing format: */
+	s_wsfe(&io___136);
+	do_fio(&c__1, rhstyp, 3L);
+	do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof(integer));
+	do_fio(&c__1, (char *)&nzrhs, (ftnlen)sizeof(integer));
+	e_wsfe();
+    }
+    s_wsle(&io___137);
+    do_lio(&c__9, &c__1, " sym: ", 6L);
+    do_lio(&c__8, &c__1, (char *)&sym, (ftnlen)sizeof(logical));
+    do_lio(&c__9, &c__1, " skew: ", 7L);
+    do_lio(&c__6, &c__1, (char *)&skew, (ftnlen)sizeof(complex));
+    e_wsle();
+    s_wsle(&io___138);
+    do_lio(&c__9, &c__1, "reading colptr", 14L);
+    e_wsle();
+    /* Column pointers: ncols+1 integers read with the run-time ptrfmt. */
+    i__1 = s_rsfe(&io___139);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__2 = *ncols + 1;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&ptr[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L498;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L498;
+    }
+    s_wsle(&io___141);
+    do_lio(&c__9, &c__1, "reading rowind", 14L);
+    e_wsle();
+    /* Row indices: nnz integers read with the run-time indfmt. */
+    i__1 = s_rsfe(&io___142);
+    if (i__1 != 0) {
+	goto L498;
+    }
+    i__2 = *nnz;
+    for (p = 1; p <= i__2; ++p) {
+	i__1 = do_fio(&c__1, (char *)&index[p], (ftnlen)sizeof(integer));
+	if (i__1 != 0) {
+	    goto L498;
+	}
+    }
+    i__1 = e_rsfe();
+    if (i__1 != 0) {
+	goto L498;
+    }
+/*      what's this? maybe for rectangular matrices */
+    /* The loop starts at ncols+2 and ends at ncols+1, so its body never */
+    /* executes.                                                         */
+    i__1 = *ncols + 1;
+    for (col = *ncols + 2; col <= i__1; ++col) {
+	ptr[col] = ptr[*ncols + 1];
+/* L455: */
+    }
+    s_wsle(&io___144);
+    do_lio(&c__9, &c__1, "reading values", 14L);
+    e_wsle();
+/*       read the values, or create random-valued matrix */
+    if (valcrd > 0) {
+	/* Each complex entry is transferred as two reals (count c__2). */
+	i__1 = s_rsfe(&io___145);
+	if (i__1 != 0) {
+	    goto L498;
+	}
+	i__2 = *nnz;
+	for (p = 1; p <= i__2; ++p) {
+	    i__1 = do_fio(&c__2, (char *)&value[p], (ftnlen)sizeof(real));
+	    if (i__1 != 0) {
+		goto L498;
+	    }
+	}
+	i__1 = e_rsfe();
+	if (i__1 != 0) {
+	    goto L498;
+	}
+    } else {
+	if (sym) {
+	    /* No values in the file, symmetric/skew type: put ncols on */
+	    /* the diagonal and -1 on every other stored entry.         */
+	    i__1 = *ncols;
+	    for (col = 1; col <= i__1; ++col) {
+		i__2 = ptr[col + 1] - 1;
+		for (p = ptr[col]; p <= i__2; ++p) {
+		    row = index[p];
+		    if (row == col) {
+			i__3 = p;
+			value[i__3].r = (real) (*ncols), value[i__3].i = (
+				float)0.;
+		    } else {
+			i__3 = p;
+			value[i__3].r = (float)-1., value[i__3].i = (float)0.;
+		    }
+/* L456: */
+		}
+/* L457: */
+	    }
+	} else {
+	    /* No values in the file, other types: fill from myrand_.   */
+	    /* value[1] is written by the first call and then written   */
+	    /* again by the loop, which starts at p = 1.                */
+	    myrand_(&q__1, &c__0);
+	    value[1].r = q__1.r, value[1].i = q__1.i;
+	    i__1 = *nnz;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = p;
+		myrand_(&q__1, &c_n1);
+		value[i__2].r = q__1.r, value[i__2].i = q__1.i;
+/* L450: */
+	    }
+	}
+    }
+/*  create the triplet form of the input matrix */
+/*        do 100 col = 1, n */
+/*           do 90 p = Ptr (col), Ptr (col+1) - 1 */
+/*              row = Index (p) */
+/*              write (6, 200) row, col, Value (p) */
+/*              if (sym .and. row .ne. col) then */
+/* 		 write (6, 200) col, row, skew * Value (p) */
+/* 		 endif */
+/* 90            continue */
+/* 100        continue */
+/* 200	format (2i7, e26.16e3) */
+    /* Success path: close unit 99 and return 0. */
+    cl__1.cerr = 0;
+    cl__1.cunit = 99;
+    cl__1.csta = 0;
+    f_clos(&cl__1);
+    return 0;
+    /* Common error exit: report and stop. There is no return statement */
+    /* after s_stop(); presumably s_stop() does not return -- confirm.  */
+L498:
+    s_wsle(&io___147);
+    do_lio(&c__9, &c__1, "Read error: Harwell/Boeing matrix", 33L);
+    e_wsle();
+    s_stop("", 0L);
+} /* creadhb_ */
+
diff --git a/contrib/taucs/external/src/readhb.f b/contrib/taucs/external/src/readhb.f
new file mode 100644
index 0000000000000000000000000000000000000000..b28e59e5038cb51ac658a94c4d949b6b35147504
--- /dev/null
+++ b/contrib/taucs/external/src/readhb.f
@@ -0,0 +1,554 @@
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+
+        subroutine ireadhb(fname,type,
+     $                     nrows,ncols,nnz)
+c
+c  Read only the header of a Harwell/Boeing matrix file and echo it
+c  on unit 0.
+c
+c  Arguments:
+c    fname  file name (character*256), used in the open statement
+c    type   3-character matrix type code, set from the header
+c    nrows, ncols, nnz
+c           dimensions and entry count, set from the header
+c
+c  The file is opened on unit 99 with status OLD and closed before
+c  returning.  Any open or read error branches to label 999, which
+c  prints a message on unit 0 and stops the program.
+c
+
+      
+        implicit none
+        integer nrows, ncols, nnz
+        integer totcrd, ptrcrd,
+     $		indcrd, valcrd, rhscrd
+c        character title*72, key*30, type*3, ptrfmt*16,
+c     $          indfmt*16, valfmt*20, rhsfmt*20
+        character title*72, key*30, type*3
+        character fname*256
+c        logical sym
+c        double precision skew
+c        double precision myrand
+c        character rhstyp*3
+c        integer nzrhs
+        integer nel
+
+c-----------------------------------------------------------------------
+
+c       read header information from Harwell/Boeing matrix
+        open (99, file=fname, err=999, status="OLD")
+
+c       key is declared with length 30 but read and written with a8
+        read (99, 10, err = 999)
+     $          title, key,
+     $          totcrd, ptrcrd, indcrd, valcrd, rhscrd,
+     $          type, nrows, ncols, nnz, nel
+10      format (a72, a8 / 5i14 / a3, 11x, 4i14)
+
+c       echo everything that was read (diagnostic output only)
+        write (0, 30)
+     $          title, key,
+     $          totcrd, ptrcrd, indcrd, valcrd, rhscrd,
+     $          type, nrows, ncols, nnz, nel
+30      format (
+     $          ' title: ', a72 /
+     $          ' key: ', a8 /
+     $          ' Lines: tot: ', i14,' ptr: ',i14,' ind: ',i14 /
+     $          '        val: ', i14,' rhs: ',i14 /
+     $          ' type: ', a3, ' nrow: ', i14, ' ncol: ', i14 /
+     $          ' nz: ', i14, ' elements: ', i14)
+
+        close (99)
+        return
+
+c       common error exit for open and read failures
+999     write (0,*) 'Read error: Harwell/Boeing matrix'
+        stop
+        end
+
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+
+        subroutine dreadhb(fname,
+     $                     nrows,ncols,nnz,
+     $                     Ptr,Index,Value)
+c
+c  Read a Harwell/Boeing matrix file into column-pointer / row-index
+c  arrays with double precision values.
+c
+c  Arguments:
+c    fname  file name (character*256), used in the open statement
+c    nrows, ncols, nnz
+c           dimensions and entry count, set from the header
+c    Ptr    receives ncols+1 column pointers
+c    Index  receives nnz row indices
+c    Value  receives nnz values
+c
+c  If the header reports no value cards (valcrd <= 0) the values are
+c  synthesized instead of read: when the second character of the
+c  type code is S or Z (either case) every diagonal entry is set to
+c  ncols and every other entry to -1; otherwise all entries come
+c  from myrand.
+c
+c  The file is read through unit 99.  Any open or read error
+c  branches to label 198, which prints a message on unit 0 and stops
+c  the program.
+c
+
+        implicit none
+        integer nrows, ncols, nnz
+        integer Ptr(*), Index(*), totcrd, ptrcrd,
+     $		indcrd, valcrd, rhscrd, nrhs, row, col, p
+        character title*72, key*30, type*3, ptrfmt*16,
+     $          indfmt*16, valfmt*20, rhsfmt*20
+        character fname*256
+        logical sym
+        double precision Value (*), skew
+        double precision myrand
+        character rhstyp*3
+        integer nzrhs, nel
+
+c-----------------------------------------------------------------------
+
+c       read header information from Harwell/Boeing matrix
+
+        open (99, file=fname, err=198, status="OLD")
+
+        read (99, 105, err = 198)
+     $          title, key,
+     $          totcrd, ptrcrd, indcrd, valcrd, rhscrd,
+     $          type, nrows, ncols, nnz, nel
+105     format (a72, a8 / 5i14 / a3, 11x, 4i14)
+
+c       the format strings read here drive the data reads below
+        read (99, 110, err = 198)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           read (99, 120, err = 198) rhstyp,nrhs,nzrhs
+           endif
+110     format (2a16, 2a20)
+120     format (a3, 11x, 2i14)
+
+c       skew is -1 for type code Z, +1 for S, 0 otherwise;
+c       sym is true exactly when skew is nonzero
+        skew = 0.0
+        if (type (2:2) .eq. 'Z' .or. type (2:2) .eq. 'z') skew = -1.0
+        if (type (2:2) .eq. 'S' .or. type (2:2) .eq. 's') skew =  1.0
+        sym = skew .ne. 0.0
+
+c       echo the header information (diagnostic output only)
+        write (0, 130)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           write (0, 140) rhstyp,nrhs,nzrhs
+           endif
+130     format (
+     $          ' ptrfmt: ', a20, ' rowfmt: ', a20, /
+     $          ' valfmt: ', a20, ' rhsfmt: ', a20)
+140     format (' rhstyp: ', a3, ' nrhs: ', i14, ' nzrhs: ', i14)
+        write (0, *) ' sym: ', sym, ' skew: ', skew
+
+        print *,'reading colptr'
+
+        read (99, ptrfmt, err = 198) (Ptr (p), p = 1, ncols+1)
+        print *,'reading rowind'
+        read (99, indfmt, err = 198) (Index (p), p = 1, nnz)
+
+c      what's this? maybe for rectangular matrices
+
+c        do 155 col = ncols+2, ncols+1
+c           Ptr (col) = Ptr (ncols+1)
+c155         continue
+
+        print *,'reading values'
+c       read the values, or create random-valued matrix
+        if (valcrd .gt. 0) then
+           read (99, valfmt, err = 198) (Value (p), p = 1, nnz)
+        else
+          if (sym) then
+            do 157 col = 1, ncols
+              do 156 p = Ptr(col), Ptr(col+1)-1
+                row = Index(p)
+                if (row .eq. col) then
+                  Value(p) = ncols
+                else
+                  Value(p) = -1.0
+                endif
+156           continue
+157         continue
+          else
+c           myrand(0) restarts the generator; the loop below starts
+c           at p = 1 and therefore overwrites Value(1)
+            Value (1) = myrand (0)
+            do 158 p = 1, nnz
+               Value (p) = myrand (-1)
+158         continue
+          endif
+        endif
+
+c  create the triplet form of the input matrix
+
+c        do 100 col = 1, n
+c           do 90 p = Ptr (col), Ptr (col+1) - 1
+c              row = Index (p)
+c              write (6, 200) row, col, Value (p)
+c              if (sym .and. row .ne. col) then
+c		 write (6, 200) col, row, skew * Value (p)
+c		 endif
+c90            continue
+c100        continue
+c200	format (2i7, e26.16e3)
+
+        close (99)
+        return
+
+c       common error exit for open and read failures
+198     write (0,*) 'Read error: Harwell/Boeing matrix'
+        stop
+        end
+
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+
+        subroutine sreadhb(fname,
+     $                     nrows,ncols,nnz,
+     $                     Ptr,Index,Value)
+c
+c  Read a Harwell/Boeing matrix file into column-pointer / row-index
+c  arrays with single precision (real*4) values.
+c
+c  Arguments:
+c    fname  file name (character*256), used in the open statement
+c    nrows, ncols, nnz
+c           dimensions and entry count, set from the header
+c    Ptr    receives ncols+1 column pointers
+c    Index  receives nnz row indices
+c    Value  receives nnz values
+c
+c  If the header reports no value cards (valcrd <= 0) the values are
+c  synthesized instead of read: when the second character of the
+c  type code is S or Z (either case) every diagonal entry is set to
+c  ncols and every other entry to -1; otherwise all entries come
+c  from myrand (a double precision result stored into real*4).
+c
+c  The file is read through unit 99.  Any open or read error
+c  branches to label 298, which prints a message on unit 0 and stops
+c  the program.
+c
+
+        implicit none
+        integer nrows, ncols, nnz
+        integer Ptr(*), Index(*), totcrd, ptrcrd,
+     $		indcrd, valcrd, rhscrd, nrhs, row, col, p
+        character title*72, key*30, type*3, ptrfmt*16,
+     $          indfmt*16, valfmt*20, rhsfmt*20
+        character fname*256
+        logical sym
+        real*4 Value (*), skew
+        double precision myrand
+        character rhstyp*3
+        integer nzrhs, nel
+
+c-----------------------------------------------------------------------
+
+c       read header information from Harwell/Boeing matrix
+
+        open (99, file=fname, err=298, status="OLD")
+
+        read (99, 205, err = 298)
+     $          title, key,
+     $          totcrd, ptrcrd, indcrd, valcrd, rhscrd,
+     $          type, nrows, ncols, nnz, nel
+205     format (a72, a8 / 5i14 / a3, 11x, 4i14)
+
+c       the format strings read here drive the data reads below
+        read (99, 210, err = 298)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           read (99, 220, err = 298) rhstyp,nrhs,nzrhs
+           endif
+210     format (2a16, 2a20)
+220     format (a3, 11x, 2i14)
+
+c       skew is -1 for type code Z, +1 for S, 0 otherwise;
+c       sym is true exactly when skew is nonzero
+        skew = 0.0
+        if (type (2:2) .eq. 'Z' .or. type (2:2) .eq. 'z') skew = -1.0
+        if (type (2:2) .eq. 'S' .or. type (2:2) .eq. 's') skew =  1.0
+        sym = skew .ne. 0.0
+
+c       echo the header information (diagnostic output only)
+        write (0, 230)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           write (0, 240) rhstyp,nrhs,nzrhs
+           endif
+230     format (
+     $          ' ptrfmt: ', a20, ' rowfmt: ', a20, /
+     $          ' valfmt: ', a20, ' rhsfmt: ', a20)
+240     format (' rhstyp: ', a3, ' nrhs: ', i14, ' nzrhs: ', i14)
+        write (0, *) ' sym: ', sym, ' skew: ', skew
+
+        print *,'reading colptr'
+
+        read (99, ptrfmt, err = 298) (Ptr (p), p = 1, ncols+1)
+        print *,'reading rowind'
+        read (99, indfmt, err = 298) (Index (p), p = 1, nnz)
+
+c      what's this? maybe for rectangular matrices
+
+c        do 255 col = ncols+2, ncols+1
+c           Ptr (col) = Ptr (ncols+1)
+c255         continue
+
+        print *,'reading values'
+c       read the values, or create random-valued matrix
+        if (valcrd .gt. 0) then
+           read (99, valfmt, err = 298) (Value (p), p = 1, nnz)
+        else
+          if (sym) then
+            do 257 col = 1, ncols
+              do 256 p = Ptr(col), Ptr(col+1)-1
+                row = Index(p)
+                if (row .eq. col) then
+                  Value(p) = ncols
+                else
+                  Value(p) = -1.0
+                endif
+256           continue
+257         continue
+          else
+c           myrand(0) restarts the generator; the loop below starts
+c           at p = 1 and therefore overwrites Value(1)
+            Value (1) = myrand (0)
+            do 258 p = 1, nnz
+               Value (p) = myrand (-1)
+258         continue
+          endif
+        endif
+
+c  create the triplet form of the input matrix
+
+c        do 100 col = 1, n
+c           do 90 p = Ptr (col), Ptr (col+1) - 1
+c              row = Index (p)
+c              write (6, 200) row, col, Value (p)
+c              if (sym .and. row .ne. col) then
+c		 write (6, 200) col, row, skew * Value (p)
+c		 endif
+c90            continue
+c100        continue
+c200	format (2i7, e26.16e3)
+
+        close (99)
+        return
+
+c       common error exit for open and read failures
+298     write (0,*) 'Read error: Harwell/Boeing matrix'
+        stop
+        end
+
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+
+        subroutine zreadhb(fname,
+     $                     nrows,ncols,nnz,
+     $                     Ptr,Index,Value)
+c
+c  Read a Harwell/Boeing matrix file into column-pointer / row-index
+c  arrays with double complex values.
+c
+c  Arguments:
+c    fname  file name (character*256), used in the open statement
+c    nrows, ncols, nnz
+c           dimensions and entry count, set from the header
+c    Ptr    receives ncols+1 column pointers
+c    Index  receives nnz row indices
+c    Value  receives nnz values
+c
+c  If the header reports no value cards (valcrd <= 0) the values are
+c  synthesized instead of read: when the second character of the
+c  type code is S or Z (either case) every diagonal entry is set to
+c  ncols and every other entry to -1; otherwise all entries come
+c  from myrand (a double precision result, so the imaginary part is
+c  zero).
+c
+c  The file is read through unit 99.  Any open or read error
+c  branches to label 398, which prints a message on unit 0 and stops
+c  the program.
+c
+
+        implicit none
+        integer nrows, ncols, nnz
+        integer Ptr(*), Index(*), totcrd, ptrcrd,
+     $		indcrd, valcrd, rhscrd, nrhs, row, col, p
+        character title*72, key*30, type*3, ptrfmt*16,
+     $          indfmt*16, valfmt*20, rhsfmt*20
+        character fname*256
+        logical sym
+        double complex Value (*), skew
+        double precision myrand
+        character rhstyp*3
+        integer nzrhs, nel
+
+c-----------------------------------------------------------------------
+
+c       read header information from Harwell/Boeing matrix
+
+        open (99, file=fname, err=398, status="OLD")
+
+        read (99, 305, err = 398)
+     $          title, key,
+     $          totcrd, ptrcrd, indcrd, valcrd, rhscrd,
+     $          type, nrows, ncols, nnz, nel
+305     format (a72, a8 / 5i14 / a3, 11x, 4i14)
+
+c       the format strings read here drive the data reads below
+        read (99, 310, err = 398)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           read (99, 320, err = 398) rhstyp,nrhs,nzrhs
+           endif
+310      format (2a16, 2a20)
+320      format (a3, 11x, 2i14)
+
+c       skew is -1 for type code Z, +1 for S, 0 otherwise;
+c       sym is true exactly when skew is nonzero
+        skew = 0.0
+        if (type (2:2) .eq. 'Z' .or. type (2:2) .eq. 'z') skew = -1.0
+        if (type (2:2) .eq. 'S' .or. type (2:2) .eq. 's') skew =  1.0
+        sym = skew .ne. 0.0
+
+c       echo the header information (diagnostic output only)
+        write (0, 330)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           write (0, 340) rhstyp,nrhs,nzrhs
+           endif
+330     format (
+     $          ' ptrfmt: ', a20, ' rowfmt: ', a20, /
+     $          ' valfmt: ', a20, ' rhsfmt: ', a20)
+340     format (' rhstyp: ', a3, ' nrhs: ', i14, ' nzrhs: ', i14)
+        write (0, *) ' sym: ', sym, ' skew: ', skew
+
+        print *,'reading colptr'
+
+        read (99, ptrfmt, err = 398) (Ptr (p), p = 1, ncols+1)
+        print *,'reading rowind'
+        read (99, indfmt, err = 398) (Index (p), p = 1, nnz)
+
+c      what's this? maybe for rectangular matrices
+
+c        do 355 col = ncols+2, ncols+1
+c           Ptr (col) = Ptr (ncols+1)
+c355        continue
+
+        print *,'reading values'
+c       read the values, or create random-valued matrix
+        if (valcrd .gt. 0) then
+           read (99, valfmt, err = 398) (Value (p), p = 1, nnz)
+        else
+          if (sym) then
+            do 357 col = 1, ncols
+              do 356 p = Ptr(col), Ptr(col+1)-1
+                row = Index(p)
+                if (row .eq. col) then
+                  Value(p) = ncols
+                else
+                  Value(p) = -1.0
+                endif
+356           continue
+357         continue
+          else
+c           myrand(0) restarts the generator; the loop below starts
+c           at p = 1 and therefore overwrites Value(1)
+            Value (1) = myrand (0)
+            do 350 p = 1, nnz
+               Value (p) = myrand (-1)
+350         continue
+          endif
+        endif
+
+c  create the triplet form of the input matrix
+
+c        do 100 col = 1, n
+c           do 90 p = Ptr (col), Ptr (col+1) - 1
+c              row = Index (p)
+c              write (6, 200) row, col, Value (p)
+c              if (sym .and. row .ne. col) then
+c		 write (6, 200) col, row, skew * Value (p)
+c		 endif
+c90            continue
+c100        continue
+c200	format (2i7, e26.16e3)
+
+        close (99)
+        return
+
+c       common error exit for open and read failures
+398     write (0,*) 'Read error: Harwell/Boeing matrix'
+        stop
+        end
+
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+
+        subroutine creadhb(fname,
+     $                     nrows,ncols,nnz,
+     $                     Ptr,Index,Value)
+c
+c  Read a Harwell/Boeing matrix file into column-pointer / row-index
+c  arrays with single precision complex values.
+c
+c  Arguments:
+c    fname  file name (character*256), used in the open statement
+c    nrows, ncols, nnz
+c           dimensions and entry count, set from the header
+c    Ptr    receives ncols+1 column pointers
+c    Index  receives nnz row indices
+c    Value  receives nnz values
+c
+c  If the header reports no value cards (valcrd <= 0) the values are
+c  synthesized instead of read: when the second character of the
+c  type code is S or Z (either case) every diagonal entry is set to
+c  ncols and every other entry to -1; otherwise all entries come
+c  from myrand (a double precision result, so the imaginary part is
+c  zero).
+c
+c  The file is read through unit 99.  Any open or read error
+c  branches to label 498, which prints a message on unit 0 and stops
+c  the program.
+c
+
+        implicit none
+        integer nrows, ncols, nnz
+        integer Ptr(*), Index(*), totcrd, ptrcrd,
+     $		indcrd, valcrd, rhscrd, nrhs, row, col, p
+        character title*72, key*30, type*3, ptrfmt*16,
+     $          indfmt*16, valfmt*20, rhsfmt*20
+        character fname*256
+        logical sym
+        complex Value (*), skew
+        double precision myrand
+        character rhstyp*3
+        integer nzrhs, nel
+
+c-----------------------------------------------------------------------
+
+c       read header information from Harwell/Boeing matrix
+
+        open (99, file=fname, err=498, status="OLD")
+
+        read (99, 405, err = 498)
+     $          title, key,
+     $          totcrd, ptrcrd, indcrd, valcrd, rhscrd,
+     $          type, nrows, ncols, nnz, nel
+405     format (a72, a8 / 5i14 / a3, 11x, 4i14)
+
+c       the format strings read here drive the data reads below
+        read (99, 410, err = 498)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           read (99, 420, err = 498) rhstyp,nrhs,nzrhs
+           endif
+410     format (2a16, 2a20)
+420     format (a3, 11x, 2i14)
+
+c       skew is -1 for type code Z, +1 for S, 0 otherwise;
+c       sym is true exactly when skew is nonzero
+        skew = 0.0
+        if (type (2:2) .eq. 'Z' .or. type (2:2) .eq. 'z') skew = -1.0
+        if (type (2:2) .eq. 'S' .or. type (2:2) .eq. 's') skew =  1.0
+        sym = skew .ne. 0.0
+
+c       echo the header information (diagnostic output only)
+        write (0, 430)
+     $          ptrfmt, indfmt, valfmt, rhsfmt
+        if (rhscrd .gt. 0) then
+c          new Harwell/Boeing format:
+           write (0, 440) rhstyp,nrhs,nzrhs
+           endif
+430     format (
+     $          ' ptrfmt: ', a20, ' rowfmt: ', a20, /
+     $          ' valfmt: ', a20, ' rhsfmt: ', a20)
+440     format (' rhstyp: ', a3, ' nrhs: ', i14, ' nzrhs: ', i14)
+        write (0, *) ' sym: ', sym, ' skew: ', skew
+
+        print *,'reading colptr'
+
+        read (99, ptrfmt, err = 498) (Ptr (p), p = 1, ncols+1)
+        print *,'reading rowind'
+        read (99, indfmt, err = 498) (Index (p), p = 1, nnz)
+
+c      what's this? maybe for rectangular matrices
+
+c        do 455 col = ncols+2, ncols+1
+c           Ptr (col) = Ptr (ncols+1)
+c455        continue
+
+        print *,'reading values'
+c       read the values, or create random-valued matrix
+        if (valcrd .gt. 0) then
+           read (99, valfmt, err = 498) (Value (p), p = 1, nnz)
+        else
+          if (sym) then
+            do 457 col = 1, ncols
+              do 456 p = Ptr(col), Ptr(col+1)-1
+                row = Index(p)
+                if (row .eq. col) then
+                  Value(p) = ncols
+                else
+                  Value(p) = -1.0
+                endif
+456           continue
+457         continue
+          else
+c           myrand(0) restarts the generator; the loop below starts
+c           at p = 1 and therefore overwrites Value(1)
+            Value (1) = myrand (0)
+            do 450 p = 1, nnz
+               Value (p) = myrand (-1)
+450          continue
+          endif
+        endif
+
+c  create the triplet form of the input matrix
+
+c        do 100 col = 1, n
+c           do 90 p = Ptr (col), Ptr (col+1) - 1
+c              row = Index (p)
+c              write (6, 200) row, col, Value (p)
+c              if (sym .and. row .ne. col) then
+c		 write (6, 200) col, row, skew * Value (p)
+c		 endif
+c90            continue
+c100        continue
+c200	format (2i7, e26.16e3)
+
+        close (99)
+        return
+
+c       common error exit for open and read failures
+498     write (0,*) 'Read error: Harwell/Boeing matrix'
+        stop
+        end
+
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+c-----------------------------------------------------------------------
+
+c=== Myrand ============================================================
+c
+c  Derived from the FA01 routines in the MUPS package (CERFACS and/or
+c  Harwell).  CERFACS and/or Harwell copyrights may apply.  Permission
+c  granted to use this routine in the DEMO PROGRAM only.
+c
+c  DEMO PROGRAM.
+c
+c  random number generator
+c  i = 0:  reinitialize the sequence
+c  i >=0:  return 0 < x < 1
+c  i < 0:  return -1 < x < 1
+
+        double precision function myrand (i)
+c
+c  Multiplicative congruential generator kept in common block
+c  /mrand/.  Each call replaces seed by mod(seed*mfac, 2**32) and
+c  returns it scaled by 2**32.
+c
+c    i .eq. 0   restart the sequence from the fixed value start first
+c    i .ge. 0   result is seed/2**32
+c    i .lt. 0   result is 2*(seed/2**32) - 1
+c
+        integer i
+        double precision seed, start, mfac, d2to32, frac
+        common /mrand/ seed
+        parameter (start = 1431655765.d0,
+     $             d2to32 = 4294967296.d0, mfac = 9228907.d0)
+
+c       reinitialize to known sequence when asked to
+        if (i .eq. 0) seed = start
+
+c       advance the generator and scale the new state once
+        seed = dmod (seed * mfac, d2to32)
+        frac = seed/d2to32
+
+        if (i .lt. 0) then
+           myrand = 2 * frac - 1
+        else
+           myrand = frac
+        endif
+        return
+        end
+
+
+
+
diff --git a/contrib/taucs/src/blas_aux.c b/contrib/taucs/src/blas_aux.c
new file mode 100644
index 0000000000000000000000000000000000000000..65088d3efa78ce690e260eaf669b007c2447ce84
--- /dev/null
+++ b/contrib/taucs/src/blas_aux.c
@@ -0,0 +1,36 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+ * Case-insensitive comparison of the single characters *ca and *cb.
+ * Returns nonzero when they are equal ignoring case, 0 otherwise.
+ *
+ * Each character is converted to unsigned char before it is passed to
+ * tolower(): handing tolower() a negative plain-char value (any byte
+ * above 0x7f where char is signed) is undefined behaviour.
+ */
+int lsame(char* ca, char* cb)
+{
+  return (tolower((unsigned char)*ca) == tolower((unsigned char)*cb));
+}
+
+/*
+ * Error handler: reports that parameter number *info of routine srname
+ * (at most 6 characters are printed) had an illegal value, first on
+ * stderr and then on stdout, and terminates the process with status 1.
+ */
+void xerbla(char* srname, int* info)
+{
+  static const char report[] =
+    "** On entry to %.6s parameter number %d had an illegal value\n";
+  const int param = *info;
+
+  fprintf(stderr, report, srname, param);
+  fprintf(stdout, report, srname, param);
+  exit(1);
+}
+
+/*
+ * Same comparison as lsame, under the name with a trailing underscore.
+ * Returns nonzero when *ca and *cb are equal ignoring case, 0 otherwise.
+ *
+ * Each character is converted to unsigned char before it is passed to
+ * tolower(): handing tolower() a negative plain-char value (any byte
+ * above 0x7f where char is signed) is undefined behaviour.
+ */
+int lsame_(char* ca, char* cb)
+{
+  return (tolower((unsigned char)*ca) == tolower((unsigned char)*cb));
+}
+
+/*
+ * Error handler under the name with a trailing underscore: reports that
+ * parameter number *info of routine srname (at most 6 characters are
+ * printed) had an illegal value, first on stderr and then on stdout,
+ * and terminates the process with status 1.
+ */
+void xerbla_(char* srname, int* info)
+{
+  static const char report[] =
+    "** On entry to %.6s parameter number %d had an illegal value\n";
+  const int param = *info;
+
+  fprintf(stderr, report, srname, param);
+  fprintf(stdout, report, srname, param);
+  exit(1);
+}
diff --git a/contrib/taucs/src/taucs.h b/contrib/taucs/src/taucs.h
new file mode 100644
index 0000000000000000000000000000000000000000..d1e79179a3ef0aea7b7a692388acde7bdc7a08b0
--- /dev/null
+++ b/contrib/taucs/src/taucs.h
@@ -0,0 +1,837 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <taucs_config_tests.h>
+#include <taucs_config_build.h>
+
+/*********************************************************/
+/* Cilk-related stuff                                    */
+/*********************************************************/
+
+#ifdef TAUCS_CILK
+#undef TAUCS_C99_COMPLEX /* cilk2c can't process complex.h */
+#endif
+
+#ifdef TAUCS_CORE_CILK
+#ifdef TAUCS_CILK
+/* We are compiling a Cilk source with a Cilk compiler */
+
+
+#include <cilk.h>
+#include <cilk-lib.h>
+
+#define taucs_cilk   cilk
+#define taucs_spawn  spawn
+#define taucs_sync   sync
+#define taucs_inlet  inlet
+#define taucs_Self   Self
+#define taucs_Cilk_active_size Cilk_active_size
+
+#else
+/* We are compiling a Cilk source, but with a C compiler */
+#define cilk
+#define spawn
+#define sync
+#define inlet
+#define Self 0
+#define Cilk_active_size 1
+
+#define taucs_cilk
+#define taucs_spawn
+#define taucs_sync
+#define taucs_inlet
+#define taucs_Self 0
+#define taucs_Cilk_active_size 1
+#endif
+#else /* not CORE_CILK */
+#define taucs_cilk
+#define taucs_spawn
+#define taucs_sync
+#define taucs_inlet
+#define taucs_Self 0
+#define taucs_Cilk_active_size 1
+#endif
+
+/*********************************************************/
+/* other stuff                                           */
+/*********************************************************/
+
+#ifdef TAUCS_CONFIG_DREAL
+#define TAUCS_DOUBLE_IN_BUILD
+#endif
+#ifdef TAUCS_CONFIG_SREAL
+#define TAUCS_SINGLE_IN_BUILD
+#endif
+#ifdef TAUCS_CONFIG_DCOMPLEX
+#define TAUCS_DCOMPLEX_IN_BUILD
+#endif
+#ifdef TAUCS_CONFIG_SCOMPLEX
+#define TAUCS_SCOMPLEX_IN_BUILD
+#endif
+
+#if   defined(TAUCS_BLAS_UNDERSCORE)
+#define taucs_blas_name(x) (x##_)
+#elif defined(TAUCS_BLAS_NOUNDERSCORE)
+#define taucs_blas_name(x) (x)
+#else
+#error "taucs_blas_[no]underscore_test: linking with the BLAS failed both attempts"
+#endif 
+
+#ifdef OSTYPE_win32 /* supply POSIX names for the win32 build */
+typedef long ssize_t; /* signed, so that a -1 error return compares as negative */
+typedef int mode_t;
+typedef int perm_t;
+#define random    rand
+#define srandom   srand
+#endif
+
+#define TAUCS_SUCCESS                       0
+#define TAUCS_ERROR                        -1
+#define TAUCS_ERROR_NOMEM                  -2
+#define TAUCS_ERROR_BADARGS                -3
+#define TAUCS_ERROR_INDEFINITE             -4
+#define TAUCS_ERROR_MAXDEPTH               -5
+
+#define TAUCS_INT       1024
+#define TAUCS_DOUBLE    2048
+#define TAUCS_SINGLE    4096
+#define TAUCS_DCOMPLEX  8192
+#define TAUCS_SCOMPLEX 16384
+
+#define TAUCS_LOWER      1
+#define TAUCS_UPPER      2
+#define TAUCS_TRIANGULAR 4
+#define TAUCS_SYMMETRIC  8
+#define TAUCS_HERMITIAN  16
+#define TAUCS_PATTERN    32
+
+#define TAUCS_METHOD_LLT  1
+#define TAUCS_METHOD_LDLT 2
+#define TAUCS_METHOD_PLU  3
+
+#define TAUCS_VARIANT_SNMF 1
+#define TAUCS_VARIANT_SNLL 2
+
+typedef double    taucs_double;
+typedef float     taucs_single;
+
+/* The macro TAUCS_C99_COMPLEX is defined in */
+/* build/OSTYPE/taucs_config_tests if the    */
+/* test program progs/taucs_c99_complex_test */
+/* compiles, links, and runs.                */
+
+/*#if defined(__GNUC__) && !defined(TAUCS_CONFIG_GENERIC_COMPLEX)*/
+#ifdef TAUCS_C99_COMPLEX
+/*
+typedef __complex__ double taucs_dcomplex;
+typedef __complex__ float  taucs_scomplex;
+*/
+
+#include <complex.h>
+
+#undef I
+#ifdef _Imaginary_I
+#define TAUCS_IMAGINARY_I _Imaginary_I
+#else
+#define TAUCS_IMAGINARY_I _Complex_I
+#endif
+
+typedef _Complex double taucs_dcomplex;
+typedef _Complex float  taucs_scomplex;
+
+#define taucs_complex_create(r,i)  ((r)+TAUCS_IMAGINARY_I*(i))
+#define taucs_ccomplex_create(r,i) ((r)+TAUCS_IMAGINARY_I*(i))
+#define taucs_zcomplex_create(r,i) ((r)+TAUCS_IMAGINARY_I*(i))
+
+#define taucs_add(x,y) ((x)+(y))
+#define taucs_sub(x,y) ((x)-(y))
+#define taucs_mul(x,y) ((x)*(y))
+#define taucs_div(x,y) ((x)/(y))
+#define taucs_neg(x)   (-(x))
+
+#define taucs_dadd(x,y) ((x)+(y))
+#define taucs_dsub(x,y) ((x)-(y))
+#define taucs_dmul(x,y) ((x)*(y))
+#define taucs_ddiv(x,y) ((x)/(y))
+#define taucs_dneg(x)   (-(x))
+#define taucs_dconj(x)  (x)
+#define taucs_dimag(x)    0.0
+#define taucs_dreal(x)    (x)
+#define taucs_dminusone -1.0
+#define taucs_done      1.0
+#define taucs_dzero     0.0
+#define taucs_dabs(x)   (fabs(x))
+#define taucs_dsqrt(x)  (sqrt(x))
+
+#define taucs_sadd(x,y) ((x)+(y))
+#define taucs_ssub(x,y) ((x)-(y))
+#define taucs_smul(x,y) ((x)*(y))
+#define taucs_sdiv(x,y) ((x)/(y))
+#define taucs_sneg(x)   (-(x))
+#define taucs_sconj(x)  (x)
+#define taucs_simag(x)    0.0f
+#define taucs_sreal(x)    (x)
+#define taucs_sminusone -1.0f
+#define taucs_sone      1.0f
+#define taucs_szero     0.0f
+#define taucs_sabs(x)   ((taucs_single) fabs(x))
+#define taucs_ssqrt(x)  ((taucs_single) sqrt(x))
+
+#define taucs_zadd(x,y) ((x)+(y))
+#define taucs_zsub(x,y) ((x)-(y))
+#define taucs_zmul(x,y) ((x)*(y))
+#define taucs_zdiv(x,y) ((x)/(y))
+#define taucs_zneg(x)   (-(x))
+#define taucs_zconj(x)  (conj(x))
+#define taucs_zimag(x)    (cimag(x))
+#define taucs_zreal(x)    (creal(x))
+#define taucs_zminusone (-1.0+0.0*TAUCS_IMAGINARY_I) /* parenthesized: safe inside larger expressions */
+#define taucs_zone      (1.0+0.0*TAUCS_IMAGINARY_I)
+#define taucs_zzero     (0.0+0.0*TAUCS_IMAGINARY_I)
+#define taucs_zabs(x)   (cabs(x))
+#define taucs_zsqrt(x)  (csqrt(x))
+
+#define taucs_cadd(x,y) ((x)+(y))
+#define taucs_csub(x,y) ((x)-(y))
+#define taucs_cmul(x,y) ((x)*(y))
+#define taucs_cdiv(x,y) ((x)/(y))
+#define taucs_cneg(x)   (-(x))
+#define taucs_cconj(x)  (conjf(x))
+#define taucs_cimag(x)    (cimagf(x))
+#define taucs_creal(x)    (crealf(x))
+#define taucs_cminusone (-1.0f+0.0f*TAUCS_IMAGINARY_I) /* parenthesized: safe inside larger expressions */
+#define taucs_cone      (1.0f+0.0f*TAUCS_IMAGINARY_I)
+#define taucs_czero     (0.0f+0.0f*TAUCS_IMAGINARY_I)
+#define taucs_cabs(x)   (cabsf(x))
+#define taucs_csqrt(x)  (csqrtf(x)) /* float variant, like the other single-precision complex macros */
+
+#if defined(TAUCS_CORE_DOUBLE)
+
+#define taucs_conj(x)  (x)
+#define taucs_im(x)    0.0
+#define taucs_re(x)    (x)
+#define taucs_minusone -1.0
+#define taucs_one      1.0
+#define taucs_zero     0.0
+#define taucs_abs(x)   (fabs(x))
+#define taucs_sqrt(x)  (sqrt(x))
+
+#elif defined(TAUCS_CORE_GENERAL)
+#define taucs_im(x)    0.0
+#define taucs_re(x)    (x)
+#define taucs_minusone -1.0
+#define taucs_one      1.0
+#define taucs_zero     0.0
+/*
+#define taucs_conj(x)  (x)
+#define taucs_abs(x)   (fabs(x))
+#define taucs_sqrt(x)  (sqrt(x))
+*/
+#elif defined(TAUCS_CORE_SINGLE)
+
+#define taucs_conj(x)  (x)
+#define taucs_im(x)    0.0f
+#define taucs_re(x)    (x)
+#define taucs_minusone -1.0f
+#define taucs_one      1.0f
+#define taucs_zero     0.0f
+#define taucs_abs(x)   ((taucs_single) fabs(x))
+#define taucs_sqrt(x)  ((taucs_single) sqrt(x))
+
+#elif defined(TAUCS_CORE_DCOMPLEX)
+/*
+#define taucs_conj(x)  (~(x))
+#define taucs_im(x)    (__imag__ (x))
+#define taucs_re(x)    (__real__ (x))
+#define taucs_minusone -1.0+0.0i
+#define taucs_one      1.0+0.0i
+#define taucs_zero     0.0+0.0i
+#define taucs_abs(x)   taucs_zabs_fn(x)
+#define taucs_sqrt(x)  taucs_zsqrt_fn(x)
+*/
+
+#define taucs_conj(x)  (conj(x))
+#define taucs_im(x)    (cimag(x))
+#define taucs_re(x)    (creal(x))
+#define taucs_minusone (-1.0+0.0*TAUCS_IMAGINARY_I) /* parenthesized: safe inside larger expressions */
+#define taucs_one      (1.0+0.0*TAUCS_IMAGINARY_I)
+#define taucs_zero     (0.0+0.0*TAUCS_IMAGINARY_I)
+#define taucs_abs(x)   (cabs(x))
+#define taucs_sqrt(x)  (csqrt(x))
+
+#elif defined(TAUCS_CORE_SCOMPLEX)
+/*
+#define taucs_conj(x)  (~(x))
+#define taucs_im(x)    (__imag__ (x))
+#define taucs_re(x)    (__real__ (x))
+#define taucs_minusone -1.0f+0.0fi
+#define taucs_one      1.0f+0.0fi
+#define taucs_zero     0.0f+0.0fi
+#define taucs_abs(x)   taucs_cabs_fn(x)
+#define taucs_sqrt(x)  taucs_csqrt_fn(x)
+*/
+#define taucs_conj(x)  (conjf(x))
+#define taucs_im(x)    (cimagf(x))
+#define taucs_re(x)    (crealf(x))
+#define taucs_minusone (-1.0f+0.0f*TAUCS_IMAGINARY_I) /* parenthesized: safe inside larger expressions */
+#define taucs_one      (1.0f+0.0f*TAUCS_IMAGINARY_I)
+#define taucs_zero     (0.0f+0.0f*TAUCS_IMAGINARY_I)
+#define taucs_abs(x)   (cabsf(x))
+#define taucs_sqrt(x)  (csqrtf(x))
+
+#endif
+
+#else /* C99 */
+
+typedef struct {double r,i;} taucs_dcomplex;
+typedef struct {float  r,i;} taucs_scomplex;
+
+#define taucs_zcomplex_create(r,i) taucs_zcomplex_create_fn(r,i)
+#define taucs_ccomplex_create(r,i) taucs_ccomplex_create_fn(r,i)
+
+#define taucs_dadd(x,y) ((x)+(y))
+#define taucs_dsub(x,y) ((x)-(y))
+#define taucs_dmul(x,y) ((x)*(y))
+#define taucs_ddiv(x,y) ((x)/(y))
+#define taucs_dneg(x)   (-(x))
+#define taucs_dconj(x)  (x)
+#define taucs_dabs(x)   (fabs(x))
+#define taucs_dsqrt(x)  (sqrt(x))
+#define taucs_dimag(x)   0.0
+#define taucs_dreal(x)   (x)
+#define taucs_dminusone -1.0
+#define taucs_done     1.0
+#define taucs_dzero    0.0
+
+#define taucs_sadd(x,y) ((x)+(y)) /* single-precision real: plain C operators */
+#define taucs_ssub(x,y) ((x)-(y))
+#define taucs_smul(x,y) ((x)*(y))
+#define taucs_sdiv(x,y) ((x)/(y))
+#define taucs_sneg(x)   (-(x))
+#define taucs_sconj(x)  (x) /* conjugate of a real value is the value itself */
+#define taucs_sabs(x)   ((taucs_single) fabs(x)) /* double-precision fabs, result cast back to single */
+#define taucs_ssqrt(x)  ((taucs_single) sqrt(x)) /* double-precision sqrt, result cast back to single */
+#define taucs_sim(x)   0.0f /* NOTE(review): the TAUCS_C99_COMPLEX branch names this taucs_simag; confirm which name callers use */
+#define taucs_sre(x)   (x) /* NOTE(review): the TAUCS_C99_COMPLEX branch names this taucs_sreal; confirm which name callers use */
+#define taucs_sminusone -1.0f
+#define taucs_sone     1.0f
+#define taucs_szero    0.0f
+
+#define taucs_zadd(x,y) taucs_zadd_fn(x,y)
+#define taucs_zsub(x,y) taucs_zsub_fn(x,y)
+#define taucs_zmul(x,y) taucs_zmul_fn(x,y)
+#define taucs_zdiv(x,y) taucs_zdiv_fn(x,y)
+#define taucs_zneg(x)   taucs_zneg_fn(x)
+#define taucs_zconj(x)  taucs_zconj_fn(x)
+#define taucs_zabs(x)   taucs_zabs_fn(x)
+#define taucs_zsqrt(x)  taucs_zsqrt_fn(x)
+#define taucs_zimag(x)    ((x).i)
+#define taucs_zreal(x)    ((x).r)
+#define taucs_zminusone taucs_zminusone_const
+#define taucs_zone      taucs_zone_const
+#define taucs_zzero     taucs_zzero_const
+
+#define taucs_cadd(x,y) taucs_cadd_fn(x,y)
+#define taucs_csub(x,y) taucs_csub_fn(x,y)
+#define taucs_cmul(x,y) taucs_cmul_fn(x,y)
+#define taucs_cdiv(x,y) taucs_cdiv_fn(x,y)
+#define taucs_cneg(x)   taucs_cneg_fn(x)
+#define taucs_cconj(x)  taucs_cconj_fn(x)
+#define taucs_cabs(x)   taucs_cabs_fn(x)
+#define taucs_csqrt(x)  taucs_csqrt_fn(x)
+#define taucs_cimag(x)    ((x).i)
+#define taucs_creal(x)    ((x).r)
+#define taucs_cminusone taucs_cminusone_const
+#define taucs_cone      taucs_cone_const
+#define taucs_czero     taucs_czero_const
+
+#if defined(TAUCS_CORE_DOUBLE)
+
+#define taucs_add(x,y) ((x)+(y))
+#define taucs_sub(x,y) ((x)-(y))
+#define taucs_mul(x,y) ((x)*(y))
+#define taucs_div(x,y) ((x)/(y))
+#define taucs_neg(x)   (-(x))
+#define taucs_conj(x)  (x)
+#define taucs_abs(x)   (fabs(x))
+#define taucs_sqrt(x)  (sqrt(x))
+
+#define taucs_im(x)   0.0
+#define taucs_re(x)   (x)
+#define taucs_minusone -1.0
+#define taucs_one     1.0
+#define taucs_zero    0.0
+
+#elif defined(TAUCS_CORE_GENERAL)
+/*
+#define taucs_add(x,y) ((x)+(y))
+#define taucs_sub(x,y) ((x)-(y))
+#define taucs_mul(x,y) ((x)*(y))
+#define taucs_div(x,y) ((x)/(y))
+#define taucs_neg(x)   (-(x))
+#define taucs_conj(x)  (x)
+#define taucs_abs(x)   (fabs(x))
+#define taucs_sqrt(x)  (sqrt(x))
+*/
+#define taucs_im(x)   0.0
+#define taucs_re(x)   (x)
+#define taucs_minusone -1.0
+#define taucs_one     1.0
+#define taucs_zero    0.0
+
+#elif defined(TAUCS_CORE_SINGLE)
+
+#define taucs_add(x,y) ((x)+(y))
+#define taucs_sub(x,y) ((x)-(y))
+#define taucs_mul(x,y) ((x)*(y))
+#define taucs_div(x,y) ((x)/(y))
+#define taucs_neg(x)   (-(x))
+#define taucs_conj(x)  (x)
+#define taucs_abs(x)   ((taucs_single) fabs(x))
+#define taucs_sqrt(x)  ((taucs_single) sqrt(x))
+
+#define taucs_im(x)   0.0f
+#define taucs_re(x)   (x)
+#define taucs_minusone -1.0f
+#define taucs_one     1.0f
+#define taucs_zero    0.0f
+
+#elif defined(TAUCS_CORE_DCOMPLEX)
+
+#define taucs_complex_create(r,i) taucs_zcomplex_create_fn(r,i)
+
+#define taucs_add(x,y) taucs_zadd_fn(x,y)
+#define taucs_sub(x,y) taucs_zsub_fn(x,y)
+#define taucs_mul(x,y) taucs_zmul_fn(x,y)
+#define taucs_div(x,y) taucs_zdiv_fn(x,y)
+#define taucs_neg(x)   taucs_zneg_fn(x)
+#define taucs_conj(x)  taucs_zconj_fn(x)
+#define taucs_abs(x)   taucs_zabs_fn(x)
+#define taucs_sqrt(x)  taucs_zsqrt_fn(x)
+
+#define taucs_im(x)    ((x).i)
+#define taucs_re(x)    ((x).r)
+#define taucs_minusone taucs_zminusone_const
+#define taucs_one      taucs_zone_const
+#define taucs_zero     taucs_zzero_const
+
+#elif defined(TAUCS_CORE_SCOMPLEX)
+
+#define taucs_complex_create(r,i) taucs_ccomplex_create_fn(r,i)
+
+#define taucs_add(x,y) taucs_cadd_fn(x,y)
+#define taucs_sub(x,y) taucs_csub_fn(x,y)
+#define taucs_mul(x,y) taucs_cmul_fn(x,y)
+#define taucs_div(x,y) taucs_cdiv_fn(x,y)
+#define taucs_neg(x)   taucs_cneg_fn(x)
+#define taucs_conj(x)  taucs_cconj_fn(x)
+#define taucs_abs(x)   taucs_cabs_fn(x)
+#define taucs_sqrt(x)  taucs_csqrt_fn(x)
+
+#define taucs_im(x)    ((x).i)
+#define taucs_re(x)    ((x).r)
+#define taucs_minusone taucs_cminusone_const
+#define taucs_one      taucs_cone_const
+#define taucs_zero     taucs_czero_const
+
+#endif /* SCOMPLEX */
+
+#endif
+
+extern taucs_double taucs_dzero_const    ;
+extern taucs_double taucs_done_const     ;
+extern taucs_double taucs_dminusone_const;
+
+extern taucs_single taucs_szero_const    ;
+extern taucs_single taucs_sone_const     ;
+extern taucs_single taucs_sminusone_const;
+
+extern taucs_dcomplex taucs_zzero_const    ;
+extern taucs_dcomplex taucs_zone_const     ;
+extern taucs_dcomplex taucs_zminusone_const;
+
+extern taucs_scomplex taucs_czero_const    ;
+extern taucs_scomplex taucs_cone_const     ;
+extern taucs_scomplex taucs_cminusone_const;
+
+#define taucs_isnan(x) (isnan((double)(taucs_re(x))) || isnan((double)(taucs_im(x))))
+#define taucs_isinf(x) (isinf((double)(taucs_re(x))) || isinf((double)(taucs_im(x))))
+
+#ifdef TAUCS_CORE_SINGLE
+#define taucs_zero_const     taucs_szero_const
+#define taucs_one_const      taucs_sone_const
+#define taucs_minusone_const taucs_sminusone_const
+
+#define taucs_zero_real_const     taucs_szero_const
+#define taucs_one_real_const      taucs_sone_const
+#define taucs_minusone_real_const taucs_sminusone_const
+
+#define taucs_gemm  taucs_blas_name(sgemm)
+#define taucs_potrf taucs_blas_name(spotrf)
+#define taucs_herk  taucs_blas_name(ssyrk)
+#define taucs_trsm  taucs_blas_name(strsm)
+#endif
+
+#ifdef TAUCS_CORE_DOUBLE
+#define taucs_zero_const     taucs_dzero_const
+#define taucs_one_const      taucs_done_const
+#define taucs_minusone_const taucs_dminusone_const
+
+#define taucs_zero_real_const     taucs_dzero_const
+#define taucs_one_real_const      taucs_done_const
+#define taucs_minusone_real_const taucs_dminusone_const
+
+#define taucs_gemm  taucs_blas_name(dgemm)
+#define taucs_potrf taucs_blas_name(dpotrf)
+#define taucs_herk  taucs_blas_name(dsyrk)
+#define taucs_trsm  taucs_blas_name(dtrsm)
+#endif
+
+/*
+#ifdef TAUCS_CORE_GENERAL
+#define taucs_zero_const     taucs_dzero_const
+#define taucs_one_const      taucs_done_const
+#define taucs_minusone_const taucs_dminusone_const
+#define taucs_zero_real_const     taucs_dzero_const
+#define taucs_one_real_const      taucs_done_const
+#define taucs_minusone_real_const taucs_dminusone_const
+#endif
+*/
+
+#ifdef TAUCS_CORE_SCOMPLEX
+#define taucs_zero_const     taucs_czero_const
+#define taucs_one_const      taucs_cone_const
+#define taucs_minusone_const taucs_cminusone_const
+
+#define taucs_zero_real_const     taucs_szero_const
+#define taucs_one_real_const      taucs_sone_const
+#define taucs_minusone_real_const taucs_sminusone_const
+
+#define taucs_gemm  taucs_blas_name(cgemm)
+#define taucs_potrf taucs_blas_name(cpotrf)
+#define taucs_herk  taucs_blas_name(cherk)
+#define taucs_trsm  taucs_blas_name(ctrsm)
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+#define taucs_zero_const     taucs_zzero_const
+#define taucs_one_const      taucs_zone_const
+#define taucs_minusone_const taucs_zminusone_const
+
+#define taucs_zero_real_const     taucs_dzero_const
+#define taucs_one_real_const      taucs_done_const
+#define taucs_minusone_real_const taucs_dminusone_const
+
+#define taucs_gemm  taucs_blas_name(zgemm)
+#define taucs_potrf taucs_blas_name(zpotrf)
+#define taucs_herk  taucs_blas_name(zherk)
+#define taucs_trsm  taucs_blas_name(ztrsm)
+#endif
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+typedef struct /* sparse matrix stored column by column (colptr / rowind / values) */
+{
+  int     n;    /* columns                      */
+  int     m;    /* rows; don't use if symmetric   */
+  int     flags; /* TAUCS_* bits; holds the datatype bit, presumably also structure bits such as TAUCS_LOWER - confirm */
+  int*    colptr; /* pointers to where columns begin in rowind and values. */
+                  /* 0-based. Length is (n+1). */
+  int*    rowind; /* row indices */
+
+  union { /* one array of numerical values, viewed through the pointer type matching the datatype */
+    void*           v; /* untyped view */
+    taucs_double*   d; /* double-precision real */
+    taucs_single*   s; /* single-precision real */
+    taucs_dcomplex* z; /* double-precision complex */
+    taucs_scomplex* c; /* single-precision complex */
+  } values;
+
+} taucs_ccs_matrix;
+
+typedef struct {
+  int   type;
+  int   nmatrices;
+  void* type_specific;
+
+  /* the following may change! do not rely on them. */
+  double nreads, nwrites, bytes_read, bytes_written, read_time, write_time;
+} taucs_io_handle;
+
+/* generate all the prototypes */
+
+#define taucs_datatype taucs_double
+#define taucs_real_datatype taucs_double
+#define taucs_dtl(X) taucs_d##X
+#include "taucs_private.h"
+#undef taucs_real_datatype
+#undef taucs_datatype
+#undef taucs_dtl
+
+#define taucs_datatype taucs_single
+#define taucs_real_datatype taucs_single
+#define taucs_dtl(X) taucs_s##X
+#include "taucs_private.h"
+#undef taucs_real_datatype
+#undef taucs_datatype
+#undef taucs_dtl
+
+#define taucs_datatype taucs_dcomplex
+#define taucs_real_datatype taucs_double
+#define taucs_dtl(X) taucs_z##X
+#include "taucs_private.h"
+#undef taucs_real_datatype
+#undef taucs_datatype
+#undef taucs_dtl
+
+#define taucs_datatype taucs_scomplex
+#define taucs_real_datatype taucs_single
+#define taucs_dtl(X) taucs_c##X
+#include "taucs_private.h"
+#undef taucs_real_datatype
+#undef taucs_datatype
+#undef taucs_dtl
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+/* now define the data type for the file that we compile now */
+
+#ifdef TAUCS_CORE_DOUBLE
+#define TAUCS_CORE
+#define TAUCS_CORE_REAL
+#define TAUCS_CORE_DATATYPE TAUCS_DOUBLE
+typedef taucs_double taucs_datatype;
+#define taucs_dtl(X) taucs_d##X
+#define taucs_values values.d
+#define taucs_iszero(x) ((x) == 0.0)
+typedef double taucs_real_datatype; /* omer: this is the datatype of the real and imaginary part of the datatype*/
+#endif
+
+#ifdef TAUCS_CORE_GENERAL
+#define TAUCS_CORE
+#define TAUCS_CORE_DATATYPE TAUCS_DOUBLE
+typedef taucs_double taucs_datatype;
+typedef double taucs_real_datatype; 
+/*
+#define TAUCS_CORE_REAL
+#define TAUCS_CORE_DATATYPE TAUCS_DOUBLE
+#define taucs_values values.d
+#define taucs_dtl(X) taucs_g##X
+#define taucs_iszero(x) ((x) == 0.0)
+*/
+#endif
+
+#ifdef  TAUCS_CORE_SINGLE
+#define TAUCS_CORE
+#define TAUCS_CORE_REAL
+#define TAUCS_CORE_DATATYPE TAUCS_SINGLE
+typedef taucs_single taucs_datatype;
+#define taucs_dtl(X) taucs_s##X
+#define taucs_values values.s
+#define taucs_iszero(x) ((x) == 0.0f)
+typedef float taucs_real_datatype; /* omer: this is the datatype of the real and imaginary part of the datatype*/
+#endif
+
+#ifdef  TAUCS_CORE_DCOMPLEX
+#define TAUCS_CORE
+#define TAUCS_CORE_COMPLEX
+#define TAUCS_CORE_DATATYPE TAUCS_DCOMPLEX
+typedef taucs_dcomplex taucs_datatype;
+#define taucs_dtl(X) taucs_z##X
+#define taucs_values values.z
+#define taucs_iszero(x) (taucs_re(x) == 0.0 && taucs_im(x) == 0.0)
+typedef double taucs_real_datatype; /* omer: this is the datatype of the real and imaginary part of the datatype*/
+#endif
+
+#ifdef  TAUCS_CORE_SCOMPLEX
+#define TAUCS_CORE
+#define TAUCS_CORE_COMPLEX
+#define TAUCS_CORE_DATATYPE TAUCS_SCOMPLEX
+typedef taucs_scomplex taucs_datatype;
+#define taucs_dtl(X) taucs_c##X
+#define taucs_values values.c
+#define taucs_iszero(x) (taucs_re(x) == 0.0f && taucs_im(x) == 0.0f)
+typedef float taucs_real_datatype; /* omer: this is the datatype of the real and imaginary part of the datatype*/
+#endif
+
+#ifndef TAUCS_CORE_DATATYPE
+typedef taucs_double taucs_datatype;
+typedef double taucs_real_datatype; 
+#endif
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+double taucs_get_nan(void);
+
+/* 
+   routines for testing memory allocation.
+   Mostly useful for testing programs
+   that hunt for memory leaks.
+*/
+
+double taucs_allocation_amount(void);
+int    taucs_allocation_count(void);
+int    taucs_allocation_attempts(void);
+void   taucs_allocation_assert_clean(void);
+void   taucs_allocation_mark_clean(void);
+void   taucs_allocation_induce_failure(int i);
+
+/* 
+   these are meant to allow allocation 
+   and more importantly, deallocation,
+   within the testing programs.
+*/
+
+#include <stdlib.h>
+
+void* taucs_malloc (size_t size)              ;
+void* taucs_calloc (size_t nmemb, size_t size);
+void* taucs_realloc(void* ptr, size_t size)   ;
+void  taucs_free   (void* ptr)                ;
+
+#if defined(TAUCS_CORE) 
+
+#if defined(TAUCS_MEMORY_TEST_yes)
+
+#include <stdlib.h>
+
+void* taucs_internal_calloc(size_t nmemb, size_t size,char* file, int line);
+void* taucs_internal_malloc(size_t size,              char* file, int line);
+void* taucs_internal_realloc(void *ptr, size_t size,   char* file, int line);
+void  taucs_internal_free(void *ptr,                   char* file, int line);
+
+/*
+ #define realloc(x,y) taucs_internal_realloc(x,y,__FILE__,__LINE__)
+ #define malloc(x)    taucs_internal_malloc(x,__FILE__,__LINE__)
+ #define calloc(x,y)  taucs_internal_calloc(x,y,__FILE__,__LINE__)
+ #define free(x)      taucs_internal_free(x,__FILE__,__LINE__)
+*/
+
+#define taucs_realloc(x,y) taucs_internal_realloc(x,y,__FILE__,__LINE__)
+#define taucs_malloc(x)    taucs_internal_malloc(x,__FILE__,__LINE__)
+#define taucs_calloc(x,y)  taucs_internal_calloc(x,y,__FILE__,__LINE__)
+#define taucs_free(x)      taucs_internal_free(x,__FILE__,__LINE__)
+
+#define realloc(x,y) taucs_must_not_call_realloc_directly(x,y)
+#define malloc(x)    taucs_must_not_call_malloc_directly(x)
+#define calloc(x,y)  taucs_must_not_call_calloc_directly(x,y)
+#define free(x)      taucs_must_not_call_free_directly(x)
+
+#else /* TAUCS_CORE, but not memory testing */
+
+void* taucs_calloc_stub(size_t nmemb, size_t size);
+void* taucs_malloc_stub(size_t size);
+void* taucs_realloc_stub(void *ptr, size_t size);
+void  taucs_free_stub(void *ptr);
+
+#define realloc(x,y) taucs_must_not_call_realloc_directly(x,y)
+#define malloc(x)    taucs_must_not_call_malloc_directly(x)
+#define calloc(x,y)  taucs_must_not_call_calloc_directly(x,y)
+#define free(x)      taucs_must_not_call_free_directly(x)
+
+#define taucs_realloc(x,y) taucs_realloc_stub(x,y)
+#define taucs_malloc(x)    taucs_malloc_stub(x)
+#define taucs_calloc(x,y)  taucs_calloc_stub(x,y)
+#define taucs_free(x)      taucs_free_stub(x)
+
+#endif
+#endif
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifndef max
+#define max(x,y) ( ((x) > (y)) ? (x) : (y) )
+#endif
+
+#ifndef min
+#define min(x,y) ( ((x) < (y)) ? (x) : (y) )
+#endif
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+/* externs */
+extern int ireadhb_(char*, char*, int*, int*, int*);
+extern int creadhb_(char*, int*, int*, int*, int*, int*, taucs_scomplex*);
+extern int dreadhb_(char*, int*, int*, int*, int*, int*, taucs_double*);
+extern int sreadhb_(char*, int*, int*, int*, int*, int*, taucs_single*);
+extern int zreadhb_(char*, int*, int*, int*, int*, int*, taucs_dcomplex*);
+
+extern int amdexa_(int*, int*, int*, int*, int*, int*, int*, int*, int*, 
+			int*, int*, int*, int*, int*, int*);
+extern int amdtru_(int*, int*, int*, int*, int*, int*, int*, int*, int*, 
+			int*, int*, int*, int*, int*, int*);
+extern int amdbar_(int*, int*, int*, int*, int*, int*, int*, int*, int*, 
+			int*, int*, int*, int*, int*, int*);
+extern int genmmd_(int*, int*, int*, int*, int*, int*, int*, int*, int*, 
+			int*, int*, int*);
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#if (defined(OSTYPE_irix) || defined(OSTYPE_solaris))
+
+#include <math.h>
+#include <ieeefp.h>
+#define isinf(x) (!finite((x)) && !isnan((x)))
+
+#elif defined(OSTYPE_win32)
+
+#include <float.h>
+#define isnan(x)  (_isnan(x))
+#define isinf(x)  (!(_finite(x)) && !(_isnan(x)))
+#define finite(x) (_finite(x))
+
+#endif
+
+/* If these are macros (e.g., gcc -std=c99), do not declare    */
+/* otherwise, declare them, since they are not always declared */
+/* in math.h (e.g., gcc -std=c89 -pedantic); these are for     */
+/* gcc 3.3.1                                                   */
+
+#ifndef isnan
+extern int isnan(double);
+#endif
+#ifndef finite
+extern int finite(double);
+#endif
+#ifndef isinf
+extern int isinf(double);
+#endif
+
+extern int taucs_potrf(char*, int*, taucs_datatype*, int*, int*);
+extern int taucs_trsm(char *, char *, char *, char *, 
+			int*, int*, taucs_datatype*, taucs_datatype*, int *, 
+			taucs_datatype*, int *);
+extern int taucs_gemm(char *, char *, int*, int*, int *,
+			taucs_datatype*, taucs_datatype*, int *, taucs_datatype*, int *, 
+			taucs_datatype*, taucs_datatype*, int*);
+extern int taucs_herk(char *, char *, 
+		      int *, int *, 
+		      taucs_real_datatype*, 
+		      taucs_datatype*, int *, 
+		      taucs_real_datatype*, 
+		      taucs_datatype*, int *);
+
+taucs_double taucs_blas_name(dnrm2)(int*, taucs_double*, int*);
+taucs_single taucs_blas_name(snrm2)(int*, taucs_single*, int*);
+taucs_double taucs_blas_name(dznrm2)(int*, taucs_dcomplex*, int*);
+taucs_single taucs_blas_name(scnrm2)(int*, taucs_scomplex*, int*);
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+
diff --git a/contrib/taucs/src/taucs_ccs_base.c b/contrib/taucs/src/taucs_ccs_base.c
new file mode 100644
index 0000000000000000000000000000000000000000..c4980ed176ef4444f17d30c58cfcbd70d30980d2
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_base.c
@@ -0,0 +1,132 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+#ifndef TAUCS_CORE
+#error "You must define TAUCS_CORE to compile this file"
+#endif
+
+/*********************************************************/
+/* CCS                                                   */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+/* Datatype-independent free: hands the matrix to the double-precision routine taucs_dccs_free. */
+void taucs_ccs_free(taucs_ccs_matrix* matrix)
+{
+  taucs_dccs_free(matrix);
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+/* 
+   Here the generic and type specific routines are different
+   due to a historical accident, which forces us to set the
+   flags again in the generic routine.
+*/
+
+#ifdef TAUCS_CORE_GENERAL
+/* Generic constructor: build an m-row, n-column matrix with room for nnz entries, using the   */
+/* element type whose TAUCS_* bit is set in flags. Returns NULL if no typed matrix was created. */
+taucs_ccs_matrix* taucs_ccs_create(int m, int n, int nnz, int flags)
+{
+  taucs_ccs_matrix* A = NULL;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    A = taucs_dccs_create(m,n,nnz);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    A = taucs_sccs_create(m,n,nnz);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    A = taucs_zccs_create(m,n,nnz);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    A = taucs_cccs_create(m,n,nnz);
+#endif
+
+  if (!A) {
+    /* no datatype bit of this build was set in flags, or the typed constructor returned NULL */
+    taucs_printf("taucs_ccs_create: no data type specified\n");
+    return NULL;
+  }
+  A->flags = flags; /* the generic routine sets the flags again, this time to the caller's full word */
+  return A;
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+#ifndef TAUCS_CORE_GENERAL
+/* Typed constructor: build an m-row, n-column matrix with room for nnz entries; NULL if out of memory. */
+taucs_ccs_matrix* taucs_dtl(ccs_create)(int m, int n, int nnz)
+{
+  taucs_ccs_matrix* matrix;
+
+  matrix = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!matrix) {
+    taucs_printf("taucs_ccs_create: out of memory\n");
+    return NULL;
+  }
+
+#ifdef TAUCS_CORE_DOUBLE
+  matrix->flags = TAUCS_DOUBLE;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+  matrix->flags = TAUCS_SINGLE;
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+  matrix->flags = TAUCS_DCOMPLEX;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX /* single-precision complex build records its own datatype bit */
+  matrix->flags = TAUCS_SCOMPLEX;
+#endif
+
+  matrix->n = n;
+  matrix->m = m;
+  matrix->colptr = (int*)    taucs_malloc((n+1) * sizeof(int));
+  matrix->rowind = (int*)    taucs_malloc(nnz   * sizeof(int));
+  matrix->taucs_values = (taucs_datatype*) taucs_malloc(nnz * sizeof(taucs_datatype));
+  if (!(matrix->colptr) || !(matrix->rowind) || !(matrix->taucs_values)) {
+    taucs_printf("taucs_ccs_create: out of memory (n=%d, nnz=%d)\n",n,nnz);
+    taucs_free(matrix->colptr);
+    taucs_free(matrix->rowind);
+    taucs_free(matrix->taucs_values);
+    taucs_free (matrix);
+    return NULL;
+  }
+
+  return matrix;
+}
+
+/* Release a matrix together with its rowind, colptr and value arrays; a NULL matrix is ignored. */
+void taucs_dtl(ccs_free)(taucs_ccs_matrix* matrix)
+{
+  if (matrix != NULL) {
+    taucs_free(matrix->rowind);
+    taucs_free(matrix->colptr);
+    taucs_free(matrix->taucs_values);
+    taucs_free(matrix);
+  }
+}
+
+#endif /*TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
diff --git a/contrib/taucs/src/taucs_ccs_factor_llt.c b/contrib/taucs/src/taucs_ccs_factor_llt.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c877808fde63bb36eae7eb31612c2757c4ef9aa
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_factor_llt.c
@@ -0,0 +1,991 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+typedef struct { /* sparse accumulator: work space for one column at a time */
+  int     length;   /* number of entries currently listed in indices[] */
+  int*    indices;  /* row indices of the listed entries, in insertion order */
+  int*    bitmap;   /* bitmap[i]: column that last marked row i, -1 if never marked */
+  /* values[] is indexed by row number, not by position in indices[] */
+  taucs_datatype* values;
+} spa;
+
+#ifndef TAUCS_CORE_GENERAL
+
+/*********************************************************/
+/* sparse accumulator                                    */
+/*********************************************************/
+
+/* Allocate a sparse accumulator for columns with n rows: no listed entries
+ * and every bitmap slot set to -1. Returns NULL if any allocation fails. */
+static spa* spa_create(int n)
+{
+  int  row;
+  spa* acc = (spa*) taucs_malloc( sizeof(spa) );
+
+  if (acc == NULL) return NULL;
+
+  acc->indices = (int*)            taucs_malloc( n * sizeof(int) );
+  acc->bitmap  = (int*)            taucs_malloc( n * sizeof(int) );
+  acc->values  = (taucs_datatype*) taucs_malloc( n * sizeof(taucs_datatype) );
+
+  if (acc->indices == NULL || acc->values == NULL || acc->bitmap == NULL) {
+    taucs_printf("chol: cannot create spa\n");
+    taucs_free( acc->indices );
+    taucs_free( acc->bitmap  );
+    taucs_free( acc->values  );
+    taucs_free( acc );
+    return NULL;
+  }
+
+  acc->length = 0;
+  for (row = 0; row < n; row++)
+    acc->bitmap[row] = -1;   /* -1: row not yet marked for any column */
+  return acc;
+}
+
+static void spa_free(spa* s)
+{
+  if (s != NULL) {             /* a NULL accumulator is ignored */
+    taucs_free( s->indices );
+    taucs_free( s->bitmap  );
+    taucs_free( s->values  );
+    taucs_free( s );           /* the struct itself goes last */
+  }
+}
+
+
+/* Load column j of A into the accumulator, restarting its entry list.
+ * Each stored row is appended to indices[], its value is written to
+ * values[row] and bitmap[row] is set to j. Asserts that j < A->n and that
+ * the column stores no row above the diagonal. */
+static void spa_set(spa* s, taucs_ccs_matrix* A, int j)
+{
+  int pos, row;
+  int count = 0;
+
+  assert(j < A->n);
+
+  for (pos = (A->colptr)[j]; pos < (A->colptr)[j+1]; pos++) {
+    row = (A->rowind)[pos];
+    assert( row >= j ); /* A must be lower */
+
+    (s->indices)[ count++ ] = row;
+    (s->values) [ row ]     = (A->taucs_values)[pos];
+    (s->bitmap) [ row ]     = j;
+  }
+
+  s->length = count;
+}
+
+
+/*
+ * Add alpha times column k of A to the accumulator, which holds column j:
+ * values[row] += alpha * A(row,k) for every stored row >= j of column k.
+ * A row whose bitmap mark is older than j is first appended to indices[]
+ * with a zero value (fill-in). Rows above j are skipped.
+ * Asserts that k < A->n.
+ */
+static void spa_scale_add(spa* s, int j, taucs_ccs_matrix* A, int k,
+                          taucs_datatype alpha)
+{
+  int pos, row;
+  taucs_datatype term;
+
+  assert(k < A->n);
+
+  for (pos = (A->colptr)[k]; pos < (A->colptr)[k+1]; pos++) {
+    row = (A->rowind)[pos];
+
+    /* only the part of column k on or below row j contributes */
+    if (row >= j) {
+      term = taucs_mul(alpha, (A->taucs_values)[pos]);
+
+      if ( (s->bitmap)[row] < j ) {
+        /* first touch of this row for column j: register it as fill-in */
+        (s->bitmap) [row]       = j;
+        (s->values) [row]       = taucs_zero;
+        (s->indices)[s->length] = row;
+        s->length += 1;
+      }
+
+      (s->values)[row] = taucs_add( (s->values)[row], term );
+    }
+  }
+}
+
+/*********************************************************/
+/* linked lists for rows                                 */
+/*********************************************************/
+
+static int*            rowlist;        /* rowlist[i]: first node of row i's list, -1 if empty */
+static int*            rowlist_next;   /* next node after node l, -1 at the end; also chains the free list */
+static int*            rowlist_colind; /* column index stored in node l */
+static taucs_datatype* rowlist_values; /* value stored in node l */
+
+static int     rowlist_freelist;       /* first unused node, -1 when none is left */
+static int     rowlist_size;           /* number of nodes currently allocated */
+static int     rowlist_next_expansion; /* number of nodes to add at the next pool growth */
+
+/* Create n empty row lists and a 1000-node pool chained into the free list.
+ * Returns 0, or -1 (all pointers reset to NULL) if an allocation fails. */
+static int rowlist_create(int n)
+{
+  int i;
+
+  rowlist_size           = 1000;
+  rowlist_next_expansion = 1000;
+  rowlist        = (int*) taucs_malloc( n * sizeof(int) );
+  rowlist_next   = (int*) taucs_malloc( rowlist_size * sizeof(int) );
+  rowlist_colind = (int*) taucs_malloc( rowlist_size * sizeof(int) );
+  rowlist_values = (taucs_datatype*) taucs_malloc( rowlist_size * sizeof(taucs_datatype) );
+
+  if (!rowlist || !rowlist_next || !rowlist_colind || !rowlist_values) {
+    taucs_free(rowlist);
+    taucs_free(rowlist_next);
+    taucs_free(rowlist_colind);
+    taucs_free(rowlist_values);
+    rowlist = rowlist_next = rowlist_colind = NULL;
+    rowlist_values = NULL;
+    return -1;
+  }
+
+  for (i=0; i<n; i++) rowlist[i] = -1; /* no list yet for row i */
+
+  /* thread every pool node onto the free list; -1 terminates it */
+  rowlist_freelist = 0;
+  for (i=0; i<rowlist_size-1; i++) rowlist_next[i] = i+1;
+  rowlist_next[rowlist_size-1] = -1;
+  return 0;
+}
+
+/* Free the row-list arrays; the pointers are cleared so none is left dangling. */
+static void rowlist_free(void)
+{
+  taucs_free(rowlist);        taucs_free(rowlist_next);
+  taucs_free(rowlist_colind); taucs_free(rowlist_values);
+  rowlist = rowlist_next = rowlist_colind = NULL; rowlist_values = NULL;
+}
+
+/* static void rowlist_freerow(int i){} */
+
+/* Push (column j, value v) onto the front of row i's list. When the free list
+ * is empty the node pool is first grown by rowlist_next_expansion nodes, and
+ * that step is scaled by 1.25 for the next growth. Returns 0 on success or
+ * -1 if the pool cannot be grown. */
+static int rowlist_add(int i,int j,taucs_datatype v)
+{
+  int node;
+
+  if (rowlist_freelist == -1) {
+    const int new_size = rowlist_size + rowlist_next_expansion;
+    int*            next_buf;
+    int*            colind_buf;
+    taucs_datatype* values_buf;
+    int             idx;
+
+    rowlist_next_expansion = (int) floor(1.25 * (double) rowlist_next_expansion);
+
+    next_buf = (int*) taucs_realloc( rowlist_next, new_size * sizeof(int) );
+    if (next_buf == NULL) return -1;
+    rowlist_next = next_buf;
+
+    colind_buf = (int*) taucs_realloc( rowlist_colind, new_size * sizeof(int) );
+    if (colind_buf == NULL) return -1;
+    rowlist_colind = colind_buf;
+
+    values_buf = (taucs_datatype*) taucs_realloc( rowlist_values, new_size * sizeof(taucs_datatype) );
+    if (values_buf == NULL) return -1;
+    rowlist_values = values_buf;
+
+    /* chain the new nodes together and make them the free list */
+    for (idx = rowlist_size; idx < new_size - 1; idx++)
+      rowlist_next[idx] = idx + 1;
+    rowlist_next[new_size - 1] = -1;
+    rowlist_freelist = rowlist_size;
+    rowlist_size     = new_size;
+  }
+
+  /* pop a node off the free list and link it in at the head of row i */
+  node             = rowlist_freelist;
+  rowlist_freelist = rowlist_next[node];
+  rowlist_next  [node] = rowlist[i];
+  rowlist_colind[node] = j;
+  rowlist_values[node] = v;
+  rowlist[i]           = node;
+
+  return 0;
+}
+
+static int rowlist_getfirst(int i)
+{
+  return rowlist[ i ]; /* first node of row i's list; -1 when the row has no entries */
+}
+
+static int rowlist_getnext(int l)
+{
+  return rowlist_next[ l ]; /* node that follows node l in its list; -1 at the end */
+}
+
+static int rowlist_getcolind(int l)
+{
+  return rowlist_colind[ l ]; /* column index recorded in node l */
+}
+
+static taucs_datatype rowlist_getvalue(int l)
+{
+  return rowlist_values[ l ]; /* value recorded in node l */
+}
+
+/*********************************************************/
+/* Cholesky factorization                                */
+/* This is a left-looking column-column code using       */
+/* row lists. Can perform drop-tolerance incomplete      */
+/* factorization with or without diagonal modification   */
+/* to maintain rowsums.                                  */
+/*********************************************************/
+
+/*
+ * Left-looking sparse Cholesky factorization of a symmetric (or Hermitian)
+ * matrix stored by its lower triangle. Fill-in entries whose magnitude does
+ * not exceed droptol times the 2-norm of the updated column are dropped;
+ * entries present in A are always kept. If modified is nonzero the dropped
+ * values are compensated on the diagonal (intended to maintain row sums).
+ * Returns the factor L as a new lower-triangular CCS matrix, or NULL when
+ * the flags of A are unsuitable or an allocation fails.
+ */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_factor_llt)(taucs_ccs_matrix* A,double droptol, int modified)
+{
+  int            i,j,k,l,n,ip,next,Lnnz;
+  taucs_datatype Lkj,pivot,v;
+  double norm;
+  spa*           s;
+  taucs_ccs_matrix* L;
+  taucs_datatype* dropped;
+  int Aj_nnz;
+  double flops = 0.0;
+
+  if (!(A->flags & TAUCS_SYMMETRIC) && !(A->flags & TAUCS_HERMITIAN)) {
+    taucs_printf("taucs_ccs_factor_llt: matrix must be symmetric\n");
+    return NULL;
+  }
+  if (!(A->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_factor_llt: lower part must be represented\n");
+    return NULL;
+  }
+
+  n = A->n;
+
+  taucs_printf("taucs_ccs_factor_llt: starting n=%d droptol=%lf modified?=%d\n",
+               n,droptol,modified);
+
+  L = taucs_dtl(ccs_create)(n,n,1000);
+  if (!L) 
+    return NULL;
+
+  L->flags |= TAUCS_TRIANGULAR | TAUCS_LOWER;
+
+  Lnnz = 1000;
+  next = 0;
+
+  s = spa_create(n);
+  i = rowlist_create(n);
+
+  dropped = (taucs_datatype*) taucs_malloc( n * sizeof(taucs_datatype) );
+
+  if (!s || i == -1 || !dropped) {
+    taucs_ccs_free(L);
+    spa_free(s);
+    rowlist_free();
+    taucs_free(dropped);
+    return NULL;
+  }
+
+  for (i=0; i<n; i++) dropped[i] = taucs_zero;
+
+  for (j=0; j<n; j++) {
+    spa_set(s,A,j);
+
+    for (l = rowlist_getfirst(j); 
+         l != -1; 
+         l = rowlist_getnext(l)) {
+      k   = rowlist_getcolind(l);
+      Lkj = rowlist_getvalue(l);
+      /* node l is the stored entry of row j in the earlier column k */
+      spa_scale_add(s,j,L,k,taucs_neg(taucs_conj(Lkj))); /* L_*j -= L_kj * L_*k */
+    }
+
+    /* we now add the j'th column of L to the taucs_ccs */
+
+    if ( next+(s->length) > Lnnz ) {
+      int*    rowind;
+      taucs_datatype* values;
+      int inc = max( (int) floor(1.25 * (double)Lnnz) , max( 8192, s->length ) );
+
+      Lnnz += inc;
+
+      rowind = (int*) taucs_realloc( L->rowind, Lnnz * sizeof(int) );
+      if (!rowind) {
+        taucs_free(dropped);
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->rowind = rowind;
+
+      values = (taucs_datatype*) taucs_realloc( L->taucs_values, Lnnz * sizeof(taucs_datatype) );
+      if (!values) {
+        taucs_free(dropped);
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->taucs_values = values;
+    }
+
+    (L->colptr)[j] = next;
+
+    norm = 0.0;
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+      /* accumulate |v|^2 (v times its conjugate) */
+      norm += taucs_re( taucs_mul(v,taucs_conj(v)) );
+    }
+    norm = sqrt(norm);
+
+    Aj_nnz = (A->colptr)[j+1] - (A->colptr)[j];
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j || taucs_abs(v) > droptol * norm || ip < Aj_nnz) {
+        /* kept: the diagonal, large entries, and entries loaded from A */
+      } else {
+        /*
+         * dropped fill-in: record its negated value against row i and
+         * against column j; only the modified variant reads these sums
+         */
+        dropped[i] = taucs_sub( dropped[i], v );
+        dropped[j] = taucs_sub( dropped[j], v );
+      }
+    }
+
+    if (modified) 
+      /* diagonal compensated with what was dropped for index j so far */
+      pivot = taucs_sqrt( taucs_sub ( (s->values)[j] , dropped[j] ) );
+    else
+      pivot = taucs_sqrt( (s->values)[j] );
+
+    if (taucs_re(pivot) == 0.0 && taucs_im(pivot) == 0.0) {
+      taucs_printf("taucs_ccs_factor_llt: zero pivot in column %d\n",j);
+      taucs_printf("taucs_ccs_factor_llt: Ajj in spa = %lg dropped[j] = %lg Aj_nnz=%d\n",
+                 taucs_re((s->values)[j]),taucs_re(dropped[j]),Aj_nnz);
+    } else if (taucs_abs(pivot) < 1e-12) {
+      taucs_printf("taucs_ccs_factor_llt: small pivot in column %d (%le)\n",j,taucs_abs(pivot));
+    }
+
+    /* we want Lii to be first in the compressed column */
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) {
+        /* use the same compensated diagonal the pivot was computed from */
+        if (modified) v = taucs_sub( (s->values)[j] , dropped[j] );
+
+        /* L(j,j) = diagonal / pivot */
+        v = taucs_div( v , pivot );
+
+        (L->rowind)[next] = i;
+        (L->taucs_values)[next] = v;
+        next++;
+        if (rowlist_add(i,j,v) == -1) {
+          taucs_free(dropped);
+          spa_free(s);
+          rowlist_free();
+          taucs_ccs_free(L);
+          return NULL;
+        }
+        break;
+      }
+    }
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) continue;
+
+      /* same keep rule as in the drop pass above */
+
+      if (i==j || taucs_abs(v) > droptol * norm || ip < Aj_nnz) {
+        /* scale the kept off-diagonal entry by the pivot */
+        v = taucs_div( v , pivot );
+
+        (L->rowind)[next] = i;
+        (L->taucs_values)[next] = v;
+        next++;
+        if (rowlist_add(i,j,v) == -1) {
+          taucs_free(dropped);
+          spa_free(s);
+          rowlist_free();
+          taucs_ccs_free(L);
+          return NULL;
+        }
+      }
+    }
+
+    (L->colptr)[j+1] = next;
+    {
+      double Lj_nnz = (double) ((L->colptr)[j+1] - (L->colptr)[j]);
+      flops += 2.0 * Lj_nnz * Lj_nnz;
+    }
+
+    /* row j's list is not recycled here; the whole pool is freed below */
+  }
+
+  (L->colptr)[n] = next;
+
+  rowlist_free();
+  spa_free(s);
+  taucs_free(dropped);
+
+  taucs_printf("taucs_ccs_factor_llt: done; nnz(L) = %d, flops=%.1le\n",(L->colptr)[n],flops);
+
+  return L;
+}
+
+/***************** FACTOR LLT PARTIAL ********************/
+
+/* 
+ * Partial LL^T factorization. Factors the first p columns
+ * and then updates, but does not factor, the trailing submatrix.
+ * Designed for Schur-complement preconditioning.
+ * 
+ */
+
+/*
+ * Partial factorization of a symmetric matrix stored by its lower triangle:
+ * columns 0..p-1 are factored as in a Cholesky factorization, columns p..n-1
+ * receive the updates from those p columns but are not factored themselves.
+ * No entries are dropped. Returns the result as a new lower-triangular CCS
+ * matrix, or NULL when the flags of A are unsuitable or an allocation
+ * fails.
+ */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_factor_llt_partial)(taucs_ccs_matrix* A, 
+                                  int p)
+{
+  int            i,j,k,l,n,ip,next,Lnnz;
+  taucs_datatype Lkj,pivot,v;
+  spa*           s;
+  taucs_ccs_matrix* L;
+  double flops = 0.0;
+
+  if (!(A->flags & TAUCS_SYMMETRIC)) {
+    taucs_printf("taucs_ccs_factor_llt_partial: matrix must be symmetric\n");
+    return NULL;
+  }
+  if (!(A->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_factor_llt_partial: lower part must be represented\n");
+    return NULL;
+  }
+
+  n = A->n;
+
+  taucs_printf("taucs_ccs_factor_llt_partial: starting n=%d p=%d\n",n,p);
+
+  L = taucs_dtl(ccs_create)(n,n,1000);
+  if (!L) 
+    return NULL;
+
+  L->flags |= TAUCS_TRIANGULAR | TAUCS_LOWER;
+
+  Lnnz = 1000;
+  next = 0;
+
+  s = spa_create(n);
+  i = rowlist_create(n);
+
+  if (!s || i == -1) {
+    taucs_ccs_free(L);
+    spa_free(s);
+    rowlist_free();
+    return NULL;
+  }
+
+  /* Phase 1: factor columns 0..p-1, recording each new entry in the row
+   * lists so that later columns can find the updates they need. */
+  for (j=0; j<p; j++) {
+    spa_set(s,A,j);
+
+    for (l = rowlist_getfirst(j); 
+         l != -1; 
+         l = rowlist_getnext(l)) {
+      k   = rowlist_getcolind(l);
+      Lkj = rowlist_getvalue(l);
+      spa_scale_add(s,j,L,k,taucs_neg(Lkj)); /*  L_*j -= L_kj * L_*k  */
+    }
+
+    /* we now add the j'th column of L to the symccs */
+
+    if ( next+(s->length) > Lnnz ) {
+      int*    rowind;
+      taucs_datatype* values;
+      int inc = max( (int) floor(1.25 * (double)Lnnz) , max( 8192, s->length ) );
+      /* i.e. grow by the largest of 1.25*Lnnz, 8192 and this column's length */
+
+      Lnnz += inc;
+
+      rowind = (int*) taucs_realloc( L->rowind, Lnnz * sizeof(int) );
+      if (!rowind) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->rowind = rowind;
+
+      values = (taucs_datatype*) taucs_realloc( L->taucs_values, Lnnz * sizeof(taucs_datatype) );
+      if (!values) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->taucs_values = values;
+    }
+
+    (L->colptr)[j] = next;
+
+    pivot = taucs_sqrt( (s->values)[j] );
+
+    if (taucs_re(pivot) == 0.0 && taucs_im(pivot) == 0.0) {
+      taucs_printf("taucs_ccs_factor_llt_partial: zero pivot in column %d\n",j);
+    } else if (taucs_abs(pivot) < 1e-12) {
+      taucs_printf("taucs_ccs_factor_llt_partial: small pivot in column %d (%le)\n",j,taucs_abs(pivot));
+    }
+
+    /* we want Lii to be first in the compressed column */
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) {
+        /* L(j,j) = diagonal / pivot */
+        v = taucs_div(v , pivot);
+
+        (L->rowind)[next] = i;
+        (L->taucs_values)[next] = v;
+        next++;
+        if (rowlist_add(i,j,v) == -1) {
+          spa_free(s);
+          rowlist_free();
+          taucs_ccs_free(L);
+          return NULL;
+        }
+        break;
+      }
+    }
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) continue;
+
+      /* scale the off-diagonal entry by the pivot */
+      v = taucs_div(v , pivot);
+
+      (L->rowind)[next] = i;
+      (L->taucs_values)[next] = v;
+      next++;
+      if (rowlist_add(i,j,v) == -1) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+    }
+
+    (L->colptr)[j+1] = next;
+
+    {
+      double Lj_nnz = (double) ((L->colptr)[j+1] - (L->colptr)[j]);
+      flops += 2.0 * Lj_nnz * Lj_nnz;
+    }
+  }
+
+  /* Phase 2: update, but do not factor, columns p..n-1. Their entries are
+   * not added to the row lists because only columns < p are ever applied. */
+  for (j=p; j<n; j++) {
+    spa_set(s,A,j);
+
+    /* we only apply updates from columns 0..p-1 */
+    for (l = rowlist_getfirst(j); 
+         l != -1; 
+         l = rowlist_getnext(l)) {
+      k   = rowlist_getcolind(l);
+      Lkj = rowlist_getvalue(l);
+      if (k >= p) continue; 
+      spa_scale_add(s,j,L,k,taucs_neg(Lkj)); /*  L_*j -= L_kj * L_*k  */
+    }
+
+    /* we now add the j'th column of L to the symccs */
+
+    if ( next+(s->length) > Lnnz ) {
+      int*    rowind;
+      taucs_datatype* values;
+      int inc = max( (int) floor(1.25 * (double)Lnnz) , max( 8192, s->length ) );
+      /* i.e. grow by the largest of 1.25*Lnnz, 8192 and this column's length */
+
+      Lnnz += inc;
+
+      rowind = (int*) taucs_realloc( L->rowind, Lnnz * sizeof(int) );
+      if (!rowind) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->rowind = rowind;
+
+      values = (taucs_datatype*) taucs_realloc( L->taucs_values, Lnnz * sizeof(taucs_datatype) );
+      if (!values) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->taucs_values = values;
+    }
+
+    (L->colptr)[j] = next;
+
+    /* we want Lii to be first in the compressed column */
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) {
+        (L->rowind)[next] = i;
+        (L->taucs_values)[next] = v;
+        next++;
+        /* stored unscaled: the trailing columns are not factored */
+        break;
+      }
+    }
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) continue;
+      (L->rowind)[next] = i;
+      (L->taucs_values)[next] = v;
+      next++;
+    }
+
+    (L->colptr)[j+1] = next;
+
+    /* not sure the flop count is correct. */
+    {
+      double Lj_nnz = (double) ((L->colptr)[j+1] - (L->colptr)[j]);
+      flops += 2.0 * Lj_nnz * Lj_nnz;
+    }
+  }
+
+  (L->colptr)[n] = next;
+
+  rowlist_free();
+  spa_free(s);
+
+  taucs_printf("taucs_ccs_factor_llt_partial: done; nnz(L) = %d, flops=%.1le\n",(L->colptr)[n],flops);
+
+  return L;
+}
+
+/*********************************************************/
+/* LDL^T factorization                                   */
+/* This is a left-looking column-column code using       */
+/* row lists.                                            */
+/*********************************************************/
+
+/*
+ * Left-looking sparse LDL^T factorization of a matrix given by its lower
+ * triangle. No entries are dropped and no pivoting is done; a zero pivot is
+ * only reported. The result is a new lower-triangular CCS matrix in which
+ * each column starts with its diagonal entry, holding D(j,j) instead of the
+ * unit diagonal of L. Returns NULL when an allocation fails.
+ * NOTE(review): the flags of A are not checked here - confirm callers do.
+ */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_factor_ldlt)(taucs_ccs_matrix* A)
+{
+  int            i,j,k,l,n,ip,next,Lnnz;
+  taucs_datatype Lkj,pivot,v,Dkk;
+  spa*           s;
+  taucs_ccs_matrix* L;
+  int Aj_nnz;
+  double flops = 0.0;
+
+  n = A->n;
+
+  taucs_printf("taucs_ccs_factor_ldlt: starting n=%d\n",n);
+
+  L = taucs_dtl(ccs_create)(n,n,1000);
+  if (!L)
+    return NULL;
+
+  L->flags |= TAUCS_TRIANGULAR | TAUCS_LOWER;
+
+  Lnnz = 1000;
+  next = 0;
+
+  s = spa_create(n);
+  i = rowlist_create(n);
+
+  if (!s || i == -1) {
+    taucs_ccs_free(L);
+    spa_free(s);
+    rowlist_free();
+    return NULL;
+  }
+
+  for (j=0; j<n; j++) {
+    spa_set(s,A,j);
+
+    for (l = rowlist_getfirst(j); 
+         l != -1; 
+         l = rowlist_getnext(l)) {
+      k   = rowlist_getcolind(l);
+      Lkj = rowlist_getvalue(l);
+      Dkk = (L->taucs_values)[ (L->colptr)[k] ];
+      /* D(k,k) was just read from the first slot of column k */
+      /* multiplier for column k: -conj(L(j,k)) * D(k,k) */
+      spa_scale_add(s,j,L,k,
+                    taucs_mul(taucs_neg(taucs_conj(Lkj)),Dkk)); /* L_*j -= L_kj * L_*k */
+    }
+
+    /* we now add the j'th column of L to the taucs_ccs */
+
+    if ( next+(s->length) > Lnnz ) {
+      int*    rowind;
+      taucs_datatype* values;
+      int inc = max( (int) floor(1.25 * (double)Lnnz) , max( 8192, s->length ) );
+      /* i.e. grow by the largest of 1.25*Lnnz, 8192 and this column's length */
+
+      Lnnz += inc;
+
+      rowind = (int*) taucs_realloc( L->rowind, Lnnz * sizeof(int) );
+      if (!rowind) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->rowind = rowind;
+
+      values = (taucs_datatype*) taucs_realloc( L->taucs_values, Lnnz * sizeof(taucs_datatype) );
+      if (!values) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+      L->taucs_values = values;
+    }
+
+    (L->colptr)[j] = next;
+
+    Aj_nnz = (A->colptr)[j+1] - (A->colptr)[j]; 
+
+    pivot = (s->values)[j]; 
+
+    if (taucs_re(pivot) == 0.0 && taucs_im(pivot) == 0.0) {
+      taucs_printf("ldlt: zero pivot in column %d\n",j);
+      taucs_printf("ldlt: Ajj in spa = %lg Aj_nnz=%d\n",
+                 taucs_re((s->values)[j]),Aj_nnz);
+    }
+
+#if 0
+    if (taucs_abs(pivot) < 1e-4) {
+      taucs_printf("taucs_ccs_factor_ldlt: small pivot in column %d: %.2e\n",
+                   j,pivot);
+    }
+#endif
+
+    /* we want Lii to be first in the compressed column */
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) {
+        /*
+         * The first slot of the column stores D(j,j) itself (the pivot).
+         * The unit diagonal of L, v/pivot, is computed only to be recorded
+         * in row j's list.
+         */
+
+        /* unit diagonal of L: v / pivot */
+        v = taucs_div(v , pivot);
+
+        /* append the diagonal slot; this dedicated loop keeps it first in
+         * the column */
+        (L->rowind)[next] = i;
+        (L->taucs_values)[next] = pivot; /* we put D on the diagonal */
+        next++;
+        if (rowlist_add(i,j,v) == -1) {
+          spa_free(s);
+          rowlist_free();
+          taucs_ccs_free(L);
+          return NULL;
+        }
+        break;
+      }
+    }
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      if (i==j) continue;
+
+      /* off-diagonal entry of L: accumulated value divided by D(j,j) */
+      v = taucs_div(v , pivot);
+
+      (L->rowind)[next] = i;
+      (L->taucs_values)[next] = v;
+      next++;
+      if (rowlist_add(i,j,v) == -1) {
+        spa_free(s);
+        rowlist_free();
+        taucs_ccs_free(L);
+        return NULL;
+      }
+    }
+
+    (L->colptr)[j+1] = next;
+    {
+      double Lj_nnz = (double) ((L->colptr)[j+1] - (L->colptr)[j]);
+      flops += 2.0 * Lj_nnz * Lj_nnz;
+    }
+
+    /* row j's list is not recycled here; the whole pool is freed below */
+  }
+
+  (L->colptr)[n] = next;
+
+  rowlist_free();
+  spa_free(s);
+
+  taucs_printf("taucs_ccs_factor_ldlt: done; nnz(L) = %.2le, flops=%.2le\n",
+             (double) (L->colptr)[n],flops);
+
+  return L;
+}
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+taucs_ccs_matrix* 
+taucs_ccs_factor_llt(taucs_ccs_matrix* A,double droptol, int modified)
+{
+  /* Forward to the factorization built for the data type named in A->flags. */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (A->flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_llt(A,droptol,modified);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (A->flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_llt(A,droptol,modified);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_llt(A,droptol,modified);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_llt(A,droptol,modified);
+#endif
+  /* Reached only when no compiled-in data type matches A->flags. */
+  assert(0);
+  return NULL;
+}
+
+taucs_ccs_matrix* 
+taucs_ccs_factor_llt_partial(taucs_ccs_matrix* A, int p)
+{
+  /* Forward to the partial factorization built for the data type in A->flags. */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (A->flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_llt_partial(A,p);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (A->flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_llt_partial(A,p);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_llt_partial(A,p);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_llt_partial(A,p);
+#endif
+  /* Reached only when no compiled-in data type matches A->flags. */
+  assert(0);
+  return NULL;
+}
+
+taucs_ccs_matrix* 
+taucs_ccs_factor_ldlt(taucs_ccs_matrix* A)
+{
+  /* Forward to the LDL^T factorization built for the data type in A->flags. */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (A->flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_ldlt(A);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (A->flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_ldlt(A);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_ldlt(A);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_ldlt(A);
+#endif
+  /* Reached only when no compiled-in data type matches A->flags. */
+  assert(0);
+  return NULL;
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
diff --git a/contrib/taucs/src/taucs_ccs_generators.c b/contrib/taucs/src/taucs_ccs_generators.c
new file mode 100644
index 0000000000000000000000000000000000000000..45435f05ed9695ba651e6786b12089027f2e3ebe
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_generators.c
@@ -0,0 +1,948 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include "taucs.h"
+
+/* Pseudo-random double in [0,1], drawn from rand().  The macro expands to a
+   plain expression (no trailing semicolon) so that it can also be used
+   inside larger expressions. */
+#define RNDM ((double)rand()/(double)RAND_MAX)
+
+/* max() and mod() are only defined when the platform headers have not
+   already provided them.  Both evaluate their arguments as written, so
+   avoid arguments with side effects. */
+#ifndef max
+#define max(x,y) ( ((x) > (y)) ? (x) : (y) )
+#endif
+#ifndef mod
+#define mod(x,n) ((x) % (n))
+#endif
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_DOUBLE
+
+/*
+ * Generate a symmetric test matrix on an n-by-n mesh whose neighbors wrap
+ * around at the mesh boundary.
+ *
+ * Mesh point (x,y) maps to row/column x + y*n.  Only the lower triangle is
+ * stored: each column holds, for those of its four neighbors that have a
+ * larger index, -1.0 for the x-direction neighbors and +100.0 for the
+ * y-direction neighbors, followed by the diagonal entry 202.0 (203.0 at
+ * the origin, to make the matrix nonsingular).
+ *
+ * n: number of mesh points per side; the matrix is n*n by n*n.
+ *
+ * Returns a newly allocated matrix flagged TAUCS_SYMMETRIC | TAUCS_LOWER |
+ * TAUCS_DOUBLE, or NULL when an allocation fails (everything allocated so
+ * far is released in that case).
+ */
+taucs_ccs_matrix* taucs_ccs_generate_mesh2d_negative(int n)
+{
+  taucs_ccs_matrix* m;
+  int         N;
+  int         nnz;
+  int         x,y,i,j,ip;
+
+  taucs_printf("generate_mesh2d_negative: starting\n");
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("generate_mesh2d_negative: out of memory (1)\n");
+    return NULL; 
+  }
+
+  N   = n*n;
+  nnz = 4*N;   /* upper bound on the number of stored entries */
+
+  m->n      = N;
+  m->m      = N;
+  m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  m->colptr = (int*)    taucs_malloc((N+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  /* all three arrays must exist, including the values array */
+  if (!(m->colptr) || !(m->rowind) || !(m->values.d)) {
+    taucs_printf("generate_mesh2d_negative: out of memory (4): ncols=%d nnz=%d\n",N,nnz);
+    taucs_free(m->colptr); taucs_free(m->rowind); taucs_free(m->values.d);
+    taucs_free(m);
+    return NULL; 
+  }
+
+  ip = 0;
+  for (y=0; y<n; y++) {
+    for (x=0; x<n; x++) {
+      j = x + y*n; /* convert mesh (x,y) location to index in vector */
+      (m->colptr)[j] = ip;
+
+      /* four wrap-around neighbors; keep only those below the diagonal */
+      i=mod(x+1,n)   + (y  )*n        ; if (i>j) { (m->rowind)[ip]=i; (m->values.d)[ip]=-1.0;   ip++; }
+      i=(x  )        + mod(y+1,n)*n   ; if (i>j) { (m->rowind)[ip]=i; (m->values.d)[ip]=+100.0; ip++; }
+      i=mod(x+n-1,n) + (y  )*n        ; if (i>j) { (m->rowind)[ip]=i; (m->values.d)[ip]=-1.0;   ip++; }
+      i=(x  )        + mod(y+n-1,n)*n ; if (i>j) { (m->rowind)[ip]=i; (m->values.d)[ip]=+100.0; ip++; }
+
+      /* the diagonal entry is stored last in its column */
+      (m->rowind)[ip] = j;
+      (m->values.d)[ip] = 202.0;
+      if (x==0 && y==0) (m->values.d)[ip] += 1; /* to make it nonsingular */ 
+      ip++; 
+    }
+  }
+
+  (m->colptr)[N] = ip;
+
+  taucs_printf("generate_mesh2d_negative: done: ncols=%d nnz=%d\n",N,ip);
+
+  return m;
+}
+
+
+/*
+ * Generate the lower triangle of a symmetric 5-point-stencil matrix on an
+ * n-by-n mesh (no wrap-around).
+ *
+ * Mesh point (x,y) maps to row/column x + y*n.  Each column stores the
+ * coupling to the (x,y+1) neighbor, then the coupling to the (x+1,y)
+ * neighbor, then the diagonal.
+ *
+ * n:     number of mesh points per side; the matrix is n*n by n*n.
+ * which: selects the variant:
+ *          "anisotropic_y"  y-couplings are -100, x-couplings -1
+ *          "anisotropic_x"  x-couplings are -100, y-couplings -1
+ *          "dirichlet"      all couplings -1, diagonal fixed at 4
+ *          anything else    (neumann) all couplings -1
+ *        A NULL pointer is treated like an unrecognized string.
+ *        Except for "dirichlet", the diagonal is the sum of the magnitudes
+ *        of the couplings to the existing neighbors, plus 1 at the origin
+ *        to make the matrix nonsingular.
+ *
+ * Returns a newly allocated matrix flagged TAUCS_SYMMETRIC | TAUCS_LOWER |
+ * TAUCS_DOUBLE, or NULL when an allocation fails (everything allocated so
+ * far is released in that case).
+ */
+taucs_ccs_matrix* 
+taucs_ccs_generate_mesh2d(int n,char *which)
+{
+  taucs_ccs_matrix* m;
+  int         N;
+  int         nnz;
+  int         x,y,i,j,ip;
+  int         aniso_x, aniso_y, dirichlet;
+  double      jump = 100;
+  double      wx, wy;   /* coupling strength in the x and y directions */
+  double      diag;
+
+  taucs_printf("taucs_ccs_generate_mesh2d: starting\n");
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("generate_mesh2d: out of memory (1)\n");
+    return NULL; 
+  }
+
+  N   = n*n;
+  nnz = 3*N;   /* at most two couplings plus the diagonal per column */
+
+  m->n      = N;
+  m->m      = N;
+  m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  m->colptr = (int*)    taucs_malloc((N+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  /* all three arrays must exist, including the values array */
+  if (!(m->colptr) || !(m->rowind) || !(m->values.d)) {
+    taucs_printf("taucs_ccs_generate_mesh2d: out of memory: ncols=%d nnz=%d\n",N,nnz);
+    taucs_free(m->colptr); taucs_free(m->rowind); taucs_free(m->values.d);
+    taucs_free(m);
+    return NULL; 
+  }
+
+  /* decode the variant once instead of comparing strings for every cell */
+  aniso_x   = (which != NULL) && !strcmp(which,"anisotropic_x");
+  aniso_y   = (which != NULL) && !strcmp(which,"anisotropic_y");
+  dirichlet = (which != NULL) && !strcmp(which,"dirichlet");
+  wx = aniso_x ? jump : 1.0;
+  wy = aniso_y ? jump : 1.0;
+
+  ip = 0;
+  for (y=0; y<n; y++) {
+    for (x=0; x<n; x++) {
+      j = x + y*n; /* convert mesh (x,y) location to index in vector */
+      (m->colptr)[j] = ip;
+
+      /* coupling to the neighbor in the next mesh row */
+      if (y < n-1) { i=(x  )+(y+1)*n; (m->rowind)[ip]=i; (m->values.d)[ip]=-wy; ip++; }
+
+      /* coupling to the neighbor in the next mesh column */
+      if (x < n-1) { i=(x+1)+(y  )*n; (m->rowind)[ip]=i; (m->values.d)[ip]=-wx; ip++; }
+
+      if (dirichlet) {
+        diag = 4.0;
+      } else {
+        diag = 0.0;
+        if (x > 0)   diag += wx;
+        if (y > 0)   diag += wy;
+        if (x < n-1) diag += wx;
+        if (y < n-1) diag += wy;
+        if (x==0 && y==0) diag += 1.0; /* to make it nonsingular */
+      }
+
+      (m->rowind)[ip]   = j;
+      (m->values.d)[ip] = diag;
+      ip++;
+    }
+  }
+  (m->colptr)[N] = ip;
+
+  taucs_printf("taucs_ccs_generate_mesh2d: done, ncols=%d nnz=%d\n",N,ip);
+
+  return m;
+}
+
+/*
+ * Generate the lower triangle of a symmetric 7-point-stencil matrix on an
+ * X-by-Y-by-Z mesh (no wrap-around).
+ *
+ * Mesh point (x,y,z) maps to row/column z*X*Y + y*X + x.  Each column
+ * stores a -1.0 coupling to each of the (x+1), (y+1) and (z+1) neighbors
+ * that exists, followed by the diagonal, which equals the number of
+ * existing neighbors (up to six) plus 1.0 at the origin.
+ *
+ * X, Y, Z: mesh dimensions; the matrix is X*Y*Z by X*Y*Z.
+ *
+ * Returns a newly allocated matrix flagged TAUCS_SYMMETRIC | TAUCS_LOWER |
+ * TAUCS_DOUBLE, or NULL when an allocation fails (everything allocated so
+ * far is released in that case).
+ */
+taucs_ccs_matrix* 
+taucs_ccs_generate_mesh3d(int X, int Y, int Z)
+{
+  taucs_ccs_matrix* m;
+  int         N;
+  int         nnz;
+  int         x,y,z,i,j,ip;
+  double      diag;
+
+  taucs_printf("taucs_ccs_generate_mesh3d: starting\n");
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("taucs_ccs_generate_mesh3d: out of memory\n");
+    return NULL; 
+  }
+
+  N   = X*Y*Z;
+  nnz = 4*N;   /* at most three couplings plus the diagonal per column */
+
+  m->n      = N;
+  m->m      = N;
+  m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  m->colptr = (int*)    taucs_malloc((N+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  /* all three arrays must exist, including the values array */
+  if (!(m->colptr) || !(m->rowind) || !(m->values.d)) {
+    taucs_printf("taucs_ccs_generate_mesh3d: out of memory: ncols=%d nnz=%d\n",N,nnz);
+    taucs_free(m->colptr); taucs_free(m->rowind); taucs_free(m->values.d);
+    taucs_free(m);
+    return NULL; 
+  }
+
+  ip = 0;
+  for (z=0; z<Z; z++) {
+    for (y=0; y<Y; y++) {
+      for (x=0; x<X; x++) {
+        j = z*X*Y + y*X + x; 
+        (m->colptr)[j] = ip;
+
+        /* couplings to the neighbors with a larger index */
+        if (x < X-1) { i=(z  )*X*Y+(y  )*X+(x+1); (m->rowind)[ip]=i; (m->values.d)[ip]=-1.0; ip++; }
+        if (y < Y-1) { i=(z  )*X*Y+(y+1)*X+(x  ); (m->rowind)[ip]=i; (m->values.d)[ip]=-1.0; ip++; }
+        if (z < Z-1) { i=(z+1)*X*Y+(y  )*X+(x  ); (m->rowind)[ip]=i; (m->values.d)[ip]=-1.0; ip++; }
+
+        /* diagonal: one unit per existing neighbor, in any direction */
+        diag = 0.0;
+        if (x < X-1) diag += 1.0;
+        if (y < Y-1) diag += 1.0;
+        if (z < Z-1) diag += 1.0;
+        if (x > 0  ) diag += 1.0;
+        if (y > 0  ) diag += 1.0;
+        if (z > 0  ) diag += 1.0;
+        if (x==0 && y==0 && z==0) diag += 1.0; /* extra unit at the origin */
+
+        (m->rowind)[ip]   = j;
+        (m->values.d)[ip] = diag;
+        ip++;
+      }
+    }
+  }
+  (m->colptr)[N] = ip;
+
+  taucs_printf("taucs_ccs_generate_mesh3d: done, ncols=%d nnz=%d\n",N,ip);
+
+  return m;
+}
+
+/*
+ * Generate a dense matrix, stored in compressed-column form, whose entries
+ * are pseudo-random values in [0,1] drawn with rand().
+ *
+ * M:     number of rows (ignored when TAUCS_SYMMETRIC is requested).
+ * N:     number of columns.
+ * flags: if TAUCS_SYMMETRIC is set, an N-by-N matrix is produced and only
+ *        its lower triangle (including the diagonal) is stored; otherwise
+ *        all M*N entries of an M-by-N matrix are stored.  Other bits are
+ *        ignored.
+ *
+ * Returns a newly allocated TAUCS_DOUBLE matrix, or NULL when an
+ * allocation fails (everything allocated so far is released in that case).
+ */
+taucs_ccs_matrix* 
+taucs_ccs_generate_dense(int M, int N, int flags)
+{
+  taucs_ccs_matrix* m;
+  int         nnz;
+  int         i,j,ip;
+  int         first_row, nrows;
+
+  taucs_printf("taucs_ccs_generate_dense: starting\n");
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("taucs_ccs_generate_dense: out of memory\n");
+    return NULL; 
+  }
+
+  m->n      = N;
+  if (flags & TAUCS_SYMMETRIC) {
+    m->m      = N;
+    nnz       = N*(N+1)/2;
+    m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  } else {
+    /* every column holds M entries, so the storage must be sized by M*N
+       (sizing it N*N overflows as soon as M > N) */
+    m->m      = M;
+    nnz       = M*N;
+    m->flags  = TAUCS_DOUBLE;
+  }
+
+  m->colptr = (int*)    taucs_malloc((N+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  /* all three arrays must exist, including the values array */
+  if (!(m->colptr) || !(m->rowind) || !(m->values.d)) {
+    taucs_printf("taucs_ccs_generate_dense: out of memory: nrows=%d ncols=%d nnz=%d\n",M,N,nnz);
+    taucs_free(m->colptr); taucs_free(m->rowind); taucs_free(m->values.d);
+    taucs_free(m);
+    return NULL; 
+  }
+
+  nrows = m->m;
+  ip = 0;
+  for (j=0; j<N; j++) {
+    (m->colptr)[j] = ip;
+    /* symmetric: rows j..N-1 (lower triangle); general: rows 0..M-1 */
+    first_row = (flags & TAUCS_SYMMETRIC) ? j : 0;
+    for (i=first_row; i<nrows; i++) {
+      (m->rowind)[ip]=i; 
+      (m->values.d)[ip]=RNDM; 
+      ip++;
+    }
+  }
+  (m->colptr)[N] = ip;
+
+  taucs_printf("taucs_ccs_generate_dense: done, nrows=%d ncols=%d nnz=%d\n",M,N,ip);
+
+  return m;
+}
+
+/* random resistor networks */
+
+/*
+ * Depth-first traversal of an adjacency-list graph, starting at vertex i.
+ *
+ * neighbors[v] lists the degree[v] vertices adjacent to v.  Every vertex
+ * reached has its visited[] entry set to 1; vertices whose entry is already
+ * nonzero are not entered again.
+ *
+ * Returns the number of vertices newly marked by this call, vertex i
+ * included.  The recursion depth can grow to the size of the component.
+ */
+int recursive_visit(int i, 
+		    int* neighbors[], 
+		    int degree[], 
+		    int visited[])
+{
+  int reached = 1;   /* vertex i itself */
+  int slot;
+
+  visited[i] = 1;
+
+  for (slot = 0; slot < degree[i]; slot++) {
+    int next = neighbors[i][slot];
+
+    if (visited[next]) continue;
+    reached += recursive_visit(next, neighbors, degree, visited);
+  }
+
+  return reached;
+}
+
+/*
+ * Helper for taucs_ccs_generate_rrn: one drop decision.  Consumes exactly
+ * one rand() value and returns nonzero when the edge is to be dropped.
+ */
+static int
+taucs_rrn_edge_dropped(double drop_probability)
+{
+  return ((double)rand() / (double)RAND_MAX) < drop_probability;
+}
+
+/*
+ * Helper for taucs_ccs_generate_rrn: store the edge between vertex j and
+ * its neighbor nb in column j of m, if it belongs there.
+ *
+ * Only the lower triangle is stored, so the edge is recorded only when
+ * nb > j; this test also rejects missing or dropped neighbors (nb == -1).
+ * The edge gets the value -(rmin + u*(1-rmin)) with u drawn from rand().
+ * Its magnitude is added to the diagonal of column j (at position diag)
+ * and to D[nb], the pending diagonal of the not-yet-emitted column nb.
+ *
+ * ip is the next free position in m->rowind / m->values.d; the updated
+ * position is returned.
+ */
+static int
+taucs_rrn_add_edge(taucs_ccs_matrix* m, double* D,
+                   int j, int nb, int diag, int ip, double rmin)
+{
+  double v;
+
+  if (nb <= j) return ip;
+
+  /* One draw is consumed and discarded: the historical implementation drew
+     twice per stored edge, and keeping that keeps the matrix generated for
+     a given rand() seed unchanged. */
+  (void) rand();
+  v = -( rmin + (((double)rand()/(double)RAND_MAX) * (1.0-rmin)) );
+
+  (m->rowind)[ip]     = nb;
+  (m->values.d)[ip]   = v;
+  (m->values.d)[diag] -= v;
+  D[nb]               -= v;
+
+  return ip + 1;
+}
+
+/*
+ * Generate a random resistor network on an X-by-Y-by-Z grid and return the
+ * matrix of its largest connected component.
+ *
+ * Grid point (x,y,z) is vertex z*X*Y + y*X + x and is connected to its
+ * (up to six) axis neighbors; there is no wrap-around.  For every vertex
+ * six drop decisions are drawn (west, east, south, north, up, down); the
+ * edge to a higher-numbered neighbor is omitted when its decision says
+ * so.  Each remaining edge gets a value in [-1,-rmin], and each diagonal
+ * entry is the sum of the magnitudes of the edges incident to the vertex.
+ * The connected components of the resulting graph are then found, 1.0 is
+ * added to the diagonal of the lowest-numbered vertex of every component,
+ * and the rows/columns of the largest component are extracted (keeping
+ * their relative order) into the returned matrix.
+ *
+ * X, Y, Z:          grid dimensions.
+ * drop_probability: probability of dropping an edge; values outside [0,1]
+ *                   are reported and replaced by 0.
+ * rmin:             smallest edge magnitude; values outside (0,1] are
+ *                   reported and replaced by 1.
+ *
+ * Returns a newly allocated matrix flagged TAUCS_SYMMETRIC | TAUCS_LOWER |
+ * TAUCS_DOUBLE, or NULL when an allocation fails.  All intermediate
+ * storage is released in either case.  Uses rand(); seed with srand() for
+ * reproducible output.
+ */
+taucs_ccs_matrix* 
+taucs_ccs_generate_rrn(int X, int Y, int Z, double drop_probability, double rmin)
+{
+  taucs_ccs_matrix* m;
+  taucs_ccs_matrix* l = NULL;
+  int         N;
+  int         nnz;
+  int         x,y,z,i,j,k,ip,jp;
+  double*     D; /* contributions to future diagonal elements */
+
+  int**       neighbors;
+  int*        degree;
+  int*        visited;
+  int*        reps;
+  int         ncomponents;
+
+  int         largest;
+  int         largest_rep = 0; /* warning */
+
+  taucs_printf("taucs_ccs_generate_rrn: starting (%d %d %d %.4e %.4e)\n",
+	       X,Y,Z,drop_probability,rmin);
+
+  if (drop_probability > 1.0 || drop_probability < 0.0) {
+    taucs_printf("taucs_ccs_generate_rrn: drop probability (%lg) must be in [0,1], setting to 0\n",
+		 drop_probability);
+    drop_probability = 0.0;
+  }
+
+  if (rmin > 1.0 || rmin <= 0.0) {
+    taucs_printf("taucs_ccs_generate_rrn: rmin (%lg) must be in (0,1], setting to 1\n",
+		 rmin);
+    rmin = 1.0;
+  }
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("taucs_ccs_generate_rrn: out of memory\n");
+    return NULL; 
+  }
+
+  N   = X*Y*Z;
+  nnz = 4*N;   /* this is an upper bound */
+
+  m->n      = N;
+  m->m      = N;
+  m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  m->colptr = (int*)    taucs_malloc((N+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  D         = (double*) taucs_malloc(N         * sizeof(double));
+
+  /* all four arrays must exist, including the values array */
+  if (!(m->colptr) || !(m->rowind) || !(m->values.d) || !D) {
+    taucs_printf("taucs_ccs_generate_rrn: out of memory: ncols=%d nnz=%d\n",N,nnz);
+    taucs_free(m->colptr); taucs_free(m->rowind); taucs_free(m->values.d); taucs_free(D);
+    taucs_free(m);
+    return NULL; 
+  }
+
+  for (i=0; i<N; i++) D[i] = 0.0;
+
+  /* ---- build the full (possibly disconnected) network in m ---- */
+
+  ip = 0;
+  for (z=0; z<Z; z++) {
+    for (y=0; y<Y; y++) {
+      for (x=0; x<X; x++) {
+	int jw, je, js, jn, jd, ju; /* west, east, south, north, down, up; -1 if absent */
+	int diag;                   /* position of the diagonal value of this column */
+
+	j  = z*X*Y + y*X + x;
+
+	jw = (x > 0  ) ? j - 1   : -1;
+	je = (x < X-1) ? j + 1   : -1;
+	js = (y > 0  ) ? j - X   : -1;
+	jn = (y < Y-1) ? j + X   : -1;
+	jd = (z > 0  ) ? j - X*Y : -1;
+	ju = (z < Z-1) ? j + X*Y : -1;
+
+	/* one drop decision per direction, always drawn, always in this
+	   order, so that the rand() stream does not depend on the position */
+	if (taucs_rrn_edge_dropped(drop_probability)) jw = -1;
+	if (taucs_rrn_edge_dropped(drop_probability)) je = -1;
+	if (taucs_rrn_edge_dropped(drop_probability)) js = -1;
+	if (taucs_rrn_edge_dropped(drop_probability)) jn = -1;
+	if (taucs_rrn_edge_dropped(drop_probability)) ju = -1;
+	if (taucs_rrn_edge_dropped(drop_probability)) jd = -1;
+
+	/* the diagonal comes first in the column and starts out with what
+	   the lower-numbered neighbors have already contributed */
+	(m->colptr)[j]    = ip;
+	diag              = ip;
+	(m->rowind)[ip]   = j;
+	(m->values.d)[ip] = D[j]; 
+	ip++;
+
+	ip = taucs_rrn_add_edge(m, D, j, jw, diag, ip, rmin);
+	ip = taucs_rrn_add_edge(m, D, j, je, diag, ip, rmin);
+	ip = taucs_rrn_add_edge(m, D, j, js, diag, ip, rmin);
+	ip = taucs_rrn_add_edge(m, D, j, jn, diag, ip, rmin);
+	ip = taucs_rrn_add_edge(m, D, j, ju, diag, ip, rmin);
+	ip = taucs_rrn_add_edge(m, D, j, jd, diag, ip, rmin);
+      }
+    }
+  }
+  taucs_free(D);
+  (m->colptr)[N] = ip;
+
+  taucs_printf("taucs_ccs_generate_rrn: done, ncols=%d allocated nnz=%d real nnz=%d\n",
+	       N,nnz,ip);
+
+  /* ---- build adjacency lists and find the connected components ---- */
+
+  neighbors = (int**) taucs_malloc(N * sizeof(int*));
+  degree  = (int*) taucs_malloc(N * sizeof(int));
+  visited = (int*) taucs_malloc(N * sizeof(int));
+  reps    = (int*) taucs_malloc(N * sizeof(int));
+
+  if (!neighbors || !degree || !visited || !reps) {
+    taucs_printf("taucs_ccs_generate_rrn: out of memory\n");
+    taucs_free(neighbors); taucs_free(degree); taucs_free(visited); taucs_free(reps);
+    taucs_ccs_free(m);
+    return NULL;
+  }
+
+  for (i=0; i<N; i++) {
+    neighbors[i] = NULL;   /* lets the cleanup code free every slot */
+    degree[i]    = 0;
+    visited[i]   = 0;
+  }
+
+  for (j=0; j<N; j++) {
+    for (ip=(m->colptr)[j]; ip<(m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ ip ];
+      if (i != j) {
+	degree[i]++;
+	degree[j]++;
+      }
+    }
+  }
+
+  for (i=0; i<N; i++) {
+    if (degree[i] == 0) continue;   /* isolated vertex: no list needed */
+    neighbors[i] = (int*) taucs_malloc(degree[i] * sizeof(int));
+    if (!neighbors[i]) {
+      taucs_printf("taucs_ccs_generate_rrn: out of memory\n");
+      goto done;
+    }
+  }
+
+  /* visited[] temporarily counts how many neighbors each list holds */
+  for (j=0; j<N; j++) {
+    for (ip=(m->colptr)[j]; ip<(m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ ip ];
+      if (i != j) {
+	assert(visited[i] < degree[i]);
+	assert(visited[j] < degree[j]);
+	neighbors[i][visited[i]] = j;
+	neighbors[j][visited[j]] = i;
+	visited[i]++;
+	visited[j]++;
+      }
+    }
+  }
+
+  for (i=0; i<N; i++) visited[i] = 0;
+  ncomponents = 0;
+  largest = -1;
+  for (i=0; i<N; i++) {
+    if (visited[i] == 0) {
+      int count;
+      reps[ncomponents] = i;
+      ncomponents++;
+      count = recursive_visit(i,neighbors,degree,visited);
+      if (count > largest) {
+	largest = count;
+	largest_rep = i;
+      }
+    }
+  }
+
+  /* anchor every component: add 1.0 to the diagonal (the first entry of
+     the column) of its representative */
+  for (i=0; i<ncomponents; i++) {
+    j = reps[i];
+    (m->values.d)[ (m->colptr)[j] ] += 1.0;
+  }
+  printf("found %d components, largest is %d, rep is %d\n",ncomponents,largest,largest_rep);
+  printf("found %d components\n",ncomponents);
+
+  /* mark exactly the vertices of the largest component */
+  for (i=0; i<N; i++) visited[i] = 0;
+  (void) recursive_visit(largest_rep,neighbors,degree,visited);
+  
+  /* we now reuse the degree and reps vectors:
+     degree[old index] = new index (or -1), reps[new index] = old index */
+
+  for (i=0; i<N; i++) degree[i] = reps[i] = -1;
+  j = 0;
+  for (i=0; i<N; i++) {
+    if (visited[i]) {
+      degree[i] = j;
+      reps[j] = i;
+      j++;
+    }
+  }
+
+  /* ---- extract the largest component into l ---- */
+
+  l = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!l) { 
+    taucs_printf("taucs_ccs_generate_rrn: out of memory\n");
+    goto done;
+  }
+
+  nnz = (m->colptr)[N];   /* this is an upper bound */
+
+  l->n      = largest;
+  l->m      = largest;
+  l->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  l->colptr = (int*)    taucs_malloc((largest+1) * sizeof(int));
+  l->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  l->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  if (!(l->colptr) || !(l->rowind) || !(l->values.d)) {
+    taucs_printf("taucs_ccs_generate_rrn: out of memory\n");
+    taucs_free(l->colptr); taucs_free(l->rowind); taucs_free(l->values.d);
+    taucs_free(l);
+    l = NULL;
+    goto done;
+  }
+
+  k = 0;
+  for (jp=0; jp<N; jp++) {
+    int iip;
+    j = degree[jp];
+    if (j == -1) continue;   /* column is not part of the largest component */
+    assert(j < largest);
+    (l->colptr)[j] = k;
+    for (iip=(m->colptr)[jp]; iip<(m->colptr)[jp+1]; iip++) {
+      double v;
+      ip = (m->rowind)[iip];
+      v  = (m->values.d)[iip];
+      i = degree[ip];
+      assert(i >= j);
+      (l->rowind)[k] = i;
+      (l->values.d)[k] = v;
+      k++;
+    }
+  }
+  (l->colptr)[largest] = k;
+
+done:
+  /* l is NULL here if anything failed after the graph arrays were set up */
+  for (i=0; i<N; i++) taucs_free(neighbors[i]);
+  taucs_free(visited);
+  taucs_free(reps);
+  taucs_free(degree);
+  taucs_free(neighbors);
+
+  taucs_ccs_free(m);
+
+  return l;
+}
+
+/*
+ * Sample a smooth function on an X-by-Y-by-Z grid.
+ *
+ * With dx = (x+1)/X, dy = (y+1)/Y and dz = (z+1)/Z, the entry for grid
+ * point (x,y,z), stored at index z*X*Y + y*X + x, is
+ *
+ *     ( dx*dy*dz*(1-dx)*(1-dy)*(1-dz) )^2 * exp(dx*dx*dy*dz)
+ *
+ * The `which' argument is accepted but not used.
+ *
+ * Returns a newly allocated array of X*Y*Z doubles, or NULL (after logging
+ * a message) when the allocation fails.
+ */
+double* taucs_vec_generate_continuous(int X, int Y, int Z, char* which)
+{
+  double* vec;
+  int     ix, iy, iz;
+  int     pos = 0;   /* runs through z*X*Y + y*X + x in loop order */
+
+  vec = (double*) taucs_malloc( X*Y*Z * sizeof(double));
+  if (vec == NULL) {
+    taucs_printf("taucs_vec_generate_continuous: out of memory\n");
+    return NULL;
+  }
+
+  for (iz = 0; iz < Z; iz++) {
+    const double dz = (double) (iz+1) / (double) Z;
+
+    for (iy = 0; iy < Y; iy++) {
+      const double dy = (double) (iy+1) / (double) Y;
+
+      for (ix = 0; ix < X; ix++) {
+        const double dx = (double) (ix+1) / (double) X;
+        double val;
+
+        val = (dx*dy*dz*(1.0-dx)*(1.0-dy)*(1.0-dz));
+        val = val*val;
+        val = val*exp(dx*dx*dy*dz);
+
+        vec[pos] = val;
+        pos++;
+      }
+    }
+  }
+
+  return vec;
+}
+
+/*
+ * Helper for taucs_ccs_generate_discontinuous: nonzero when grid point
+ * (x,y,z) lies in the central box [X/8,7X/8) x [Y/8,7Y/8) x [Z/8,7Z/8)
+ * (integer arithmetic).  Points outside the grid may be passed.
+ */
+static int
+taucs_discontinuous_in_core(int x, int y, int z, int X, int Y, int Z)
+{
+  return ((x >= X/8) && (x < 7*X/8))
+      && ((y >= Y/8) && (y < 7*Y/8))
+      && ((z >= Z/8) && (z < 7*Z/8));
+}
+
+/*
+ * Helper for taucs_ccs_generate_discontinuous: store the edge of value v
+ * between vertex j and its neighbor nb in column j of m, if it belongs
+ * there.
+ *
+ * Only the lower triangle is stored, so the edge is recorded only when
+ * nb > j; this test also rejects missing neighbors (nb == -1).  The
+ * magnitude of v is added to the diagonal of column j (at position diag)
+ * and to D[nb], the pending diagonal of the not-yet-emitted column nb.
+ *
+ * ip is the next free position in m->rowind / m->values.d; the updated
+ * position is returned.
+ */
+static int
+taucs_discontinuous_add_edge(taucs_ccs_matrix* m, double* D,
+                             int j, int nb, int diag, int ip, double v)
+{
+  if (nb <= j) return ip;
+
+  (m->rowind)[ip]     = nb;
+  (m->values.d)[ip]   = v;
+  (m->values.d)[diag] -= v;
+  D[nb]               -= v;
+
+  return ip + 1;
+}
+
+/*
+ * Generate the lower triangle of a symmetric 7-point-stencil matrix on an
+ * X-by-Y-by-Z grid (no wrap-around) with discontinuous coefficients.
+ *
+ * Grid point (x,y,z) is vertex z*X*Y + y*X + x.  An edge whose two end
+ * points both lie in the central box [X/8,7X/8) x [Y/8,7Y/8) x [Z/8,7Z/8)
+ * has the value -jump; every other edge has the value -1.0.  Each diagonal
+ * entry is the sum of the magnitudes of the edges incident to the vertex,
+ * plus 1.0 at the origin (nonsingular Neumann).  The diagonal is the first
+ * entry of each column.
+ *
+ * X, Y, Z: grid dimensions; the matrix is X*Y*Z by X*Y*Z.
+ * jump:    edge magnitude inside the central box.
+ *
+ * Returns a newly allocated matrix flagged TAUCS_SYMMETRIC | TAUCS_LOWER |
+ * TAUCS_DOUBLE, or NULL when an allocation fails (everything allocated so
+ * far is released in that case).
+ */
+taucs_ccs_matrix* 
+taucs_ccs_generate_discontinuous(int X, int Y, int Z, double jump)
+{
+  taucs_ccs_matrix* m;
+  int         N;
+  int         nnz;
+  int         x,y,z,i,ip;
+  double*     D; /* contributions to future diagonal elements */
+
+  taucs_printf("taucs_ccs_generate_discontinuous: starting (%d %d %d %e)\n",
+	       X,Y,Z,jump);
+
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("taucs_ccs_generate_discontinuous: out of memory\n");
+    return NULL; 
+  }
+
+  N   = X*Y*Z;
+  nnz = 4*N;   /* this is an upper bound */
+
+  m->n      = N;
+  m->m      = N;
+  m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  m->colptr = (int*)    taucs_malloc((N+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->values.d = (double*) taucs_malloc(nnz       * sizeof(double));
+
+  D         = (double*) taucs_malloc(N         * sizeof(double));
+
+  /* all four arrays must exist, including the values array */
+  if (!(m->colptr) || !(m->rowind) || !(m->values.d) || !D) {
+    taucs_printf("taucs_ccs_generate_discontinuous: out of memory: ncols=%d nnz=%d\n",N,nnz);
+    taucs_free(m->colptr); taucs_free(m->rowind); taucs_free(m->values.d); taucs_free(D);
+    taucs_free(m);
+    return NULL; 
+  }
+
+  for (i=0; i<N; i++) D[i] = 0.0;
+
+  ip = 0;
+  for (z=0; z<Z; z++) {
+    for (y=0; y<Y; y++) {
+      for (x=0; x<X; x++) {
+	int j, jw, je, js, jn, jd, ju; /* this vertex; west, east, south, north, down, up (-1 if absent) */
+	int diag;                      /* position of the diagonal value of this column */
+	int cj;                        /* is this vertex inside the central box? */
+
+	j  = z*X*Y + y*X + x;
+
+	jw = (x > 0  ) ? j - 1   : -1;
+	je = (x < X-1) ? j + 1   : -1;
+	js = (y > 0  ) ? j - X   : -1;
+	jn = (y < Y-1) ? j + X   : -1;
+	jd = (z > 0  ) ? j - X*Y : -1;
+	ju = (z < Z-1) ? j + X*Y : -1;
+
+	/* the diagonal comes first in the column and starts out with what
+	   the lower-numbered neighbors have already contributed */
+	(m->colptr)[j]  = ip;
+	diag            = ip;
+	(m->rowind)[ip] = j;
+	/* Nonsingular Neumann */
+	if (x==0 && y==0 && z==0)
+	  (m->values.d)[ip]= D[j] + 1.0;
+	else 
+	  (m->values.d)[ip]= D[j]; 
+	ip++;
+
+	cj = taucs_discontinuous_in_core(x,y,z,X,Y,Z);
+
+	/* an edge is strong (-jump) only when both end points are in the box */
+	ip = taucs_discontinuous_add_edge(m, D, j, jw, diag, ip,
+	       (cj && taucs_discontinuous_in_core(x-1,y,z,X,Y,Z)) ? -jump : -1.0);
+	ip = taucs_discontinuous_add_edge(m, D, j, je, diag, ip,
+	       (cj && taucs_discontinuous_in_core(x+1,y,z,X,Y,Z)) ? -jump : -1.0);
+	ip = taucs_discontinuous_add_edge(m, D, j, js, diag, ip,
+	       (cj && taucs_discontinuous_in_core(x,y-1,z,X,Y,Z)) ? -jump : -1.0);
+	ip = taucs_discontinuous_add_edge(m, D, j, jn, diag, ip,
+	       (cj && taucs_discontinuous_in_core(x,y+1,z,X,Y,Z)) ? -jump : -1.0);
+	ip = taucs_discontinuous_add_edge(m, D, j, ju, diag, ip,
+	       (cj && taucs_discontinuous_in_core(x,y,z+1,X,Y,Z)) ? -jump : -1.0);
+	ip = taucs_discontinuous_add_edge(m, D, j, jd, diag, ip,
+	       (cj && taucs_discontinuous_in_core(x,y,z-1,X,Y,Z)) ? -jump : -1.0);
+      }
+    }
+  }
+  taucs_free(D);
+  (m->colptr)[N] = ip;
+
+  taucs_printf("taucs_ccs_generate_discontinuous: done, ncols=%d allocated nnz=%d real nnz=%d\n",
+	       N,nnz,ip);
+
+  return m;
+}
+
+#endif /* TAUCS_CORE_DOUBLE */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
diff --git a/contrib/taucs/src/taucs_ccs_io.c b/contrib/taucs/src/taucs_ccs_io.c
new file mode 100644
index 0000000000000000000000000000000000000000..18498b0be5febb32ab0c04dc550f546b229281ce
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_io.c
@@ -0,0 +1,1052 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+/*#pragma lang +C*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+#include "taucs.h"
+
+#ifdef OSTYPE_win32
+#include <io.h> /*_telli64, _lseeki64*/
+#else
+#include <unistd.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+
+/*********************************************************/
+/* read binary                                           */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Read a sparse matrix from a binary file laid out as consumed below:
+ * three ints (nrows, ncols, flags), then ncols+1 column pointers, then
+ * colptr[ncols] row indices and colptr[ncols] values whose element
+ * type is selected by the TAUCS_DOUBLE / TAUCS_SINGLE / TAUCS_DCOMPLEX /
+ * TAUCS_SCOMPLEX bit of flags.
+ *
+ * Returns the new matrix, or NULL when the file cannot be opened, the
+ * header or the column pointers are truncated or invalid, memory runs
+ * out, or the element type is not part of this build (after assert(0)
+ * when assertions are enabled).  The file descriptor and the temporary
+ * column-pointer buffer are released on every path.
+ *
+ * The reads of the row indices and of the values are not verified
+ * (unchanged from the previous behaviour).
+ */
+taucs_ccs_matrix* 
+taucs_ccs_read_binary(char* filename)
+{
+  taucs_ccs_matrix* A = NULL;
+  int     nrows,ncols,flags,j;
+  int     f;
+  ssize_t bytes_read;
+  int*    colptr;
+
+  taucs_printf("taucs_ccs_binary: reading binary matrix %s\n",filename);
+  
+#ifdef OSTYPE_win32
+  f = open(filename,_O_RDONLY |_O_BINARY);
+#else
+  f = open(filename,O_RDONLY);
+#endif
+  if (f < 0) {
+    taucs_printf("taucs_ccs_read_binary: could not open binary file %s\n",filename);
+    return NULL;
+  }
+
+  /* header; each field is read only if the previous one was complete */
+  bytes_read = read(f,&nrows,sizeof(int));
+  if (bytes_read == (ssize_t) sizeof(int))
+    bytes_read = read(f,&ncols,sizeof(int));
+  if (bytes_read == (ssize_t) sizeof(int))
+    bytes_read = read(f,&flags,sizeof(int));
+  if (bytes_read != (ssize_t) sizeof(int) || nrows < 0 || ncols < 0) {
+    taucs_printf("taucs_ccs_read_binary: truncated or invalid header in %s\n",filename);
+    close(f);
+    return NULL;
+  }
+
+  taucs_printf("\t%d-by-%d, flags = %08x\n",nrows,ncols,flags);
+  taucs_printf("\t%d-by-%d, flags = %d  \n",nrows,ncols,flags);
+
+  colptr = (int*) taucs_malloc((ncols+1) * sizeof(int));
+  if (!colptr) {
+    taucs_printf("taucs_ccs_read_binary: out of memory\n");
+    close(f);
+    return NULL;
+  }
+  
+  /* colptr[ncols] is the number of nonzeros and sizes the allocations
+     below, so it must have been read completely and be non-negative */
+  bytes_read = read(f,colptr,(ncols+1)*sizeof(int));
+  if (bytes_read != (ssize_t) ((ncols+1)*sizeof(int)) || colptr[ncols] < 0) {
+    taucs_printf("taucs_ccs_read_binary: truncated or invalid column pointers in %s\n",filename);
+    taucs_free(colptr);
+    close(f);
+    return NULL;
+  }
+
+  taucs_printf("colptr = [");
+  for(j=0; j<min(ncols-1,10); j++)
+    taucs_printf("%d,",colptr[j]);
+  taucs_printf("...,%d]\n",colptr[ncols]);
+
+  if ( 0 ) /* we need this so that we have 'else if' in each type */
+  {}
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  else if (flags & TAUCS_DOUBLE) {
+    A = taucs_dccs_create(nrows,ncols,colptr[ncols]);
+    if (A) {
+      bytes_read = read(f,A->rowind,colptr[ncols]*sizeof(int));
+      bytes_read = read(f,A->values.d,colptr[ncols]*sizeof(taucs_double));
+    }
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  else if (flags & TAUCS_SINGLE) {
+    A = taucs_sccs_create(nrows,ncols,colptr[ncols]);
+    if (A) {
+      bytes_read = read(f,A->rowind,colptr[ncols]*sizeof(int));
+      bytes_read = read(f,A->values.s,colptr[ncols]*sizeof(taucs_single));
+    }
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  else if (flags & TAUCS_DCOMPLEX) {
+    A = taucs_zccs_create(nrows,ncols,colptr[ncols]);
+    if (A) {
+      bytes_read = read(f,A->rowind,colptr[ncols]*sizeof(int));
+      bytes_read = read(f,A->values.z,colptr[ncols]*sizeof(taucs_dcomplex));
+    }
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  else if (flags & TAUCS_SCOMPLEX) {
+    A = taucs_cccs_create(nrows,ncols,colptr[ncols]);
+    if (A) {
+      bytes_read = read(f,A->rowind,colptr[ncols]*sizeof(int));
+      bytes_read = read(f,A->values.c,colptr[ncols]*sizeof(taucs_scomplex));
+    }
+  }
+#endif
+  else {
+    taucs_printf("taucs_ccs_read_binary: unsupported data type, flags = %08x\n",flags);
+    assert(0);
+  }
+
+  if (!A) {
+    /* creation failed, or the type is unsupported and assert() is
+       compiled out: release everything instead of dereferencing NULL */
+    taucs_free(colptr);
+    close(f);
+    return NULL;
+  }
+
+  A->flags = flags;
+  
+  for (j=0; j<=ncols; j++) (A->colptr)[j] = colptr[j];
+
+  taucs_free(colptr);
+
+  close(f);
+
+  taucs_printf("taucs_ccs_read_binary: done reading\n");
+
+  return A;
+}
+
+/*********************************************************/
+/* read hb                                               */
+/*********************************************************/
+
+
+/*
+ * Read a matrix from a Harwell-Boeing file through the ireadhb_ /
+ * [dszc]readhb_ routines.
+ *
+ * ireadhb_ reports the matrix type and dimensions; type[0] selects the
+ * branch below ('p', 'r' or 'c', either case) and flags selects the
+ * element type to create.  For an 'r' matrix a complex request falls
+ * back to the matching real type with a warning.  type[1] of 's' or
+ * 'h' marks the result as symmetric or Hermitian with the lower part
+ * stored.  Indices are converted from 1-based to 0-based before
+ * returning.
+ *
+ * Returns the matrix, or NULL when the file name does not fit the
+ * internal 256-byte buffer, the matrix cannot be created, or the
+ * matrix type / requested element type is not handled (after
+ * assert(0) for an unhandled element type when assertions are
+ * enabled).
+ */
+taucs_ccs_matrix* 
+taucs_ccs_read_hb(char* filename,int flags)
+{
+  taucs_ccs_matrix* A = NULL;
+  int  nrows,ncols,nnz,j;
+  char fname[256];
+  char type[3];
+  
+  /* fname is a fixed-size buffer: refuse names that do not fit
+     (including the terminating NUL) instead of overflowing it */
+  if (strlen(filename) >= sizeof(fname)) {
+    taucs_printf("taucs_ccs_read_hb: file name too long: %s\n",filename);
+    return NULL;
+  }
+  for (j=0; j<256; j++) fname[j] = ' ';
+  strcpy(fname,filename);
+
+  taucs_printf("taucs_ccs_read_hb: reading HB matrix %s\n",filename);
+
+  ireadhb_(fname,type,&nrows,&ncols,&nnz);
+
+  if (type[0] == 'p' || type[0] == 'P') {
+
+    if ( 0 ); /* we need this so that we have 'else if' in each type */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+    else if (flags & TAUCS_DOUBLE) {
+      A = taucs_dccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      dreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.d);
+    }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+    else if (flags & TAUCS_SINGLE) {
+      A = taucs_sccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      sreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.s);
+    }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+    else if (flags & TAUCS_DCOMPLEX) {
+      A = taucs_zccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      zreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.z);
+    }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+    else if (flags & TAUCS_SCOMPLEX) {
+      A = taucs_cccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      creadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.c);
+    }
+#endif
+    else {
+      assert(0);
+    }
+  }
+
+  if (type[0] == 'r' || type[0] == 'R') {
+    if ( 0 ); /* we need this so that we have 'else if' in each type */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+    else if (flags & TAUCS_DOUBLE) {
+      A = taucs_dccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      dreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.d);
+    }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+    else if (flags & TAUCS_SINGLE) {
+      A = taucs_sccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      sreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.s);
+    }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+    else if (flags & TAUCS_DCOMPLEX) {
+      /* real data: fall back to a double matrix */
+      taucs_printf("taucs_ccs_read_hb: warning: requested a complex type, matrix is real\n");
+      A = taucs_dccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      dreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.d);
+    }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+    else if (flags & TAUCS_SCOMPLEX) {
+      /* real data: fall back to a single matrix */
+      taucs_printf("taucs_ccs_read_hb: warning: requested a complex type, matrix is real\n");
+      A = taucs_sccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      sreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.s);
+    }
+#endif
+    else {
+      assert(0);
+    }
+  }
+
+  if (type[0] == 'c' || type[0] == 'C') {
+    if ( 0 ); /* we need this so that we have 'else if' in each type */
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+    else if (flags & TAUCS_DCOMPLEX) {
+      A = taucs_zccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      zreadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.z);
+    }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+    else if (flags & TAUCS_SCOMPLEX) {
+      /* the "matrix is real" warning that used to be printed here was
+         a copy-paste from the 'r' branch: this branch reads complex
+         data into a complex matrix, so there is nothing to warn about */
+      A = taucs_cccs_create(nrows,ncols,nnz);
+      if (!A) return NULL;
+      creadhb_(fname,&nrows,&ncols,&nnz,
+               A->colptr,A->rowind,A->values.c);
+    }
+#endif
+    else {
+      assert(0);
+    }
+  }
+
+  if (!A) {
+    /* type[0] is none of p/r/c, or the element type is not in this
+       build and assert() is compiled out: do not dereference NULL */
+    taucs_printf("taucs_ccs_read_hb: unsupported matrix type or data type\n");
+    return NULL;
+  }
+
+  if (type[1] == 's' || type[1] == 'S')
+    A->flags |= TAUCS_SYMMETRIC | TAUCS_LOWER;
+  if (type[1] == 'h' || type[1] == 'H')
+    A->flags |= TAUCS_HERMITIAN | TAUCS_LOWER;
+
+  /* make indices 0-based */
+  for (j=0; j<=ncols; j++) ((A->colptr)[j])--;
+  for (j=0; j<nnz;    j++) ((A->rowind)[j])--;
+
+  taucs_printf("taucs_ccs_read_hb: done reading\n");
+
+  return A;
+}
+
+
+/*********************************************************/
+/* write ijv                                             */
+/*********************************************************/
+
+
+/*
+ * Generic entry point for writing a matrix in ijv format: forwards to
+ * the writer of the first element type whose bit is set in m->flags
+ * and that is part of this build, and returns that writer's result.
+ * When no enabled type matches, asserts and returns -1.
+ */
+int
+taucs_ccs_write_ijv(taucs_ccs_matrix* m, char* ijvfilename)
+{
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if ((m->flags & TAUCS_DOUBLE) != 0) {
+    return taucs_dccs_write_ijv(m,ijvfilename);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if ((m->flags & TAUCS_SINGLE) != 0) {
+    return taucs_sccs_write_ijv(m,ijvfilename);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if ((m->flags & TAUCS_DCOMPLEX) != 0) {
+    return taucs_zccs_write_ijv(m,ijvfilename);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if ((m->flags & TAUCS_SCOMPLEX) != 0) {
+    return taucs_cccs_write_ijv(m,ijvfilename);
+  }
+#endif
+
+  /* no element type of this build matched the matrix flags */
+  assert(0);
+  return -1;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+
+#ifndef TAUCS_CORE_GENERAL
+/*
+ * Type-specific ijv writer: writes one "row column value" line per
+ * stored entry of m to ijvfilename, using 1-based indices.  When
+ * TAUCS_SYMMETRIC is set in m->flags every off-diagonal entry is also
+ * written mirrored ("column row value").  Complex values are written
+ * as "re+imi".
+ *
+ * Returns 0 on success and -1 if the file cannot be opened.
+ *
+ * NOTE(review): matrices flagged TAUCS_HERMITIAN are not mirrored
+ * here; confirm whether that is intended.
+ */
+int
+taucs_dtl(ccs_write_ijv)(taucs_ccs_matrix* m, 
+			 char* ijvfilename)
+{
+  int i,ip,j,n;
+  taucs_datatype Aij;
+  FILE* f;
+
+  f = fopen(ijvfilename , "w");
+
+  if (f == NULL) {
+    taucs_printf("taucs_ccs_write_ijv: could not open ijv file %s\n",ijvfilename);
+    return -1;
+  }
+
+  n = m->n;
+  
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr[j+1]); ip++) {
+      i   = (m->rowind)[ip];
+      Aij = (m->taucs_values)[ip];
+
+#ifdef TAUCS_CORE_DOUBLE
+      fprintf(f,"%d %d %0.17e\n",i+1,j+1,Aij);
+      if (i != j && ((m->flags) & TAUCS_SYMMETRIC))
+	fprintf(f,"%d %d %0.17e\n",j+1,i+1,Aij);
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+      fprintf(f,"%d %d %0.9e\n",i+1,j+1,Aij);
+      if (i != j && ((m->flags) & TAUCS_SYMMETRIC))
+	fprintf(f,"%d %d %0.9e\n",j+1,i+1,Aij);
+#endif
+      
+#ifdef TAUCS_CORE_DCOMPLEX
+      fprintf(f,"%d %d %0.17e+%0.17ei\n",i+1,j+1,taucs_re(Aij),taucs_im(Aij));
+      /* the mirrored entry carries the same value; the imaginary part
+         used to be written as taucs_re(Aij) by mistake */
+      if (i != j && ((m->flags) & TAUCS_SYMMETRIC))
+	fprintf(f,"%d %d %0.17e+%0.17ei\n",j+1,i+1,taucs_re(Aij),taucs_im(Aij));
+#endif
+      
+#ifdef TAUCS_CORE_SCOMPLEX
+      fprintf(f,"%d %d %0.9e+%0.9ei\n",i+1,j+1,taucs_re(Aij),taucs_im(Aij));
+      if (i != j && ((m->flags) & TAUCS_SYMMETRIC))
+	fprintf(f,"%d %d %0.9e+%0.9ei\n",j+1,i+1,taucs_re(Aij),taucs_im(Aij));
+#endif      
+
+    }
+  }
+
+  fclose(f);
+
+  return 0;
+} 
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* read ijv                                              */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Generic entry point for reading an ijv file: forwards to the reader
+ * of the first element type whose bit is set in flags and that is part
+ * of this build, and returns that reader's result.  When no enabled
+ * type matches, asserts and returns NULL.
+ */
+taucs_ccs_matrix* 
+taucs_ccs_read_ijv(char* ijvfilename,int flags)
+{
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    return taucs_dccs_read_ijv(ijvfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if ((flags & TAUCS_SINGLE) != 0) {
+    return taucs_sccs_read_ijv(ijvfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    return taucs_zccs_read_ijv(ijvfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    return taucs_cccs_read_ijv(ijvfilename,flags);
+  }
+#endif
+
+  /* no element type of this build matched the requested flags */
+  assert(0);
+  return NULL;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+#ifndef TAUCS_CORE_GENERAL
+
+/*
+ * Type-specific ijv reader: reads "row column value" triplets with
+ * 1-based indices from ijvfilename until the first line that does not
+ * parse, and assembles them into a newly allocated CCS matrix.
+ * Complex values are expected as "re+imi".
+ *
+ * The dimensions are the largest row and column indices that were
+ * kept.  When TAUCS_SYMMETRIC or TAUCS_HERMITIAN is set in flags,
+ * entries above the diagonal (row < column) are dropped and the result
+ * is flagged as storing the lower part.
+ *
+ * Returns the matrix, or NULL if the file cannot be opened, an index
+ * is smaller than 1, or memory runs out.  The file and all temporary
+ * buffers are released on every path.
+ *
+ * NOTE(review): the result stores nrows in m->n and ncols in m->m;
+ * verify against the taucs_ccs_matrix field convention for
+ * rectangular matrices.
+ */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_read_ijv)(char* ijvfilename,int flags)
+{
+  FILE* f;
+  taucs_ccs_matrix*  m;
+  int*    clen; 
+  int*    is; 
+  int*    js;
+  taucs_datatype* vs;
+  int ncols, nrows, nnz;
+  int i,j,k,n;
+  double         di,dj;
+  taucs_datatype dv;
+
+  f = fopen (ijvfilename , "r");
+
+  if (f == NULL) {
+    taucs_printf("taucs_ccs_read_ijv: could not open ijv file %s\n",ijvfilename);
+    return NULL;
+  }
+
+  n = 10000;
+  is = (int*)    taucs_malloc(n*sizeof(int));
+  js = (int*)    taucs_malloc(n*sizeof(int));
+  vs = (taucs_datatype*) taucs_malloc(n*sizeof(taucs_datatype));
+  if (!is || !js || !vs) {
+    taucs_printf("symccs_read_ijv: out of memory\n");
+    taucs_free(is); taucs_free(js); taucs_free(vs); 
+    fclose(f);
+    return NULL; 
+  }
+
+  nnz = 0;
+  nrows = ncols = 0;
+  while (!feof(f)) {
+    if (nnz == n) {
+      int*            new_is;
+      int*            new_js;
+      taucs_datatype* new_vs;
+
+      n = (int) ( 1.25 * (double) n);
+      taucs_printf("taucs_ccs_read_ijv: allocating %d ijv's\n",n);
+      /* keep the old pointer when a realloc fails so that the buffer
+         can still be released below */
+      new_is = (int*)    taucs_realloc(is,n*sizeof(int));
+      if (new_is) is = new_is;
+      new_js = (int*)    taucs_realloc(js,n*sizeof(int));
+      if (new_js) js = new_js;
+      new_vs = (taucs_datatype*) taucs_realloc(vs,n*sizeof(taucs_datatype));
+      if (new_vs) vs = new_vs;
+      if (!new_is || !new_js || !new_vs) { 
+	taucs_printf("taucs_ccs_read_ijv: out of memory\n");
+	taucs_free(is); taucs_free(js); taucs_free(vs); 
+	fclose(f);
+	return NULL; 
+      }
+    }
+
+#ifdef TAUCS_CORE_DOUBLE
+    if (fscanf(f, "%lg %lg %lg", &di, &dj, &dv) != 3) break;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+    if (fscanf(f, "%lg %lg %g", &di, &dj, &dv) != 3) break;
+#endif
+
+#ifdef TAUCS_CORE_COMPLEX
+    {
+      taucs_real_datatype dv_i;
+      taucs_real_datatype dv_r;
+
+#ifdef TAUCS_CORE_DCOMPLEX
+      if (fscanf(f, "%lg %lg %lg+%lgi", &di, &dj, &dv_r,&dv_i) != 4) break;
+#endif
+#ifdef TAUCS_CORE_SCOMPLEX
+      if (fscanf(f, "%lg %lg %g+%gi", &di, &dj, &dv_r, &dv_i) != 4) break;
+#endif
+      dv = taucs_complex_create(dv_r,dv_i);
+    }
+#endif
+
+    is[nnz] = (int)di; js[nnz] = (int)dj; vs[nnz] = dv;
+
+    /* indices are 1-based; anything smaller would index before the
+       start of the arrays built below */
+    if (is[nnz] < 1 || js[nnz] < 1) {
+      taucs_printf("taucs_ccs_read_ijv: invalid index (%d,%d) in %s\n",
+		   is[nnz],js[nnz],ijvfilename);
+      taucs_free(is); taucs_free(js); taucs_free(vs); 
+      fclose(f);
+      return NULL; 
+    }
+
+    /* we read the lower part */
+    if ((flags & TAUCS_SYMMETRIC) && is[nnz] < js[nnz]) continue; 
+    if ((flags & TAUCS_HERMITIAN) && is[nnz] < js[nnz]) continue; 
+    nrows = max(is[nnz],nrows);
+    ncols = max(js[nnz],ncols);
+    nnz++;
+   }
+
+  fclose ( f );
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("taucs_ccs_read_ijv: out of memory\n");
+    taucs_free(is); taucs_free(js); taucs_free(vs); 
+    return NULL; 
+  }
+  m->n      = nrows;
+  m->m      = ncols;
+  m->flags  = 0;
+  if (flags & TAUCS_SYMMETRIC) 
+    m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER;
+  if (flags & TAUCS_HERMITIAN) 
+    m->flags  = TAUCS_HERMITIAN | TAUCS_LOWER;
+
+#ifdef TAUCS_CORE_DOUBLE
+  m->flags |= TAUCS_DOUBLE;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+  m->flags |= TAUCS_SINGLE;
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+  m->flags |= TAUCS_DCOMPLEX;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+  m->flags |= TAUCS_SCOMPLEX;
+#endif
+
+  clen      = (int*)    taucs_malloc((ncols+1) * sizeof(int));
+  m->colptr = (int*)    taucs_malloc((ncols+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->taucs_values = (taucs_datatype*) taucs_malloc(nnz * sizeof(taucs_datatype));
+  /* the values array must be checked too (rowind used to be tested twice) */
+  if (!clen || !(m->colptr) || !(m->rowind) || !(m->taucs_values)) {
+    taucs_printf("taucs_ccs_read_ijv: out of memory: ncols=%d nnz=%d\n",ncols,nnz);
+    taucs_free(clen); taucs_free(m->colptr); taucs_free(m->rowind); 
+    taucs_free(m->taucs_values);
+    taucs_free (m); taucs_free(is); taucs_free(js); taucs_free(vs); 
+    return NULL; 
+  }
+
+  /* count the entries of each column */
+  for (j=0; j<ncols; j++) clen[j] = 0;
+  for (k=0; k<nnz; k++) {
+    j = js[k] - 1; /* make it 0-based */
+    ( clen[j] )++;
+  }
+  /* just check */
+  k = 0;
+  for (j=0; j<ncols; j++) 
+    k += clen[j];
+  assert(k == nnz);
+
+  /* now compute column pointers; clen[j] becomes the next free slot
+     of column j */
+  
+  k = 0;
+  for (j=0; j<ncols; j++) {
+    int tmp;
+    tmp =  clen[j];
+    clen[j] = (m->colptr[j]) = k;
+    k += tmp;
+  }
+  clen[ncols] = (m->colptr[ncols]) = k;
+  assert(clen[ncols] == nnz);
+  
+  /* now read matrix into data structure */
+
+  for (k=0; k<nnz; k++) {
+    i = is[k] - 1; /* make it 0-based */
+    j = js[k] - 1; /* make it 0-based */
+    assert(i < nrows);
+    assert(j < ncols);
+    (m->taucs_values)[ clen[j] ] = vs[k];
+    (m->rowind)[ clen[j] ] = i;
+    clen[j] ++;
+  }
+  
+  taucs_free(clen);
+  taucs_free(vs);
+  taucs_free(js);
+  taucs_free(is);
+  
+  taucs_printf("taucs_ccs_read_ijv: read %s, n=%d\n",ijvfilename,m->n);
+
+  return m;
+} 
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* read mtx                                              */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Generic entry point for reading an mtx file: forwards to the reader
+ * of the first element type whose bit is set in flags and that is part
+ * of this build, and returns that reader's result.  When no enabled
+ * type matches, asserts and returns NULL.
+ */
+taucs_ccs_matrix* 
+taucs_ccs_read_mtx(char* mtxfilename,int flags)
+{
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    return taucs_dccs_read_mtx(mtxfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if ((flags & TAUCS_SINGLE) != 0) {
+    return taucs_sccs_read_mtx(mtxfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    return taucs_zccs_read_mtx(mtxfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    return taucs_cccs_read_mtx(mtxfilename,flags);
+  }
+#endif
+
+  /* no element type of this build matched the requested flags */
+  assert(0);
+  return NULL;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+#ifndef TAUCS_CORE_GENERAL
+
+/*
+ * Type-specific mtx reader: reads a three-integer header followed by
+ * "row column value" triplets with 1-based indices, until the first
+ * line that does not parse, and assembles them into a newly allocated
+ * CCS matrix.  Complex values are expected as "re+imi".
+ *
+ * The header values are only checked for presence; the dimensions and
+ * the entry count of the result are derived from the triplets that
+ * were read.  When TAUCS_SYMMETRIC is set in flags, entries above the
+ * diagonal are transposed into the lower part.  When TAUCS_PATTERN is
+ * set, real values are replaced: off-diagonal entries become -1 and
+ * diagonal entries become one more than the largest row index seen so
+ * far; the complex types assert in that mode.
+ *
+ * Returns the matrix, or NULL if the file cannot be opened, the header
+ * is malformed, an index is smaller than 1, or memory runs out.  The
+ * file and all temporary buffers are released on every path.
+ *
+ * NOTE(review): the result stores nrows in m->n and ncols in m->m;
+ * verify against the taucs_ccs_matrix field convention for
+ * rectangular matrices.
+ */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_read_mtx)(char* filename,int flags)
+{
+  FILE* f;
+  taucs_ccs_matrix*  m;
+  int*    clen; 
+  int*    is; 
+  int*    js;
+  taucs_datatype* vs;
+  int ncols, nrows, nnz;
+  int i,j,k,n;
+  double di,dj;
+  taucs_datatype dv;
+
+  f = fopen (filename , "r");
+
+  if (f == NULL) {
+    taucs_printf("taucs_ccs_read_mtx: could not open mtx file %s\n",filename);
+    return NULL;
+  }
+
+  if (fscanf(f, "%d %d %d", &nrows, &ncols, &nnz) != 3) {
+    taucs_printf("taucs_ccs_read_mtx: wrong header\n");
+    fclose(f);
+    return NULL;
+  }
+
+  n = 10000;
+  is = (int*)    taucs_malloc(n*sizeof(int));
+  js = (int*)    taucs_malloc(n*sizeof(int));
+  vs = (taucs_datatype*) taucs_malloc(n*sizeof(taucs_datatype));
+  if (!is || !js || !vs) {
+    taucs_printf("taucs_ccs_read_mtx: out of memory\n");
+    taucs_free(is); taucs_free(js); taucs_free(vs); 
+    fclose(f);
+    return NULL; 
+  }
+
+  /* the header values are discarded; everything is recomputed from
+     the triplets */
+  nnz = 0;
+  nrows = ncols = 0;
+  while (!feof(f)) {
+    if (nnz == n) {
+      int*            new_is;
+      int*            new_js;
+      taucs_datatype* new_vs;
+
+      n = (int) ( 1.25 * (double) n);
+      taucs_printf("taucs_ccs_read_mtx: allocating %d ijv's\n",n);
+      /* keep the old pointer when a realloc fails so that the buffer
+         can still be released below */
+      new_is = (int*)    taucs_realloc(is,n*sizeof(int));
+      if (new_is) is = new_is;
+      new_js = (int*)    taucs_realloc(js,n*sizeof(int));
+      if (new_js) js = new_js;
+      new_vs = (taucs_datatype*) taucs_realloc(vs,n*sizeof(taucs_datatype));
+      if (new_vs) vs = new_vs;
+      if (!new_is || !new_js || !new_vs) { 
+	taucs_printf("taucs_ccs_read_mtx: out of memory\n");
+	taucs_free(is); taucs_free(js); taucs_free(vs); 
+	fclose(f);
+	return NULL; 
+      }
+    }
+
+#ifdef TAUCS_CORE_DOUBLE
+    if (fscanf(f, "%lg %lg %lg", &di, &dj, &dv) != 3) break;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+    if (fscanf(f, "%lg %lg %g", &di, &dj, &dv) != 3) break;
+#endif
+
+#ifdef TAUCS_CORE_COMPLEX
+    {
+      taucs_real_datatype dv_i;
+      taucs_real_datatype dv_r;
+#ifdef TAUCS_CORE_DCOMPLEX
+      if (fscanf(f, "%lg %lg %lg+%lgi", &di, &dj, &dv_r,&dv_i) != 4) break;
+#endif
+#ifdef TAUCS_CORE_SCOMPLEX
+      if (fscanf(f, "%lg %lg %g+%gi", &di, &dj, &dv_r,&dv_i) != 4) break;
+#endif
+      dv = taucs_complex_create(dv_r,dv_i);
+    }
+#endif
+
+    is[nnz] = (int)di; js[nnz] = (int)dj; vs[nnz] = dv;
+
+    /* indices are 1-based; anything smaller would index before the
+       start of the arrays built below */
+    if (is[nnz] < 1 || js[nnz] < 1) {
+      taucs_printf("taucs_ccs_read_mtx: invalid index (%d,%d) in %s\n",
+		   is[nnz],js[nnz],filename);
+      taucs_free(is); taucs_free(js); taucs_free(vs); 
+      fclose(f);
+      return NULL; 
+    }
+
+    /* upper or lower might be stored, we use lower */
+    if ((flags & TAUCS_SYMMETRIC) && is[nnz] < js[nnz]) {
+      int t = is[nnz];
+      is[nnz] = js[nnz];
+      js[nnz] = t;
+    }
+
+    if (flags & TAUCS_PATTERN) {
+#ifdef TAUCS_CORE_DOUBLE
+      if (is[nnz] == js[nnz]) vs[nnz] = (double) (nrows+1);
+      else                    vs[nnz] = -1.0;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+      if (is[nnz] == js[nnz]) vs[nnz] = (float) (nrows+1);
+      else                    vs[nnz] = -1.0;
+#endif
+
+      /* the guard used to be misspelled TAUCS_CORE_DCOMPEX, so this
+         assert was never compiled for the double-complex type */
+#ifdef TAUCS_CORE_DCOMPLEX
+      assert(0);
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+      assert(0);
+#endif
+    }
+    nrows = max(is[nnz],nrows);
+    ncols = max(js[nnz],ncols);
+    nnz++;
+   }
+
+  fclose ( f );
+
+  m = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!m) { 
+    taucs_printf("taucs_ccs_read_mtx: out of memory\n");
+    taucs_free(is); taucs_free(js); taucs_free(vs); 
+    return NULL; 
+  }
+  m->n      = nrows;
+  m->m      = ncols;
+  if (flags & TAUCS_SYMMETRIC) 
+    m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER;
+  else
+    m->flags  = 0;
+
+#ifdef TAUCS_CORE_DOUBLE
+  m->flags |= TAUCS_DOUBLE;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+  m->flags |= TAUCS_SINGLE;
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+  m->flags |= TAUCS_DCOMPLEX;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+  m->flags |= TAUCS_SCOMPLEX;
+#endif
+
+  clen      = (int*)    taucs_malloc((ncols+1) * sizeof(int));
+  m->colptr = (int*)    taucs_malloc((ncols+1) * sizeof(int));
+  m->rowind = (int*)    taucs_malloc(nnz       * sizeof(int));
+  m->taucs_values = (taucs_datatype*) taucs_malloc(nnz * sizeof(taucs_datatype));
+  /* the values array must be checked too (rowind used to be tested twice) */
+  if (!clen || !(m->colptr) || !(m->rowind) || !(m->taucs_values)) {
+    taucs_printf("taucs_ccs_read_mtx: out of memory: ncols=%d nnz=%d\n",ncols,nnz);
+    taucs_free(clen); taucs_free(m->colptr); taucs_free(m->rowind); 
+    taucs_free(m->taucs_values);
+    taucs_free (m); taucs_free(is); taucs_free(js); taucs_free(vs); 
+    return NULL; 
+  }
+
+  /* count the entries of each column */
+  for (j=0; j<ncols; j++) clen[j] = 0;
+  for (k=0; k<nnz; k++) {
+    j = js[k] - 1; /* make it 0-based */
+    ( clen[j] )++;
+  }
+  /* just check */
+  k = 0;
+  for (j=0; j<ncols; j++) 
+    k += clen[j];
+  assert(k == nnz);
+
+  /* now compute column pointers; clen[j] becomes the next free slot
+     of column j */
+  
+  k = 0;
+  for (j=0; j<ncols; j++) {
+    int tmp;
+    tmp =  clen[j];
+    clen[j] = (m->colptr[j]) = k;
+    k += tmp;
+  }
+  clen[ncols] = (m->colptr[ncols]) = k;
+  assert(clen[ncols] == nnz);
+  
+  /* now read matrix into data structure */
+
+  for (k=0; k<nnz; k++) {
+    i = is[k] - 1; /* make it 0-based */
+    j = js[k] - 1; /* make it 0-based */
+    assert(i < nrows);
+    assert(j < ncols);
+    (m->taucs_values)[ clen[j] ] = vs[k];
+    (m->rowind)[ clen[j] ] = i;
+    clen[j] ++;
+  }
+  
+  taucs_free(clen);
+  taucs_free(vs);
+  taucs_free(js);
+  taucs_free(is);
+  
+  taucs_printf("taucs_ccs_read_mtx: read %s, n=%d\n",filename,m->n);
+
+  return m;
+} 
+
+#endif /* #ifndef TAUCS_CORE_GENERAL */
+
+/*********************************************************/
+/* read ccs                                              */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Generic entry point for reading a ccs text file: forwards to the
+ * reader of the first element type whose bit is set in flags and that
+ * is part of this build, and returns that reader's result.  When no
+ * enabled type matches, asserts and returns NULL.
+ */
+taucs_ccs_matrix* 
+taucs_ccs_read_ccs(char* ccsfilename,int flags)
+{
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    return taucs_dccs_read_ccs(ccsfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if ((flags & TAUCS_SINGLE) != 0) {
+    return taucs_sccs_read_ccs(ccsfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    return taucs_zccs_read_ccs(ccsfilename,flags);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    return taucs_cccs_read_ccs(ccsfilename,flags);
+  }
+#endif
+
+  /* no element type of this build matched the requested flags */
+  assert(0);
+  return NULL;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+#ifndef TAUCS_CORE_GENERAL
+
+/*
+ * Type-specific reader for the ccs text format consumed below: the
+ * order N, then N+1 column pointers, then pointers[N] row indices,
+ * then pointers[N] values (complex values as "re+imi").  The result is
+ * an N-by-N matrix created with ccs_create.
+ *
+ * When TAUCS_SYMMETRIC is set in flags the matrix is flagged as
+ * symmetric with the lower part stored, and every row index is
+ * asserted to be at or below the diagonal.
+ *
+ * Returns the matrix, or NULL if the file cannot be opened, the order
+ * or the column pointers are missing or invalid, or memory runs out.
+ * The file is closed on every path (it used to be left open).
+ *
+ * The reads of the row indices and of the values are not verified
+ * (unchanged from the previous behaviour).
+ */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_read_ccs)(char* filename,int flags)
+{
+  FILE* f;
+  taucs_ccs_matrix*  m;
+  int i,ip,j,N,*pointers;
+
+  f = fopen(filename ,"r");
+
+  if (f == NULL) {
+    taucs_printf("taucs_ccs_read_ccs: could not open ccs file %s\n",filename);
+    return NULL;
+  }
+
+  if (fscanf(f,"%d",&N) != 1 || N < 0) {
+    taucs_printf("taucs_ccs_read_ccs: wrong header\n");
+    fclose(f);
+    return NULL;
+  }
+
+  pointers = (int*) taucs_malloc((N+1)*sizeof(int));
+  if (!pointers) {
+    taucs_printf("taucs_ccs_read_ccs: out of memory\n");
+    fclose(f);
+    return NULL;
+  }
+
+  /* pointers[N] sizes the matrix created below, so every column
+     pointer must actually have been read */
+  for(i=0; i<N+1; ++i) {
+    if (fscanf(f,"%d",&pointers[i]) != 1) {
+      taucs_printf("taucs_ccs_read_ccs: could not read column pointers\n");
+      taucs_free(pointers);
+      fclose(f);
+      return NULL;
+    }
+  }
+  if (pointers[N] < 0) {
+    taucs_printf("taucs_ccs_read_ccs: invalid number of nonzeros %d\n",pointers[N]);
+    taucs_free(pointers);
+    fclose(f);
+    return NULL;
+  }
+
+  m = taucs_dtl(ccs_create)(N, N, pointers[N]);
+  if (!m) {
+    taucs_printf("taucs_ccs_read_ccs: out of memory\n");
+    taucs_free(pointers);
+    fclose(f);
+    return NULL;
+  }
+  for (i=0; i<=N; i++) (m->colptr)[i] = pointers[i];
+
+  for(i=0; i<pointers[N]; ++i)
+    fscanf(f,"%d",(m->rowind)+i);
+
+#ifdef TAUCS_CORE_DOUBLE  
+  for(i=0; i<pointers[N]; ++i)
+    fscanf(f,"%lg",(m->taucs_values)+i);
+#endif
+  
+#ifdef TAUCS_CORE_SINGLE  
+  for(i=0; i<pointers[N]; ++i)
+    fscanf(f,"%g",(m->taucs_values)+i);
+#endif
+  
+#ifdef TAUCS_CORE_DCOMPLEX  
+  for(i=0; i<pointers[N]; ++i) {
+    taucs_real_datatype dv_r;
+    taucs_real_datatype dv_i;
+    fscanf(f,"%lg+%lgi",&dv_r,&dv_i);
+    (m->taucs_values)[i] = taucs_complex_create(dv_r,dv_i);
+  }
+#endif
+  
+#ifdef TAUCS_CORE_SCOMPLEX
+  for(i=0; i<pointers[N]; ++i) {
+    taucs_real_datatype dv_r;
+    taucs_real_datatype dv_i;
+    fscanf(f,"%g+%gi",&dv_r,&dv_i);
+    (m->taucs_values)[i] = taucs_complex_create(dv_r,dv_i);
+  }
+#endif
+
+  /* everything has been read */
+  fclose(f);
+  
+  if (flags & TAUCS_SYMMETRIC) {
+    m->flags  = TAUCS_SYMMETRIC | TAUCS_LOWER;
+    /* a symmetric file must store the lower part only */
+    for (j=0; j<N; j++) {
+      for (ip=(m->colptr)[j]; ip<(m->colptr)[j+1]; ip++) {
+	i = (m->rowind)[ip];
+	assert(i >= j);
+      }
+    }
+  } else
+    m->flags  = 0;
+
+#ifdef TAUCS_CORE_DOUBLE
+  m->flags |= TAUCS_DOUBLE;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+  m->flags |= TAUCS_SINGLE;
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+  m->flags |= TAUCS_DCOMPLEX;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+  m->flags |= TAUCS_SCOMPLEX;
+#endif
+
+  taucs_free(pointers);
+  
+  taucs_printf("taucs_ccs_read_ccs: read %s, n=%d\n",filename,m->n);
+
+  return m;
+} 
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* vector io                                             */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Read n elements of the type selected by flags (TAUCS_DOUBLE,
+ * TAUCS_SINGLE, TAUCS_DCOMPLEX or TAUCS_SCOMPLEX) from the raw binary
+ * file filename into a buffer obtained from taucs_malloc.
+ *
+ * Returns the buffer, which the caller owns, or NULL if the element
+ * type is not recognised (after assert(0) when assertions are
+ * enabled), the file cannot be opened, memory runs out, or fewer
+ * bytes than requested could be read.  The file descriptor is closed
+ * on every path.
+ */
+void* 
+taucs_vec_read_binary(int n, int flags, char* filename)
+{
+  void*   v;
+  size_t  nbytes;
+  ssize_t bytes_read;
+  int f;
+
+  taucs_printf("taucs_vec_read_binary: reading binary vector %s\n",filename);
+
+  /* size of the vector in bytes for the requested element type */
+  if (flags & TAUCS_DOUBLE) {
+    nbytes = n * sizeof(taucs_double);
+  } else if (flags & TAUCS_SINGLE) {
+    nbytes = n * sizeof(taucs_single);
+  } else if (flags & TAUCS_DCOMPLEX) {
+    nbytes = n * sizeof(taucs_dcomplex);
+  } else if (flags & TAUCS_SCOMPLEX) {
+    nbytes = n * sizeof(taucs_scomplex);
+  } else {
+    assert(0);
+    return NULL;
+  }
+  
+#ifdef OSTYPE_win32
+  f = open(filename,_O_RDONLY |_O_BINARY);
+#else
+  f = open(filename,O_RDONLY);
+#endif
+  if (f < 0) {
+    taucs_printf("taucs_vec_read_binary: could not open binary file %s\n",filename);
+    return NULL;
+  }
+
+  v = taucs_malloc(nbytes);
+  if (!v) {
+    close(f);
+    return NULL;
+  }
+
+  bytes_read = read(f,v,nbytes);
+
+  close(f);
+
+  if (bytes_read != (ssize_t) nbytes) {
+    taucs_printf("taucs_vec_read_binary: could not read %d elements from %s\n",n,filename);
+    taucs_free(v);
+    return NULL;
+  }
+
+  taucs_printf("taucs_vec_read_binary: done reading\n");
+
+  return v;
+}
+
+/*
+ * Write the n elements of v, whose type is selected by flags
+ * (TAUCS_DOUBLE, TAUCS_SINGLE, TAUCS_DCOMPLEX or TAUCS_SCOMPLEX), as
+ * raw binary data to filename.  The file is created if needed and
+ * truncated if it already exists.
+ *
+ * Returns 0 on success and -1 if the element type is not recognised
+ * (after assert(0) when assertions are enabled), the file cannot be
+ * opened, or not all bytes could be written.
+ */
+int
+taucs_vec_write_binary(int n, int flags, void* v, char* filename)
+{
+  size_t  nbytes;
+  ssize_t bytes_written;
+  int f;
+
+  taucs_printf("taucs_vec_write_binary: writing binary vector %s\n",filename);
+
+  /* size of the vector in bytes for the given element type */
+  if (flags & TAUCS_DOUBLE) {
+    nbytes = n * sizeof(taucs_double);
+  } else if (flags & TAUCS_SINGLE) {
+    nbytes = n * sizeof(taucs_single);
+  } else if (flags & TAUCS_DCOMPLEX) {
+    nbytes = n * sizeof(taucs_dcomplex);
+  } else if (flags & TAUCS_SCOMPLEX) {
+    nbytes = n * sizeof(taucs_scomplex);
+  } else {
+    assert(0);
+    return -1;
+  }
+  
+#ifdef OSTYPE_win32
+  /* truncate like the POSIX branch does, so that a shorter vector
+     does not leave stale data at the end of an existing file */
+  f = open(filename,
+	   _O_WRONLY | _O_CREAT | _O_TRUNC | _O_BINARY, 
+	   _S_IREAD | _S_IWRITE | _S_IEXEC);
+#else
+  f = open(filename,O_WRONLY | O_CREAT | O_TRUNC, S_IRWXO | S_IRWXG | S_IRWXU);
+#endif
+  if (f < 0) {
+    taucs_printf("taucs_vec_write_binary: could not open binary file %s\n",filename);
+    return -1;
+  }
+
+  bytes_written = write(f,v,nbytes);
+
+  close(f);
+
+  if (bytes_written != (ssize_t) nbytes) {
+    taucs_printf("taucs_vec_write_binary: could not write %d elements to %s\n",n,filename);
+    return -1;
+  }
+
+  /* this message used to name taucs_vec_read_binary */
+  taucs_printf("taucs_vec_write_binary: done writing\n");
+
+  return 0;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
diff --git a/contrib/taucs/src/taucs_ccs_ooc_llt.c b/contrib/taucs/src/taucs_ccs_ooc_llt.c
new file mode 100644
index 0000000000000000000000000000000000000000..1fb94dd12595ae68a66fb0effc42761bae8bb482
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_ooc_llt.c
@@ -0,0 +1,3027 @@
+
+/*
+ * taucs_ccs_ooc_llt.c 
+ *
+ * Out-of-core sparse Cholesky factorization
+ *
+ * authors: Vladimir Rotkin & Sivan Toledo 
+ *
+ * Copyright, 2001.
+ */
+
+/*************************************************************/
+/*                                                           */
+/*************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/*#include <unistd.h>*/
+/*#include <sys/uio.h>*/
+
+#include <assert.h>
+#include <math.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "taucs.h"
+
+#define FALSE 0
+#define TRUE  1
+
+/* #define BLAS_FLOPS_CUTOFF  1000.0 */
+
+#define BLAS_FLOPS_CUTOFF  -1.0
+#define SOLVE_DENSE_CUTOFF 5
+
+/* number of matrices in the header of the file */
+/*#define IO_BASE    6*/
+#define IO_BASE    7
+/* multiple files of at most 1 GB or single file */
+#define MULTIFILE 0
+
+#ifndef TAUCS_CORE_GENERAL
+
+/*************************************************************/
+/* structures                                                */
+/*************************************************************/
+
+typedef struct { /* frontal-matrix record; NOTE(review): not referenced by the code reviewed in this section, so field meanings are not documented here */
+  int     sn_size;
+  int     n;
+  int*    rowind;
+
+  int     up_size;
+  int*    sn_vertices;
+  int*    up_vertices;
+  taucs_datatype* f1;
+  taucs_datatype* f2;
+  taucs_datatype* u;
+} supernodal_frontal_matrix;
+
+typedef struct { /* supernodal factor: supernode tree, row structure and dense blocks of each supernode */
+  char    uplo;     /* 'u' for upper, 'l' for lower, ' ' don't know; prefer lower. */
+  int     n;        /* size of matrix */
+  int     n_sn;     /* number of supernodes */
+
+  int* parent;      /* supernodal elimination tree */
+  int* first_child; /* first child supernode of each supernode, -1 if none */
+  int* next_child;  /* next sibling supernode, -1 ends the list */
+  int* ipostorder;
+  int* col_to_sn_map;     /* supernode holding each column; only kept when the symbolic phase is asked to */
+
+  int* sn_size;     /* size of supernodes (diagonal block) */
+  int* sn_up_size;  /* number of row indices in sn_struct[sn], diagonal rows included; the update block has sn_up_size-sn_size rows */
+  int** sn_struct;  /* row structure of supernodes         */
+
+  taucs_datatype** sn_blocks; /* supernode blocks        */
+  taucs_datatype** up_blocks; /* update blocks           */
+} supernodal_factor_matrix;
+
+
+/*************************************************************/
+/* for qsort                                                 */
+/*************************************************************/
+#if 0
+static int compare_ints(void* vx, void* vy)
+{
+  int* ix = (int*)vx;
+  int* iy = (int*)vy;
+  if (*ix < *iy) return -1;
+  if (*ix > *iy) return  1;
+  return 0;
+}
+#endif
+
+static int* compare_indirect_map; /* key table consulted by compare_indirect_ints */
+static int compare_indirect_ints(const void* vx, const void* vy)
+{
+  /* qsort comparator: orders two ints by the keys compare_indirect_map holds for them */
+  const int key_x = compare_indirect_map[*(const int*)vx];
+  const int key_y = compare_indirect_map[*(const int*)vy];
+
+  return (key_x > key_y) - (key_x < key_y);
+}
+
+/*************************************************************/
+/* radix sort                                                */
+/*************************************************************/
+
+#if 0
+
+/* NCOUNTS = 2^LOGRADIX */
+
+#define RADIX_SORT_LOGRADIX 4
+#define RADIX_SORT_NCOUNTS  16
+
+static unsigned int counts[RADIX_SORT_NCOUNTS];
+
+static int
+radix_sort(unsigned int* x, int n)
+{
+  int i;
+  unsigned int mask;
+
+  unsigned int  ncounts;
+
+  unsigned int* y;
+  unsigned int* to;
+  unsigned int* from;
+
+  unsigned int v;
+  unsigned int partialsum;
+  unsigned int next;
+  unsigned int shift;
+  unsigned int bits_sorted;
+
+  if (RADIX_SORT_LOGRADIX >= 8*sizeof(unsigned int)) {
+    taucs_printf("radix sort: radix too large.\n");
+    /* the computation of ncounts will fail */
+    return 0;
+  }
+
+  mask    = 0;
+  ncounts = 1;
+  for (i=0; i<RADIX_SORT_LOGRADIX; i++) {
+    mask = (mask << 1) | 1;
+    ncounts = ncounts << 1;
+  }
+
+  assert(ncounts==RADIX_SORT_NCOUNTS);
+
+  y      = (unsigned int*) taucs_malloc(n       * sizeof(unsigned int));
+  if (!y) {
+    taucs_printf("radix sort: out of memory.\n");
+    return -1;
+  }
+
+  from = x;
+  to   = y;
+
+  bits_sorted = 0;
+  while(bits_sorted < 8*sizeof(unsigned int)) {
+    for (i=0; i<ncounts; i++) counts[i] = 0;
+
+    for (i=0; i<n; i++) {
+      v = (from[i] >> bits_sorted) & mask;
+      assert(v < ncounts);
+      counts[v] ++;
+    }
+
+    partialsum = 0;
+    for (i=0; i<ncounts; i++) {
+      /*printf("<%d ",counts[i]);*/
+      next = counts[i];
+      counts[i] = partialsum;
+      /*printf("%d>\n",counts[i]);*/
+      partialsum = partialsum + next;
+    }
+
+    for (i=0; i<n; i++) {
+      v = (from[i] >> bits_sorted) & mask;
+      assert(counts[v] < n);
+      to[counts[v]] = from[i];
+      counts[v] ++;
+    }
+    /*
+    printf("===========\n");
+    for (i=0; i<n; i++) printf(">>%d>> %08x\n",bits_sorted,to[i]);
+    printf("===========\n");
+    */
+
+    bits_sorted += RADIX_SORT_LOGRADIX;
+    if (from == x) {
+      from = y;
+      to   = x;
+    } else {
+      from = x;
+      to   = y;
+    } 
+  }
+
+  if (from == y) 
+    for (i=0; i<n; i++) x[i] = y[i];
+
+  taucs_free(y);
+
+  return 0;
+}
+#endif
+
+/*************************************************************/
+/* create and free the factor object                         */
+/*************************************************************/
+
+/* Allocate a factor object with no supernodes: every array pointer is NULL
+ * and the supernode count is zero.  Returns NULL if the allocation fails. */
+static supernodal_factor_matrix*
+multifrontal_supernodal_create()
+{
+  supernodal_factor_matrix* L;
+
+  L = (supernodal_factor_matrix*) taucs_malloc(sizeof(supernodal_factor_matrix));
+  if (!L) return NULL;
+
+  L->uplo = 'l';
+  L->n    = -1; /* placeholder until the symbolic elimination stores the matrix size */
+  /* n_sn used to stay uninitialized although ooc_supernodal_factor_free
+     loops up to it, so freeing a fresh object read garbage */
+  L->n_sn = 0;
+
+  L->parent        = L->first_child = L->next_child = NULL;
+  L->ipostorder    = L->col_to_sn_map = NULL;
+  L->sn_size       = L->sn_up_size  = NULL;
+  L->sn_struct     = NULL;
+  L->sn_blocks     = L->up_blocks   = NULL;
+  return L;
+}
+
+/* Release a factor object and every array it owns.  A NULL object, or one
+ * whose per-supernode tables were never allocated, is accepted. */
+static
+void ooc_supernodal_factor_free(void* vL)
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  int sn;
+  if (!L) return;
+  taucs_free(L->parent);
+  taucs_free(L->first_child);
+  taucs_free(L->next_child);
+  taucs_free(L->col_to_sn_map);
+  taucs_free(L->sn_size);
+  taucs_free(L->sn_up_size);
+  /* the tables stay NULL until the symbolic elimination allocates them, so they must not be indexed unconditionally */
+  for (sn=0; (L->sn_struct || L->sn_blocks || L->up_blocks) && sn<L->n_sn; sn++) {
+    if (L->sn_struct) taucs_free(L->sn_struct[sn]);
+    if (L->sn_blocks) taucs_free(L->sn_blocks[sn]);
+    if (L->up_blocks) taucs_free(L->up_blocks[sn]);
+  }
+  taucs_free(L->sn_struct);
+  taucs_free(L->sn_blocks);
+  taucs_free(L->up_blocks);
+  taucs_free(L);
+}
+/* Symbolic elimination of the etree subtree rooted at column j: after recursing into j's children, j either joins the supernode of its only child or starts a new supernode. */
+static void
+recursive_symbolic_elimination(int            j,
+			       taucs_ccs_matrix* A,
+			       int            first_child[],
+			       int            next_child[],
+			       int*           n_sn, /* in/out: number of supernodes created so far */
+			       int            sn_size[],
+			       int            sn_up_size[],
+			       int*           sn_rowind[],
+			       int            sn_first_child[], 
+			       int            sn_next_child[], 
+			       int            rowind[], /* scratch that collects the row indices of a new supernode */
+			       int            column_to_sn_map[],
+			       int            map[], /* map[i]==j marks row i as already recorded for column j */
+			       int            do_order, /* nonzero: sort the rows of each new supernode by ipostorder */
+			       int            ipostorder[],
+			       double         given_mem, /* byte budget that limits how far a supernode may grow */
+			       void           (*sn_struct_handler)(), /* optional callback, called for each child supernode; may be NULL */
+			       void*          sn_struct_handler_arg
+			       )
+{
+  int  i,ip,c,c_sn;
+  int  in_previous_sn;
+  int  nnz = 0; /* to suppress a warning */
+
+  for (c=first_child[j]; c != -1; c = next_child[c]) {
+    recursive_symbolic_elimination(c,A,
+				   first_child,next_child,
+				   n_sn,
+				   sn_size,sn_up_size,sn_rowind,
+				   sn_first_child,sn_next_child,
+				   rowind, /* temporary */
+				   column_to_sn_map,
+				   map,
+				   do_order,ipostorder,given_mem,
+				   sn_struct_handler,sn_struct_handler_arg
+				   );
+  }
+  /* decide whether column j can be appended to the supernode of its single child */
+  
+  in_previous_sn = 1;
+  if (j == A->n) 
+    in_previous_sn = 0; /* this is not a real column */
+  else if (first_child[j] == -1) 
+    in_previous_sn = 0; /* this is a leaf */
+  else if (next_child[first_child[j]] != -1) 
+    in_previous_sn = 0; /* more than 1 child */
+  else if ((double)sn_up_size[column_to_sn_map[first_child[j]]]
+	   *(double)(sn_size[column_to_sn_map[first_child[j]]]+1)
+	   *sizeof(taucs_datatype) > given_mem)
+    in_previous_sn = 0; /* the child's supernode with one more column would exceed the given memory */
+  else { 
+    /* check that the structure is nested */
+    /* map contains child markers         */
+
+    c=first_child[j];
+    for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      in_previous_sn = in_previous_sn && (map[i] == c);
+    }
+  }
+
+  if (in_previous_sn) {
+    c = first_child[j];
+    c_sn = column_to_sn_map[c];
+    column_to_sn_map[j] = c_sn;
+
+    /* swap row indices so j is at the end of the */
+    /* supernode, not in the update indices       */
+    for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) 
+      if (sn_rowind[c_sn][ip] == j) break;
+    assert(ip<sn_up_size[c_sn]);
+    sn_rowind[c_sn][ip] = sn_rowind[c_sn][sn_size[c_sn]];
+    sn_rowind[c_sn][sn_size[c_sn]] = j;
+
+    /* mark the nonzeros in the map */
+    for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) 
+      map[ sn_rowind[c_sn][ip] ] = j;
+
+    sn_size   [c_sn]++;
+    if((double)sn_size[c_sn]*(double)sn_up_size[c_sn]>=(1024.0*1024.0*1024.0))
+      taucs_printf("debug!!!: sn_size[%d] = %d sn_up_size[%d] = %d\n ",c_sn,sn_size[c_sn],c_sn,sn_up_size[c_sn]);
+    /* return c_sn; */
+    return;
+  }
+
+  /* we are in a new supernode */
+  if (j < A->n) {
+    nnz = 1;
+    rowind[0] = j;
+    map[j]    = j;
+    
+    for (c=first_child[j]; c != -1; c = next_child[c]) {
+      c_sn = column_to_sn_map[c];
+      for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) {
+	i = sn_rowind[c_sn][ip];
+	if (i > j && map[i] != j) { /* new row index */
+	  map[i] = j;
+	  rowind[nnz] = i;
+	  nnz++;
+	}
+      }
+      if (sn_struct_handler)
+	(*sn_struct_handler)(sn_struct_handler_arg,
+			     c_sn,sn_up_size[c_sn],&(sn_rowind[c_sn]));
+    }
+
+    for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      if (map[i] != j) { /* new row index */
+	map[i] = j;
+	rowind[nnz] = i;
+	nnz++;
+      }
+    }
+  }
+
+  /* j is the root: only hand the structures of its child supernodes to the handler */
+  if (j == A->n) {
+    for (c=first_child[j]; c != -1; c = next_child[c]) {
+      c_sn = column_to_sn_map[c];
+      if (sn_struct_handler)
+	(*sn_struct_handler)(sn_struct_handler_arg,
+			     c_sn,sn_up_size[c_sn],&(sn_rowind[c_sn]));
+    }
+  }
+
+  /*printf("children of sn %d: ",*n_sn);*/
+  for (c=first_child[j]; c != -1; c = next_child[c]) {
+    c_sn = column_to_sn_map[c];
+    /*printf("%d ",c_sn);*/
+    if (c==first_child[j])
+      sn_first_child[*n_sn] = c_sn;
+    else {
+      sn_next_child[ c_sn ] = sn_first_child[*n_sn];
+      sn_first_child[*n_sn] = c_sn;
+    }
+  }
+  /*printf("\n");*/
+
+  if (j < A->n) {
+    column_to_sn_map[j] = *n_sn;
+    sn_size   [*n_sn] = 1;
+    sn_up_size[*n_sn] = nnz;
+    sn_rowind [*n_sn] = (int*) taucs_malloc(nnz * sizeof(int)); /* NOTE(review): the result is used below without a NULL check */
+    for (ip=0; ip<nnz; ip++) sn_rowind[*n_sn][ip] = rowind[ip];
+    if (do_order) {
+      /* Sivan and Vladimir: we think that we can sort in */
+      /* column order, not only in etree postorder.       */
+      /*
+	radix_sort(sn_rowind [*n_sn],nnz);
+	qsort(sn_rowind [*n_sn],nnz,sizeof(int),compare_ints);
+      */
+      compare_indirect_map = ipostorder;
+      qsort(sn_rowind [*n_sn],nnz,sizeof(int),compare_indirect_ints);
+    }
+    assert(sn_rowind [*n_sn][0] == j);
+    (*n_sn)++;
+  }
+}
+
+/* Number the subtree rooted at j in postorder: all children first, then j.
+ * *next holds the next free number; postorder and ipostorder may be NULL. */
+static
+void recursive_postorder(int  j,
+                         int  first_child[],
+                         int  next_child[],
+                         int  postorder[],
+                         int  ipostorder[],
+                         int* next
+                         )
+{
+  int child = first_child[j];
+  int position;
+
+  while (child != -1) {
+    recursive_postorder(child,first_child,next_child,postorder,ipostorder,next);
+    child = next_child[child];
+  }
+
+  position = (*next)++;
+  if (postorder)  postorder [position] = j;
+  if (ipostorder) ipostorder[j] = position;
+}
+/* Symbolic analysis: builds the elimination tree of A and a postorder of it, then fills vL with the supernode sizes, row structures and supernodal child lists. */
+static void
+taucs_ccs_ooc_symbolic_elimination(taucs_ccs_matrix* A,
+				   void* vL, /* a supernodal_factor_matrix*, filled in here */
+				   int do_order, /* forwarded to recursive_symbolic_elimination */
+				   int do_column_to_sn_map, /* nonzero: keep the column-to-supernode map in L->col_to_sn_map */
+				   double given_mem, /* forwarded to recursive_symbolic_elimination */
+				   void           (*sn_struct_handler)(),
+				   void*          sn_struct_handler_arg
+				   )
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  int* first_child;
+  int* next_child;
+  int j;
+  int* column_to_sn_map;
+  int* map;
+  int* rowind;
+  int* parent;
+  int* ipostorder;
+
+  L->n         = A->n;
+  L->sn_struct = (int**)taucs_malloc((A->n  )*sizeof(int*));
+  L->sn_size   = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  L->sn_up_size   = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  L->first_child = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  L->next_child  = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  /* NOTE(review): none of the allocations in this function is checked for failure */
+  column_to_sn_map = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  map              = (int*) taucs_malloc((A->n+1)*sizeof(int));
+
+  first_child = (int*) taucs_malloc(((A->n)+1)*sizeof(int));
+  next_child  = (int*) taucs_malloc(((A->n)+1)*sizeof(int));
+    
+  rowind      = (int*) taucs_malloc((A->n)*sizeof(int));
+
+  taucs_printf("STARTING SYMB 1\n");
+
+  /* compute the vertex elimination tree */
+  parent      = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  taucs_ccs_etree(A,parent,NULL,NULL,NULL);
+  for (j=0; j <= (A->n); j++) first_child[j] = -1; /* index A->n is the root handed to the traversals below */
+  for (j = (A->n)-1; j >= 0; j--) { /* turn the parent array into child lists */
+    int p = parent[j];
+    next_child[j] = first_child[p];
+    first_child[p] = j;
+  }
+  taucs_free(parent);
+
+  taucs_printf("STARTING SYMB 2\n");
+
+  ipostorder = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  { 
+    int next = 0;
+    /*int* postorder = (int*)taucs_malloc((A->n+1)*sizeof(int));*/
+    recursive_postorder(A->n,first_child,next_child,
+			NULL,
+			ipostorder,&next);
+    /*
+    printf("ipostorder ");
+    for (j=0; j <= (A->n); j++) printf("%d ",ipostorder[j]);
+    printf("\n");
+    printf(" postorder ");
+    for (j=0; j <= (A->n); j++) printf("%d ",postorder[j]);
+    printf("\n");
+    */
+  }
+
+  taucs_printf("STARTING SYMB 3\n");
+
+  L->n_sn = 0;
+  for (j=0; j < (A->n); j++) map[j] = -1;
+  for (j=0; j <= (A->n); j++) (L->first_child)[j] = (L->next_child)[j] = -1;
+
+  taucs_printf("STARTING SYMB\n");
+
+  recursive_symbolic_elimination(A->n,
+				 A,
+				 first_child,next_child,
+				 &(L->n_sn),
+				 L->sn_size,L->sn_up_size,L->sn_struct,
+				 L->first_child,L->next_child,
+				 rowind,
+				 column_to_sn_map,
+				 map,
+				 do_order,ipostorder,given_mem,
+				 sn_struct_handler,sn_struct_handler_arg
+				 );
+
+  taucs_printf("AFTER SYMB\n");
+
+  { /* report the nonzero and flop counts implied by the supernode sizes */
+    double nnz   = 0.0;
+    double flops = 0.0;
+    int sn,i,colnnz;
+    for (sn=0; sn<(L->n_sn); sn++) {
+      for (i=0, colnnz = (L->sn_up_size)[sn]; 
+	   i<(L->sn_size)[sn]; 
+	   i++, colnnz--) {
+	flops += 1.0 + ((double)(colnnz)) * ((double)(colnnz));
+	nnz   += (double) (colnnz);
+      }
+    }
+    taucs_printf("\t\tSymbolic Analysis of LL^T: %.2e nonzeros, %.2e flops\n",
+		 nnz, flops);
+  }
+
+  /*
+  {
+    int i;
+    printf("c2sn: ");
+    for (i=0; i<A->n; i++) printf("%d ",column_to_sn_map[i]);
+    printf("\n");
+  }
+  */
+  /* shrink the per-supernode arrays from their initial A->n-based size to the n_sn supernodes found */
+  L->sn_struct = (int**)taucs_realloc( L->sn_struct,(L->n_sn  )*sizeof(int*));
+  L->sn_size   = (int*) taucs_realloc( L->sn_size,(L->n_sn+1)*sizeof(int));
+  L->sn_up_size   = (int*) taucs_realloc(L->sn_up_size,(L->n_sn+1)*sizeof(int));
+  L->first_child = (int*) taucs_realloc(L->first_child,(L->n_sn+1)*sizeof(int));
+  L->next_child  = (int*) taucs_realloc(L->next_child,(L->n_sn+1)*sizeof(int));
+
+  L->sn_blocks     = taucs_calloc((L->n_sn), sizeof(taucs_datatype*)); /* so we can free before allocation */
+  L->up_blocks     = taucs_calloc((L->n_sn), sizeof(taucs_datatype*));
+
+  taucs_free(rowind);
+  taucs_free(map);
+
+  if(do_column_to_sn_map)
+    L->col_to_sn_map = column_to_sn_map; /* ownership passes to L */
+  else
+    taucs_free(column_to_sn_map);
+
+  taucs_free(next_child);
+  taucs_free(first_child);
+  taucs_free(ipostorder);
+}
+
+/*************************************************************/
+/* left-looking factor routines                              */
+/*************************************************************/
+/* Left-looking update: subtracts the contribution of supernode K from the blocks of supernode J and, if K contributed, recurses into K's children. bitmap is scratch that is zeroed again before returning (presumably all zero on entry - confirm with callers). */
+static void
+recursive_leftlooking_supernodal_update(int J,int K,
+					int bitmap[],
+					taucs_datatype* dense_update_matrix,
+					taucs_ccs_matrix* A,
+					supernodal_factor_matrix* L)
+{
+  int i,j,ir;
+  int  child;
+  int* first_child   = L->first_child;
+  int* next_child    = L->next_child;
+  int sn_size_father = (L->sn_size)[J];
+  int sn_up_size_father = (L->sn_up_size)[J];
+  int sn_size_child = (L->sn_size)[K];
+  int sn_up_size_child = (L->sn_up_size)[K];
+  int exist_upd=0;
+  int first_row = 0; /* to suppress a warning */
+  int row_count=0;
+  int PK,M,N,LDA,LDB,LDC;
+
+  for(i=0;i<sn_size_father;i++) { /* 1-based position of each row within J's diagonal rows */
+    bitmap[L->sn_struct[J][i]]=i+1;
+  }
+
+  for(i=sn_size_father;i<sn_up_size_father;i++) /* 1-based position of each row within J's update rows */
+    bitmap[L->sn_struct[J][i]] = i - sn_size_father + 1;
+
+  for(i=sn_size_child;i<sn_up_size_child;i++)
+    /* is this row index included in the columns of sn J? */
+    if(bitmap[L->sn_struct[K][i]]
+       && L->sn_struct[K][i] <= L->sn_struct[J][sn_size_father-1]) {
+      if(!exist_upd) first_row = i;
+      row_count++;
+      exist_upd = 1;
+      /*taucs_printf("update from K = %d to J = %d \n",K,J);*/
+      /* loop over columns of sn K */
+            
+      /* for(j=0;j<sn_size_child;j++)
+	for(ir=i;ir<sn_up_size_child;ir++)
+	  if( L->sn_struct[K][ir] <= L->sn_struct[J][sn_size_father-1]){
+	    L->sn_blocks[J][ (bitmap[L->sn_struct[K][i]]-1)*(L->sn_blocks_ld[J])+(bitmap[L->sn_struct[K][ir]]-1)] -= L->up_blocks[K][j*(L->up_blocks_ld[K])+ir-sn_size_child]* L->up_blocks[K][j*L->up_blocks_ld[K]+i-sn_size_child];
+	    taucs_printf("sn_block: L[%d,%d] = %lf\n",(bitmap[L->sn_struct[K][ir]]-1),(bitmap[L->sn_struct[K][i]]-1),L->sn_blocks[J][ (bitmap[L->sn_struct[K][i]]-1)*(L->sn_blocks_ld[J])+(bitmap[L->sn_struct[K][ir]]-1)]);}
+	  else{
+	    L->up_blocks[J][ (bitmap[L->sn_struct[K][i]]-1)*(L->up_blocks_ld[J])+(bitmap[L->sn_struct[K][ir]]-1)] -=  L->up_blocks[K][j*L->up_blocks_ld[K]+ir-sn_size_child]* L->up_blocks[K][j*L->up_blocks_ld[K]+i-sn_size_child];
+	   taucs_printf("up_block: L[%d,%d] = %lf\n",(bitmap[L->sn_struct[K][ir]]-1),(bitmap[L->sn_struct[K][i]]-1),L->up_blocks[J][ (bitmap[L->sn_struct[K][i]]-1)*(L->up_blocks_ld[J])+(bitmap[L->sn_struct[K][ir]]-1)]);
+	   }*/
+        }
+
+  if(exist_upd){
+    LDA = LDB = (L->sn_up_size)[K]-(L->sn_size)[K];
+    M  = sn_up_size_child - first_row ; /* +-1 ? */    
+    LDC =  sn_up_size_father;
+    N  = row_count; 
+    PK = L->sn_size[K];    
+   
+    /* The GEMM code computes on the upper triangle of the trapezoidal
+       matrix, which is junk. */
+    /*
+    taucs_gemm ("No Conjugate",
+		"Conjugate",
+		&M,&N,&PK,
+		&taucs_one_const,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+		&taucs_zero_const,
+		dense_update_matrix,&LDC);
+    */
+
+    /* This is the HERK+GEMM fix by Elad */
+    taucs_herk ("Lower",
+		"No Conjugate",
+		&N,&PK,
+		&taucs_one_real_const,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		&taucs_zero_real_const,
+		dense_update_matrix,&LDC);
+
+    if(M-N > 0)
+    {
+        int newM = M - N;
+   
+        taucs_gemm ("No Conjugate",
+		"Conjugate",
+		&newM,&N,&PK,
+		&taucs_one_const,
+		&(L->up_blocks[K][first_row-sn_size_child+N]),&LDA,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+		&taucs_zero_const,
+		dense_update_matrix+N,&LDC);
+    }
+    /* end of GEMM/HERK+GEMM fix */ 
+
+    /*for(j=0;j<row_count;j++)
+       for(ir=0;ir<sn_up_size_father;ir++)
+	 taucs_printf("dense[%d,%d] = %lf\n",ir,j,dense_update_matrix[j*LDC+ir]);
+    */
+    for(j=0;j<row_count;j++) /* scatter the lower triangle of the product into J's diagonal block */
+      for(ir=j;ir<row_count;ir++){
+	/*
+	L->sn_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*sn_size_father+(bitmap[L->sn_struct[K][first_row+ir]]-1)] -= dense_update_matrix[j*LDC+ir];
+	*/
+	L->sn_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*sn_size_father+(bitmap[L->sn_struct[K][first_row+ir]]-1)] =
+	  taucs_sub(L->sn_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*sn_size_father+(bitmap[L->sn_struct[K][first_row+ir]]-1)] , dense_update_matrix[j*LDC+ir]);
+
+      }
+
+    for(j=0;j<row_count;j++) /* scatter the remaining rows of the product into J's update block */
+      for(ir=row_count;ir<M;ir++){
+	/*
+	L->up_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[J]-L->sn_size[J])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] -= dense_update_matrix[j*LDC+ir];
+	*/
+	L->up_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[J]-L->sn_size[J])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] =
+	  taucs_sub(L->up_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[J]-L->sn_size[J])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] , dense_update_matrix[j*LDC+ir]);
+	}
+    for(i=0;i<sn_up_size_father;i++) /* clear the scratch before recursing, the recursive call refills it */
+      bitmap[L->sn_struct[J][i]]=0;
+    
+    for (child = first_child[K]; child != -1; child = next_child[child]) {
+      recursive_leftlooking_supernodal_update(J,child,
+					      bitmap,dense_update_matrix,
+					      A,L);
+    }
+  }
+  else
+    for(i=0;i<sn_up_size_father;i++)
+      bitmap[L->sn_struct[J][i]]=0;
+
+}
+
+
+/* Add the columns of A that belong to supernode sn into its diagonal and
+ * update blocks, then factor in place: dense Cholesky (potrf) of the diagonal
+ * block followed by a triangular solve (trsm) for the update block.  indmap is
+ * scratch indexed by row number; the entries used are reset to 0 before a
+ * successful return.  Returns 0, or -1 on a nonpositive pivot. */
+static int
+leftlooking_supernodal_front_factor(int sn,
+                                    int* indmap,
+                                    taucs_ccs_matrix* A,
+                                    supernodal_factor_matrix* L)
+{
+  int ip,jp;
+  int*    ind;
+  taucs_datatype* re;
+  int INFO = 0; /* potrf is skipped when sn_size is 0; INFO was then tested uninitialized */
+  int sn_size = (L->sn_size)[sn];
+  int up_size = (L->sn_up_size)[sn] - (L->sn_size)[sn];
+
+  /* map every row index of the supernode to its position in sn_struct[sn] */
+  for(ip=0;ip<(L->sn_up_size)[sn];ip++) indmap[(L->sn_struct)[sn][ip]] = ip;
+
+  for(jp=0;jp<sn_size;jp++) {
+    int col     = (L->sn_struct)[sn][jp];
+    int col_nnz = A->colptr[col+1] - A->colptr[col];
+    ind = &(A->rowind[A->colptr[col]]);
+    re  = &(A->taucs_values[A->colptr[col]]);
+    for(ip=0; ip<col_nnz; ip++) {
+      int pos = indmap[ind[ip]];
+      if (pos < sn_size) /* entry lands in the diagonal block */
+        (L->sn_blocks)[sn][sn_size*jp + pos] =
+          taucs_add((L->sn_blocks)[sn][sn_size*jp + pos] , re[ip]);
+      else               /* entry lands in the update block */
+        (L->up_blocks)[sn][up_size*jp + pos - sn_size] =
+          taucs_add((L->up_blocks)[sn][up_size*jp + pos - sn_size] , re[ip]);
+    }
+  }
+
+  /* we use the BLAS through the Fortran interface */
+  /* Cholesky factorization of the diagonal block */
+  if (sn_size)
+    taucs_potrf ("LOWER",
+                 &sn_size,
+                 (L->sn_blocks)[sn],&sn_size,
+                 &INFO);
+
+  if (INFO) {
+    taucs_printf("\t\tLL^T Factorization: Matrix is not positive definite.\n");
+    taucs_printf("\t\t in sn = %d   nonpositive pivot in column %d\n",
+                 sn,(L->sn_struct)[sn][INFO-1]);
+    return -1;
+  }
+
+  /* triangular solve that completes the update rows of the factored columns */
+  if (up_size && sn_size)
+    taucs_trsm ("Right","Lower","Conjugate","No unit diagonal",
+                &up_size,&sn_size,
+                &taucs_one_const,
+                (L->sn_blocks)[sn],&sn_size,
+                (L->up_blocks)[sn],&up_size);
+
+  /* clear the scratch map again */
+  for(ip=0;ip<(L->sn_up_size)[sn];ip++) indmap[(L->sn_struct)[sn][ip]] = 0;
+  return 0;
+}
+
+/* Factor, children first, the subtree of the supernodal elimination tree that
+ * is rooted at sn.  Each supernode gets freshly zeroed diagonal and update
+ * blocks, receives the updates of its descendants and is then factored.  A
+ * call with is_root set only recurses into the children.  map is scratch
+ * handed on to the update and front-factor routines.  Returns 0 on success
+ * and -1 on a nonpositive pivot or when a block cannot be allocated. */
+static int
+recursive_leftlooking_supernodal_factor_llt(int sn,       /* this supernode */
+                                            int is_root,  /* is v the root? */
+                                            int* map,
+                                            taucs_ccs_matrix* A,
+                                            supernodal_factor_matrix* L)
+{
+  int  child;
+  int  diag_len,up_len,dense_len;
+  int* first_child   = L->first_child;
+  int* next_child    = L->next_child;
+  taucs_datatype* dense_update_matrix;
+
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    if (recursive_leftlooking_supernodal_factor_llt(child,FALSE,map,A,L)) {
+      /* failure; blocks allocated so far stay in L for the caller to free */
+      return -1;
+    }
+  }
+
+  if (is_root) return 0;
+
+  diag_len  = (L->sn_size)[sn] * (L->sn_size)[sn];
+  up_len    = ((L->sn_up_size)[sn]-(L->sn_size)[sn]) * (L->sn_size)[sn];
+  dense_len = (L->sn_up_size)[sn] * (L->sn_size)[sn];
+
+  (L->sn_blocks)[sn] = (taucs_datatype*)taucs_calloc(diag_len,
+                                                     sizeof(taucs_datatype));
+  (L->up_blocks)[sn] = (taucs_datatype*)taucs_calloc(up_len,
+                                                     sizeof(taucs_datatype));
+  dense_update_matrix = (taucs_datatype*)taucs_calloc(dense_len,
+                                                      sizeof(taucs_datatype));
+
+  /* The results used to go unchecked into the BLAS calls.  A zero-length
+     request may legitimately come back as NULL, so only a failed non-empty
+     request counts as running out of memory. */
+  if ((diag_len  && !(L->sn_blocks)[sn]) ||
+      (up_len    && !(L->up_blocks)[sn]) ||
+      (dense_len && !dense_update_matrix)) {
+    taucs_printf("\t\tLL^T Factorization: out of memory\n");
+    taucs_free(dense_update_matrix);
+    return -1;
+  }
+
+  for (child = first_child[sn]; child != -1; child = next_child[child])
+    recursive_leftlooking_supernodal_update(sn,child,
+                                            map,dense_update_matrix,
+                                            A,L);
+  taucs_free(dense_update_matrix);
+  /* a nonzero result reports a nonpositive pivot */
+  return leftlooking_supernodal_front_factor(sn,map,A,L) ? -1 : 0;
+}
+
+#if 0
+void* taucs_ccs_factor_llt_ll(taucs_ccs_matrix* A)
+{
+  supernodal_factor_matrix* L;
+  int i,j,ip,jp;
+  int sn,p;
+  int* map;
+  double wtime, ctime;
+ 
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  L = multifrontal_supernodal_create();
+
+  taucs_ccs_ooc_symbolic_elimination(A,L,
+				     TRUE /* sort row indices */,
+				     FALSE /* don't return col_tosn_map */,
+				     1.0/0.0,
+				     NULL,NULL /* sn_struct handler*/ );
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+
+  map  = (int*)taucs_malloc((A->n+1)*sizeof(int));
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  if (recursive_leftlooking_supernodal_factor_llt((L->n_sn),  
+						  TRUE, 
+						  map,
+						  A,L)) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(map);
+
+    return NULL;
+  }
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSupernodal Left-Looking LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  taucs_free(map);
+
+  return (void*) L;
+}
+#endif
+
+/*************************************************************/
+/* left-looking ooc factor routines                          */
+/*************************************************************/
+
+/* Callback for the symbolic elimination: appends the row structure of
+ * supernode sn (sn_up_size ints) to the taucs_io_handle passed as argument,
+ * then frees the in-core copy and clears the caller's pointer. */
+static
+void
+ooc_sn_struct_handler(void* argument, 
+                      int sn, int sn_up_size, int* sn_struct_ptr[])
+{
+  taucs_io_handle* io = (taucs_io_handle*) argument;
+  int* row_indices    = *sn_struct_ptr;
+  /* matrix written in postorder */
+  taucs_io_append(io,IO_BASE+sn,1,sn_up_size,TAUCS_INT,row_indices);
+  taucs_free(row_indices);
+  *sn_struct_ptr = NULL;
+}
+/* Out-of-core variant of the left-looking update: reads K's row structure and update block through handle, subtracts K's contribution from the in-core blocks of J, frees what it read, and recurses into K's children if K contributed. */
+static void
+recursive_leftlooking_supernodal_update_ooc(int J,int K,
+					    int bitmap[],
+					    taucs_datatype* dense_update_matrix,
+					    taucs_io_handle* handle,
+					    taucs_ccs_matrix* A,
+					    supernodal_factor_matrix* L)
+{
+  int i,j,ir;
+  int  child;
+  int* first_child   = L->first_child;
+  int* next_child    = L->next_child;
+  int sn_size_father = (L->sn_size)[J];
+  int sn_up_size_father = (L->sn_up_size)[J];
+  int sn_size_child = (L->sn_size)[K];
+  int sn_up_size_child = (L->sn_up_size)[K];
+  int exist_upd=0;
+  int first_row = 0; /* to suppress a warning */
+  int row_count=0;
+  int PK,M,N,LDA,LDB,LDC;
+
+  if(L->sn_up_size[K]-L->sn_size[K]>0){ /* K can only contribute if it has update rows */
+  
+    for(i=0;i<sn_size_father;i++) { /* 1-based position of each row within J's diagonal rows */
+      bitmap[L->sn_struct[J][i]]=i+1;
+    }
+
+    for(i=sn_size_father;i<sn_up_size_father;i++){ /* 1-based position of each row within J's update rows */
+      bitmap[L->sn_struct[J][i]] = i - sn_size_father + 1;
+    }
+
+    L->sn_struct[K] = (int*)taucs_malloc(sn_up_size_child*sizeof(int)); /* NOTE(review): result is not checked before the read below */
+    taucs_io_read(handle,IO_BASE+K,1,sn_up_size_child,TAUCS_INT,L->sn_struct[K]);
+    
+    for(i=sn_size_child;i<sn_up_size_child;i++){
+      /* is this row index included in the columns of sn J? */
+      if(bitmap[L->sn_struct[K][i]]
+	 && L->sn_struct[K][i] <= L->sn_struct[J][sn_size_father-1]) {
+	if(!exist_upd) first_row = i;
+	row_count++;
+	exist_upd = 1;
+      }
+    }
+  }
+
+  if(exist_upd){
+    (L->up_blocks   )[K] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[K]-(L->sn_size)[K])
+						   *((L->sn_size)[K]),
+						   sizeof(taucs_datatype));
+    taucs_io_read(handle,IO_BASE+L->n_sn+2*K+1,
+		  (L->sn_up_size)[K]-(L->sn_size)[K],
+		  (L->sn_size)[K] ,
+		  TAUCS_CORE_DATATYPE,(L->up_blocks)[K]);
+   
+    LDA = LDB = L->sn_up_size[K]-L->sn_size[K];
+    M  = sn_up_size_child - first_row ; /* +-1 ? */    
+    LDC =  sn_up_size_father;
+    N  = row_count; 
+    PK = L->sn_size[K];    
+   
+    /* The GEMM code computes on the upper triangle of the trapezoidal
+       matrix, which is junk. */
+    /*
+    taucs_gemm ("No Conjugate",
+		"Conjugate",
+		&M,&N,&PK,
+		&taucs_one_const,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+		&taucs_zero_const,
+		dense_update_matrix,&LDC);
+    */
+
+    /* This is the HERK+GEMM fix by Elad */
+    taucs_herk ("Lower",
+		"No Conjugate",
+		&N,&PK,
+		&taucs_one_real_const,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		&taucs_zero_real_const,
+		dense_update_matrix,&LDC);
+
+    if(M-N > 0)
+    {
+        int newM = M - N;
+   
+        taucs_gemm ("No Conjugate",
+		"Conjugate",
+		&newM,&N,&PK,
+		&taucs_one_const,
+		&(L->up_blocks[K][first_row-sn_size_child+N]),&LDA,
+		&(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+		&taucs_zero_const,
+		dense_update_matrix+N,&LDC);
+    }
+    /* end of GEMM/HERK+GEMM fix */ 
+
+    taucs_free((L->up_blocks   )[K]); /* K's update block is only needed for the product above */
+    (L->up_blocks   )[K] = NULL;
+
+    for(j=0;j<row_count;j++) /* scatter the lower triangle of the product into J's diagonal block */
+      for(ir=j;ir<row_count;ir++){
+	/*
+	L->sn_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*sn_size_father+(bitmap[L->sn_struct[K][first_row+ir]]-1)] -= dense_update_matrix[j*LDC+ir];
+	*/
+	L->sn_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*sn_size_father+(bitmap[L->sn_struct[K][first_row+ir]]-1)] =
+	  taucs_sub(L->sn_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*sn_size_father+(bitmap[L->sn_struct[K][first_row+ir]]-1)] , dense_update_matrix[j*LDC+ir]);
+      }
+
+    for(j=0;j<row_count;j++) /* scatter the remaining rows of the product into J's update block */
+      for(ir=row_count;ir<M;ir++){
+	/*
+	L->up_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*((L->sn_up_size)[J]-(L->sn_size)[J])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] -= dense_update_matrix[j*LDC+ir];
+	*/
+	L->up_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*((L->sn_up_size)[J]-(L->sn_size)[J])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] =
+	  taucs_sub(L->up_blocks[J][(bitmap[L->sn_struct[K][first_row+j]]-1)*((L->sn_up_size)[J]-(L->sn_size)[J])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] , dense_update_matrix[j*LDC+ir]);
+      }
+
+    taucs_free( L->sn_struct[K]);
+    L->sn_struct[K] = NULL;
+
+    for(i=0;i<sn_up_size_father;i++) /* clear the scratch before recursing, the recursive call refills it */
+      bitmap[L->sn_struct[J][i]]=0;
+    
+    for (child = first_child[K]; child != -1; child = next_child[child]) {
+      recursive_leftlooking_supernodal_update_ooc(J,child,
+						  bitmap,
+						  dense_update_matrix,
+						  handle,A,L);
+    }
+  } else {
+    for(i=0;i<sn_up_size_father;i++)
+      bitmap[L->sn_struct[J][i]]=0;
+
+    taucs_free( L->sn_struct[K]);
+    L->sn_struct[K] = NULL;
+  }
+}
+
+/* Append the factor blocks of every supernode in the subtree rooted at sn to
+ * the file behind handle, children before parents, and release each
+ * supernode's in-core blocks and row structure once they are written.  A call
+ * with is_root set only recurses.  No path currently returns nonzero. */
+static int
+recursive_append_L(int sn,   /* this supernode */
+                   int is_root,/* is v the root?*/
+                   taucs_io_handle* handle,
+                   supernodal_factor_matrix* L)
+{
+  int c;
+  int diag_dim, up_rows, slot;
+
+  for (c = L->first_child[sn]; c != -1; c = L->next_child[c])
+    if (recursive_append_L(c,FALSE,handle,L))
+      return -1; /* failure */
+
+  if (is_root) return 0;
+
+  diag_dim = L->sn_size[sn];
+  up_rows  = L->sn_up_size[sn] - diag_dim;
+  /* the diagonal block goes to this slot, the update block to the next */
+  slot     = IO_BASE + L->n_sn + 2*sn;
+
+  taucs_io_append(handle,slot,  diag_dim,diag_dim,TAUCS_CORE_DATATYPE,L->sn_blocks[sn]);
+  taucs_io_append(handle,slot+1,up_rows, diag_dim,TAUCS_CORE_DATATYPE,L->up_blocks[sn]);
+
+  taucs_free(L->sn_blocks[sn]);
+  taucs_free(L->up_blocks[sn]);
+  taucs_free(L->sn_struct[sn]);
+  L->sn_blocks[sn] = NULL;
+  L->up_blocks[sn] = NULL;
+  L->sn_struct[sn] = NULL;
+
+  return 0;
+}
+
+/*
+ * Read the numeric blocks of every supernode in the subtree rooted at sn
+ * back from the out-of-core store.
+ *
+ * The subtree is walked children-first.  For each visited supernode other
+ * than the one flagged by is_root, zero-initialised storage is allocated
+ * for sn_blocks[sn] (sn_size*sn_size entries) and up_blocks[sn]
+ * ((sn_up_size-sn_size)*sn_size entries), and the two blocks are read from
+ * records IO_BASE+n_sn+2*sn and IO_BASE+n_sn+2*sn+1.
+ *
+ * Returns 0 on success and -1 as soon as a recursive call reports failure.
+ * Allocation results and the return values of taucs_io_read are not
+ * inspected.
+ */
+static int
+recursive_read_L(int sn,      /* root of the subtree to load */
+                 int is_root, /* nonzero: nothing is read for sn itself */
+                 taucs_io_handle* handle,
+                 supernodal_factor_matrix* L)
+{
+  int kid;
+  int width;   /* number of columns of this supernode */
+  int below;   /* number of rows in up_blocks[sn] */
+  int record;  /* record index holding sn_blocks[sn] */
+
+  kid = L->first_child[sn];
+  while (kid != -1) {
+    if (recursive_read_L(kid,FALSE,handle,L) != 0)
+      return -1; /* failure in a child subtree */
+    kid = L->next_child[kid];
+  }
+
+  if (is_root)
+    return 0;
+
+  width  = L->sn_size[sn];
+  below  = L->sn_up_size[sn] - L->sn_size[sn];
+  record = IO_BASE+L->n_sn+2*sn;
+
+  L->sn_blocks[sn] = (taucs_datatype*)taucs_calloc(width*width,
+                                                   sizeof(taucs_datatype));
+  L->up_blocks[sn] = (taucs_datatype*)taucs_calloc(below*width,
+                                                   sizeof(taucs_datatype));
+
+  taucs_io_read(handle,record,
+                width,width,
+                TAUCS_CORE_DATATYPE,L->sn_blocks[sn]);
+  taucs_io_read(handle,record+1,
+                below,width,
+                TAUCS_CORE_DATATYPE,L->up_blocks[sn]);
+
+  return 0;
+}
+
+/*
+ * Read the row-index structure of every supernode in the subtree rooted
+ * at sn from the out-of-core store.
+ *
+ * The subtree is walked children-first.  For each visited supernode other
+ * than the one flagged by is_root, sn_struct[sn] is allocated with
+ * sn_up_size[sn] ints and filled from record IO_BASE+sn.
+ *
+ * Returns 0 on success and -1 as soon as a recursive call reports failure.
+ * The allocation result and the return value of taucs_io_read are not
+ * inspected.
+ */
+static int
+recursive_read_L_cols(int sn,      /* root of the subtree to load */
+                      int is_root, /* nonzero: nothing is read for sn itself */
+                      taucs_io_handle* handle,
+                      supernodal_factor_matrix* L)
+{
+  int kid;
+  int rows;  /* length of the row structure of sn */
+
+  kid = L->first_child[sn];
+  while (kid != -1) {
+    if (recursive_read_L_cols(kid,FALSE,handle,L) != 0)
+      return -1; /* failure in a child subtree */
+    kid = L->next_child[kid];
+  }
+
+  if (is_root)
+    return 0;
+
+  rows = L->sn_up_size[sn];
+  L->sn_struct[sn] = (int*)taucs_malloc(rows*sizeof(int));
+  taucs_io_read(handle,IO_BASE+sn,1,rows,TAUCS_INT,L->sn_struct[sn]);
+
+  return 0;
+}
+/*
+ * Out-of-core left-looking supernodal LL^T factorization of the subtree
+ * rooted at sn.
+ *
+ * Phase 1 factors every child subtree: a child whose sn_in_core flag is
+ * set has its row structures read from handle (recursive_read_L_cols),
+ * is factored by recursive_leftlooking_supernodal_factor_llt and is then
+ * written out with recursive_append_L; any other child is handled by a
+ * recursive call to this routine.
+ *
+ * Phase 2 (skipped when is_root is nonzero) allocates the blocks of sn,
+ * reads its row structure from record IO_BASE+sn, applies the updates of
+ * each child subtree through recursive_leftlooking_supernodal_update_ooc,
+ * factors the front, appends both blocks to handle and frees the in-core
+ * copies of sn.
+ *
+ * map is passed through unchanged to the update and front-factor
+ * routines.
+ *
+ * Returns 0 on success and -1 as soon as any step reports failure.
+ * Allocation results and the return values of the taucs_io_* calls are
+ * not inspected.
+ * NOTE(review): the -1 return after leftlooking_supernodal_front_factor
+ * leaves the blocks of sn allocated; presumably the caller releases them
+ * when it frees L -- confirm.
+ */
+static int
+recursive_leftlooking_supernodal_factor_llt_ooc(int sn,    /* this supernode */
+						int is_root,/* is v the root?*/
+						int* map,
+						int* sn_in_core,
+						taucs_io_handle* handle,
+						taucs_ccs_matrix* A,
+						supernodal_factor_matrix* L)
+{
+  int  child;
+  /*int  sn_size;*/
+  int* first_child   = L->first_child;
+  int* next_child    = L->next_child;
+  taucs_datatype* dense_update_matrix = NULL;
+  /* Phase 1: factor every child subtree before touching sn itself. */
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    if(sn_in_core[child]){
+      /* child is flagged in-core: load structures, factor, write out */
+      if (recursive_read_L_cols(child,
+				FALSE,
+				handle,
+				L)) {
+	return -1;
+      }
+      if (recursive_leftlooking_supernodal_factor_llt(child,
+						      FALSE,
+						      map,
+						      A,L)) {
+	/* failure */
+	return -1;
+      }
+      if (recursive_append_L(child,
+			     FALSE,
+			     handle,
+			     L)) {
+	return -1;
+      }
+
+    }
+    else /* child is not flagged in-core: recurse with this routine */
+      if (recursive_leftlooking_supernodal_factor_llt_ooc(child,
+							  FALSE,
+							  map,
+							  sn_in_core,
+							  handle,
+							  A,L)) {
+	/* failure */
+	return -1;
+      }
+  }
+  /* Phase 2: assemble and factor sn itself (nothing to do for the root). */
+  if (!is_root) { 
+    (L->sn_blocks   )[sn] = (taucs_datatype*)taucs_calloc(((L->sn_size)[sn])
+						    *((L->sn_size)[sn]),
+						    sizeof(taucs_datatype));
+    /* up_blocks[sn] is allocated only when sn has rows beyond its own columns */
+    if(L->sn_up_size[sn]-L->sn_size[sn]>0) (L->up_blocks   )[sn] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[sn]-(L->sn_size)[sn]) *((L->sn_size)[sn]),sizeof(taucs_datatype));
+
+    L->sn_struct[sn] = (int*)taucs_malloc((L->sn_up_size)[sn]*sizeof(int));
+    /* row structure of sn lives in record IO_BASE+sn */
+    taucs_io_read(handle,IO_BASE+sn,1,(L->sn_up_size)[sn],TAUCS_INT,L->sn_struct[sn]);
+    /* dense_update_matrix is still NULL here, so this allocation always runs */
+    if (!dense_update_matrix) 
+      dense_update_matrix = (taucs_datatype*) taucs_calloc((L->sn_up_size)[sn]*(L->sn_size)[sn],sizeof(taucs_datatype));
+    for (child = first_child[sn]; child != -1; child = next_child[child]) {
+      recursive_leftlooking_supernodal_update_ooc(sn,child,
+						  map,
+						  dense_update_matrix,
+						  handle,A,L);
+    }
+
+    taucs_free(dense_update_matrix);
+    if (leftlooking_supernodal_front_factor(sn,
+					    map,
+					    A,
+					    L)) {
+      /* nonpositive pivot */
+      return -1;
+    }
+    /* write both blocks of sn to the store, then drop the in-core copies */
+    taucs_io_append(handle,IO_BASE+L->n_sn+2*sn,
+		    L->sn_size[sn],L->sn_size[sn],
+		    TAUCS_CORE_DATATYPE,L->sn_blocks[sn]);
+    taucs_io_append(handle,IO_BASE+L->n_sn+2*sn+1,
+		    L->sn_up_size[sn] - L->sn_size[sn],L->sn_size[sn],
+		    TAUCS_CORE_DATATYPE,L->up_blocks[sn]);
+    
+    taucs_free((L->sn_blocks)[sn]);
+    taucs_free((L->up_blocks)[sn]); 
+    taucs_free((L->sn_struct)[sn]);
+    (L->sn_blocks   )[sn] = NULL;
+    (L->up_blocks   )[sn] = NULL;
+    (L->sn_struct   )[sn] = NULL;
+  }
+
+  return 0;
+}
+
+/*
+ * Decide, for every supernode in the subtree rooted at sn, whether the
+ * subtree below it is flagged as "in core", and return a memory estimate
+ * in bytes for the subtree.
+ *
+ * With front = sn_size*sn_up_size*sizeof(taucs_datatype) and
+ * total = (sum of the values returned for the children) + front
+ *         + sn_up_size*sizeof(int):
+ *   - if sn has no children, or total + front <= avail_mem, then
+ *     sn_in_core[sn] is set to 1 and total is returned;
+ *   - otherwise sn_in_core[sn] is set to 0 and total + front is returned.
+ *
+ * is_root is accepted for symmetry with the other tree walkers but is not
+ * consulted; the same computation is applied to every node.
+ */
+static double
+recursive_compute_supernodes_ll_in_core(int sn,       /* root of the subtree */
+                                        int is_root,  /* unused */
+                                        double avail_mem,
+                                        int* sn_in_core,
+                                        supernodal_factor_matrix* L)
+{
+  int    kid;
+  double subtree_mem = 0.0;  /* sum of the estimates of the children */
+  double front_mem;          /* bytes of numeric data in this supernode */
+  double total_mem;
+
+  for (kid = L->first_child[sn]; kid != -1; kid = L->next_child[kid]) {
+    subtree_mem += recursive_compute_supernodes_ll_in_core(kid,
+                                                           FALSE,
+                                                           avail_mem,
+                                                           sn_in_core,
+                                                           L);
+  }
+
+  front_mem = (double)(L->sn_size)[sn]*(double)(L->sn_up_size)[sn]*sizeof(taucs_datatype);
+  total_mem = subtree_mem + front_mem + (double)(L->sn_up_size)[sn]*sizeof(int);
+
+  if (L->first_child[sn] == -1 || (total_mem + front_mem) <= avail_mem) {
+    sn_in_core[sn] = 1;
+    return total_mem;
+  }
+
+  sn_in_core[sn] = 0;
+  return (total_mem + front_mem);
+}
+
+/*
+ * Older in-core scheduling pass.  Returns the accumulated size in bytes
+ * of the numeric blocks in the subtree rooted at sn (the block of sn
+ * itself is not counted when is_root is nonzero) and sets
+ * sn_in_core[child] to 1 for every child whose subtree estimate plus its
+ * own block is smaller than avail_mem.  Flags are only ever set, never
+ * cleared, by this routine.
+ *
+ * NOTE(review): all byte counts are carried in int and mixed with
+ * sizeof() results, so large fronts may overflow -- confirm whether this
+ * routine is still used.
+ */
+static int
+recursive_compute_supernodes_in_core_old(int sn,       /* root of the subtree */
+                                         int is_root,  /* nonzero: sn itself is not counted */
+                                         int avail_mem,
+                                         int* sn_in_core,
+                                         supernodal_factor_matrix* L)
+{
+  int kid;
+  int subtree_bytes = 0;
+
+  for (kid = L->first_child[sn]; kid != -1; kid = L->next_child[kid]) {
+    int kid_bytes = recursive_compute_supernodes_in_core_old(kid,
+                                                             FALSE,
+                                                             avail_mem,
+                                                             sn_in_core,
+                                                             L);
+    int kid_need = (int)(kid_bytes+(L->sn_size)[kid]*(L->sn_up_size)[kid]*sizeof(taucs_datatype));
+
+    if (kid_need < avail_mem) {
+      sn_in_core[kid] = 1;
+    }
+
+    subtree_bytes += kid_bytes;
+  }
+
+  if (!is_root) {
+    subtree_bytes += (L->sn_size)[sn]*(L->sn_up_size)[sn]*sizeof(taucs_datatype);
+  }
+
+  return subtree_bytes;
+}
+
+/*
+ * Number the supernodes of the subtree rooted at sn in post-order.
+ *
+ * Every child subtree is numbered first; then L->ipostorder[sn] receives
+ * the current value of *current_index_ptr, which is incremented
+ * afterwards.  The node passed in is numbered as well, regardless of
+ * is_root (the flag is not consulted).
+ */
+static void
+recursive_compute_supernodes_ipostorder(int sn,       /* root of the subtree */
+                                        int is_root,  /* unused */
+                                        int* current_index_ptr,
+                                        supernodal_factor_matrix* L)
+{
+  int kid;
+  int slot;
+
+  kid = L->first_child[sn];
+  while (kid != -1) {
+    recursive_compute_supernodes_ipostorder(kid,
+                                            FALSE,
+                                            current_index_ptr,
+                                            L);
+    kid = L->next_child[kid];
+  }
+
+  slot = *current_index_ptr;
+  L->ipostorder[sn] = slot;
+  *current_index_ptr = slot + 1;
+}
+
+/* no panelization
+ *
+ * The function below is compiled out by the "#if 0" guard.
+ *
+ * It factors A out of core without panels: after a memory sanity check it
+ * creates the factor descriptor and the store "/tmp/taucs.L", runs the
+ * symbolic elimination, writes the tree metadata records, marks the
+ * supernodes that can be handled in core, runs the recursive left-looking
+ * factorization, closes the store and returns the file name as a string
+ * literal cast to void*.  NULL is returned on failure.
+ *
+ * The early returns taken when scheduling or factorization fails free L
+ * and map but neither free sn_in_core nor close handle.
+ */
+#if 0
+void* taucs_ccs_factor_llt_ll_ooc(taucs_ccs_matrix* A, 
+				  double memory)
+{
+  supernodal_factor_matrix* L;
+  int i,j,ip,jp;
+  int sn,p;
+  int* map;
+  int* sn_in_core;
+  double wtime, ctime;
+  taucs_io_handle* handle;
+  int current_index=0;
+  double memory_overhead;
+
+  /* compute fixed memory overhead */
+  memory_overhead = 
+    4.0*(double)((A->n)*sizeof(int)) + /* integer vectors in L */
+    3.0*(double)((A->n)*sizeof(int)) + /* pointer arrays in L  */
+    2.0*(double)((A->n)*sizeof(int)) + /* integer vectors in program  */
+    4.0*3.0*(double)((A->n)*sizeof(int));  /* singlefile matrix arrays */  
+ 
+  taucs_printf("max memory overhead %.0f memory %.0f\n",memory_overhead,memory);
+  /* refuse to start unless two data vectors and two int vectors of length n fit */
+  if ( memory - memory_overhead < 
+       2.0*(double)((A->n)*sizeof(taucs_datatype)) + 
+       2.0*(double)((A->n)*sizeof(int)) ) {
+    taucs_printf("\t\ttaucs_ccs_factor_llt_ll_ooc: not enough memory\n");
+    return NULL;
+  }
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  /* create the factor descriptor and the backing store */
+  L = multifrontal_supernodal_create();
+  if(!MULTIFILE)
+    handle = taucs_io_create_singlefile("/tmp/taucs.L");
+  else
+    handle = taucs_io_create_multifile("/tmp/taucs.L");
+  /* record 5 holds the matrix dimension n */
+  taucs_io_append(handle,5,1,1,TAUCS_INT,&(A->n));
+
+  taucs_ccs_ooc_symbolic_elimination(A,L,
+				     TRUE /* sort row indices */,
+				     FALSE /* don't return col_to_sn_map */,
+				     (memory - memory_overhead)/3.0,
+				     ooc_sn_struct_handler,handle);
+  
+  /* we now compute an exact memory overhead bound using n_sn */
+  memory_overhead = 
+    4.0*(double)((L->n_sn)*sizeof(int)) + /* integer vectors in L */
+    3.0*(double)((L->n_sn)*sizeof(int)) + /* pointer arrays in L  */
+    2.0*(double)((L->n_sn)*sizeof(int)) + /* integer vectors in program  */
+    4.0*3.0*(double)((L->n_sn)*sizeof(int));  /* singlefile matrix arrays */
+  printf("real memory overhead %.0f memory %.0f\n",memory_overhead,memory); /* note: plain printf, unlike the taucs_printf calls around it */
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  /* records 0-4 and 6: n_sn, first_child, next_child, sn_size, sn_up_size, A->flags */
+  taucs_io_append(handle,0,1,1,TAUCS_INT,&(L->n_sn));
+  taucs_io_append(handle,1,1,L->n_sn+1,TAUCS_INT,L->first_child);
+  taucs_io_append(handle,2,1,L->n_sn+1,TAUCS_INT,L->next_child);
+  taucs_io_append(handle,3,1,L->n_sn,TAUCS_INT,L->sn_size);
+  taucs_io_append(handle,4,1,L->n_sn,TAUCS_INT,L->sn_up_size);
+  /*taucs_io_append(handle,5,1,1,TAUCS_INT,&(L->n));*/
+  taucs_io_append(handle,6,1,1,TAUCS_INT,&(A->flags));
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Prepare L = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  map  = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  /* one in-core flag per supernode plus one for the root index n_sn, all cleared */
+  sn_in_core = (int*)taucs_malloc((L->n_sn+1)*sizeof(int));
+  for(i=0;i<=L->n_sn;i++)
+    sn_in_core[i] = 0;
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  if(recursive_compute_supernodes_ll_in_core(L->n_sn,
+					     TRUE,
+					     (memory - memory_overhead)/3.0,
+					     sn_in_core,
+					     L)<0.0) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(map);
+    return NULL;
+  }
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Scheduling = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  /* numeric factorization, starting from the root index n_sn */
+  if (recursive_leftlooking_supernodal_factor_llt_ooc((L->n_sn),  
+						      TRUE, 
+						      map,
+						      sn_in_core,
+						      handle,
+						      A,L)) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(map);
+    return NULL;
+  }
+
+  taucs_printf("\t\tSupernodal Left-Looking INFO: nreads = %.3f bytes_read = %.3f time_read = %10.3f seconds \n",
+	       handle->nreads,handle->bytes_read,handle->read_time); 
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSupernodal Left-Looking LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  /* release work arrays, close the store and free the descriptor */
+  taucs_free(map);
+  taucs_free(sn_in_core);
+  taucs_io_close(handle);
+  ooc_supernodal_factor_free(L);
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Cleanup = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  return (void*) "/tmp/taucs.L";
+}
+#endif
+
+/******************** OOC SOLVE **************************/
+
+/*
+ * Forward-substitution pass (solve with L) of the out-of-core supernodal
+ * solve for the subtree rooted at sn.  Children are processed before the
+ * supernode itself.
+ *
+ * For a non-root sn the row structure (record IO_BASE+sn) and the two
+ * numeric blocks (records IO_BASE+n_sn+2*sn and IO_BASE+n_sn+2*sn+1) are
+ * read from handle.  The entries of x indexed by the first sn_size rows of
+ * the structure are computed from b, and the entries of b indexed by the
+ * remaining up_size rows are updated in place.  All temporaries are freed
+ * before returning and sn_struct[sn] is reset to NULL.
+ *
+ * One of three code paths is chosen:
+ *   - BLAS trsm+gemm when the flop estimate exceeds BLAS_FLOPS_CUTOFF;
+ *   - scalar loops on dense temporaries when sn_size exceeds
+ *     SOLVE_DENSE_CUTOFF;
+ *   - scalar loops working directly on x and b otherwise.
+ *
+ * Parameters:
+ *   sn, is_root   supernode to process; nothing is solved for the root
+ *   handle        store the factor is read from
+ *   n_sn          number of supernodes (used to locate the block records)
+ *   first_child,
+ *   next_child    child/sibling links of the supernode tree
+ *   sn_struct     per-supernode row-structure slots, filled temporarily
+ *   sn_sizes,
+ *   sn_up_sizes   per-supernode column counts and structure lengths
+ *   x             receives the solution entries
+ *   b             right-hand side, modified in place
+ *   t             scratch; the first two paths use sn_size+up_size entries
+ *
+ * Allocation results and the return values of taucs_io_read are not
+ * inspected.
+ */
+static void 
+recursive_supernodal_solve_l_ooc(int sn,       /* this supernode */
+				 int is_root,  /* is v the root? */
+				 taucs_io_handle* handle,
+				 int n_sn,
+				 int* first_child, int* next_child,
+				 int** sn_struct, int* sn_sizes, int* sn_up_sizes,
+				 taucs_datatype x[], taucs_datatype b[],
+				 taucs_datatype t[])
+{
+  int child;
+  int  sn_size; /* number of rows/columns in the supernode    */
+  int  up_size; /* number of rows that this supernode updates */
+  int    ione = 1;
+  taucs_datatype* xdense;
+  taucs_datatype* bdense;
+  double  flops;
+  int i,j,ip,jp;
+  taucs_datatype* sn_block;
+  taucs_datatype* up_block = NULL; /* stays NULL when there is nothing to read */
+
+  /* post-order: finish every child subtree first */
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    recursive_supernodal_solve_l_ooc(child,
+				     FALSE,
+				     handle,
+				     n_sn,
+				     first_child,next_child,
+				     sn_struct,sn_sizes,sn_up_sizes,
+				     x,b,t);
+  }
+
+  if(!is_root) {
+    sn_size = sn_sizes[sn];
+    up_size = sn_up_sizes[sn] - sn_sizes[sn];
+
+    /* bring the structure and the numeric blocks of sn into core */
+    sn_struct[sn] = (int*)taucs_malloc((sn_size+up_size)*sizeof(int));
+    taucs_io_read(handle,IO_BASE+sn,1,sn_size+up_size,TAUCS_INT,sn_struct[sn]);
+    
+    sn_block = (taucs_datatype*)taucs_calloc(sn_size*sn_size,sizeof(taucs_datatype));
+    taucs_io_read(handle,IO_BASE+n_sn+2*sn,
+		  sn_size,
+		  sn_size ,
+		  TAUCS_CORE_DATATYPE,sn_block);
+   
+    if (up_size > 0 && sn_size > 0) {
+      up_block = (taucs_datatype*)taucs_calloc(up_size*sn_size,sizeof(taucs_datatype));
+      taucs_io_read(handle,IO_BASE+n_sn+2*sn+1,
+		    up_size,
+		    sn_size ,
+		    TAUCS_CORE_DATATYPE,up_block);
+    }
+
+    flops = ((double)sn_size)*((double)sn_size) 
+      + 2.0*((double)sn_size)*((double)up_size);
+
+    if (flops > BLAS_FLOPS_CUTOFF) {
+      /* path 1: gather into t, solve with trsm, update with gemm, scatter */
+      xdense = t;
+      bdense = t + sn_size;
+      
+      for (i=0; i<sn_size; i++)
+	xdense[i] = b[ sn_struct[ sn ][ i ] ];
+      for (i=0; i<up_size; i++)
+	bdense[i] = taucs_zero;
+      
+      taucs_trsm ("Left",
+	     "Lower",
+	     "No Conjugate",
+	     "No unit diagonal",
+	     &sn_size,&ione,
+	     &taucs_one_const,
+	     sn_block,&sn_size,
+	     xdense , &sn_size);
+      
+      if (up_size > 0 && sn_size > 0) 
+	taucs_gemm ("No Conjugate","No Conjugate",
+		    &up_size, &ione, &sn_size,
+		    &taucs_one_const,
+		    up_block,&up_size,
+		    xdense       ,&sn_size,
+		    &taucs_zero_const,
+		    bdense       ,&up_size);
+      
+      for (i=0; i<sn_size; i++)
+	x[ sn_struct[ sn][ i ] ]  = xdense[i];
+      for (i=0; i<up_size; i++)
+	/*b[ sn_struct[ sn ][ sn_size + i ] ] -= bdense[i];*/
+	b[ sn_struct[ sn ][ sn_size + i ] ] =
+	  taucs_sub(b[ sn_struct[ sn ][ sn_size + i ] ] , bdense[i]);
+
+    } else if (sn_size > SOLVE_DENSE_CUTOFF) {
+      /* path 2: same gather/scatter, with scalar loops instead of BLAS */
+      xdense = t;
+      bdense = t + sn_size;
+      
+      for (i=0; i<sn_size; i++)
+	xdense[i] = b[ sn_struct[ sn ][ i ] ];
+      for (i=0; i<up_size; i++)
+	bdense[i] = taucs_zero;
+      
+      for (jp=0; jp<sn_size; jp++) {
+	/*xdense[jp] = xdense[jp] / sn_block[ sn_size*jp + jp];*/
+	xdense[jp] = taucs_div(xdense[jp] , sn_block[ sn_size*jp + jp]);
+
+	for (ip=jp+1; ip<sn_size; ip++) {
+	  /*xdense[ip] -= xdense[jp] * sn_block[ sn_size*jp + ip];*/
+	  /* the minuend is xdense[ip]; the counter i is stale here (it
+	     equals up_size after the loop above) and must not be used */
+	  xdense[ip] = taucs_sub(xdense[ip],
+				 taucs_mul(xdense[jp] , sn_block[ sn_size*jp + ip]));
+	}
+      }
+
+      for (jp=0; jp<sn_size; jp++) {
+	for (ip=0; ip<up_size; ip++) {
+	  /*bdense[ip] += xdense[jp] * up_block[ up_size*jp + ip];*/
+	  bdense[ip] = taucs_add(bdense[ip],
+				 taucs_mul(xdense[jp] , up_block[ up_size*jp + ip]));
+	}
+      }
+
+      for (i=0; i<sn_size; i++)
+	x[ sn_struct[ sn][ i ] ]  = xdense[i];
+      for (i=0; i<up_size; i++)
+	/*b[ sn_struct[ sn ][ sn_size + i ] ] -= bdense[i];*/
+	b[ sn_struct[ sn ][ sn_size + i ] ] =
+	  taucs_sub(b[ sn_struct[ sn ][ sn_size + i ] ] , bdense[i]);
+      
+    } else {
+      /* path 3: small supernode, work directly on x and b */
+      for (jp=0; jp<sn_size; jp++) {
+	j = sn_struct[sn][jp];
+	/*x[j] = b[j] / sn_block[ sn_size*jp + jp];*/
+	x[j] = taucs_div(b[j] , sn_block[ sn_size*jp + jp]);
+	for (ip=jp+1; ip<sn_size; ip++) {
+	  i = sn_struct[sn][ip];
+	  /*b[i] -= x[j] * sn_block[ sn_size*jp + ip];*/
+	  b[i] = taucs_sub(b[i],
+			   taucs_mul(x[j] , sn_block[ sn_size*jp + ip]));
+	}
+
+	for (ip=0; ip<up_size; ip++) {
+	  i = sn_struct[sn][sn_size + ip];
+	  /*b[i] -= x[j] * up_block[ up_size*jp + ip];*/
+	  b[i] = taucs_sub(b[i],
+			   taucs_mul(x[j] , up_block[ up_size*jp + ip]));
+	}
+      }
+    }
+
+    /* release everything that was brought into core for sn */
+    taucs_free(sn_struct[sn]);
+    taucs_free(sn_block);
+    if (up_size > 0 && sn_size > 0) taucs_free(up_block);
+    sn_struct[sn] = NULL;
+    sn_block = NULL;
+    up_block = NULL;
+  }
+}
+/*
+ * Backward-substitution pass of the out-of-core supernodal solve for the
+ * subtree rooted at sn.  Unlike the forward pass, the supernode itself is
+ * processed before its children.
+ *
+ * For a non-root sn the row structure (record IO_BASE+sn) and the two
+ * numeric blocks (records IO_BASE+n_sn+2*sn and IO_BASE+n_sn+2*sn+1) are
+ * read from handle.  The entries of x indexed by the first sn_size rows
+ * of the structure are computed from b and from the entries of x indexed
+ * by the remaining up_size rows.  All temporaries are freed afterwards
+ * and sn_struct[sn] is reset to NULL.
+ *
+ * One of three code paths is chosen: BLAS gemm+trsm when the flop
+ * estimate exceeds BLAS_FLOPS_CUTOFF, scalar loops on dense temporaries
+ * when sn_size exceeds SOLVE_DENSE_CUTOFF, and scalar loops working
+ * directly on x and b otherwise (only this last path modifies b).
+ *
+ * t is scratch space; the first two paths use sn_size+up_size entries.
+ * Allocation results and the return values of taucs_io_read are not
+ * inspected.
+ *
+ * NOTE(review): the BLAS path passes "Conjugate" to gemm and trsm, while
+ * the scalar paths multiply by the stored entries as they are -- confirm
+ * that the paths agree for complex taucs_datatype.
+ */
+static void 
+recursive_supernodal_solve_lt_ooc(int sn,       /* this supernode */
+				  int is_root,  /* is v the root? */
+				  taucs_io_handle* handle,
+				  int n_sn,
+				  int* first_child, int* next_child,
+				  int** sn_struct, int* sn_sizes, int* sn_up_sizes,
+				  taucs_datatype x[], taucs_datatype b[],
+				  taucs_datatype t[])
+{
+  int child;
+  int  sn_size; /* number of rows/columns in the supernode    */
+  int  up_size; /* number of rows that this supernode updates */
+  int    ione = 1;
+  taucs_datatype* xdense;
+  taucs_datatype* bdense;
+  double  flops;
+  int i,j,ip,jp;
+  taucs_datatype* sn_block;
+  taucs_datatype* up_block = NULL; /* warning */
+
+  if(!is_root) {
+
+    sn_size = sn_sizes[sn];
+    up_size = sn_up_sizes[sn]-sn_sizes[sn];
+    /* bring the structure and the numeric blocks of sn into core */
+    sn_struct[sn] = (int*)taucs_malloc((sn_size+up_size)*sizeof(int));
+    taucs_io_read(handle,IO_BASE+sn,1,sn_size+up_size,TAUCS_INT,sn_struct[sn]);
+    
+    sn_block = (taucs_datatype*)taucs_calloc(sn_size*sn_size,sizeof(taucs_datatype));
+    taucs_io_read(handle,IO_BASE+n_sn+2*sn,
+		  sn_size,
+		  sn_size ,
+		  TAUCS_CORE_DATATYPE,sn_block);
+    if (up_size > 0 && sn_size > 0){
+      up_block = (taucs_datatype*)taucs_calloc(up_size*sn_size,sizeof(taucs_datatype));
+      taucs_io_read(handle,IO_BASE+n_sn+2*sn+1,
+		    up_size,
+		    sn_size ,
+		    TAUCS_CORE_DATATYPE,up_block);
+    }
+
+    flops = ((double)sn_size)*((double)sn_size) 
+      + 2.0*((double)sn_size)*((double)up_size);
+
+    if (flops > BLAS_FLOPS_CUTOFF) {
+      bdense = t;
+      xdense = t + sn_size;
+      /* path 1: gather b and the already-known part of x into t */
+      for (i=0; i<sn_size; i++)
+	bdense[i] = b[ sn_struct[ sn][ i ] ];
+      for (i=0; i<up_size; i++)
+	xdense[i] = x[ sn_struct[sn][sn_size+i] ];
+      /* subtract the contribution of the known entries, then solve */
+      if (up_size > 0 && sn_size > 0)
+	taucs_gemm ("Conjugate","No Conjugate",
+	       &sn_size, &ione, &up_size,
+	       &taucs_minusone_const,
+	       up_block,&up_size,
+	       xdense       ,&up_size,
+	       &taucs_one_const,
+	       bdense       ,&sn_size);
+            
+      taucs_trsm ("Left",
+	     "Lower",
+	     "Conjugate",
+	     "No unit diagonal",
+	     &sn_size,&ione,
+	     &taucs_one_const,
+	     sn_block,&sn_size,
+	     bdense       ,&sn_size);
+      
+      for (i=0; i<sn_size; i++)
+	x[ sn_struct[ sn][ i ] ]  = bdense[i];
+    
+    } else if (sn_size > SOLVE_DENSE_CUTOFF) {
+      bdense = t;
+      xdense = t + sn_size;
+      /* path 2: same gather/scatter, with scalar loops instead of BLAS */
+      for (i=0; i<sn_size; i++)
+	bdense[i] = b[ sn_struct[ sn][ i ] ];
+      
+      for (i=0; i<up_size; i++)
+	xdense[i] = x[ sn_struct[sn][sn_size+i] ];
+     
+      for (ip=sn_size-1; ip>=0; ip--) {
+	for (jp=0; jp<up_size; jp++) {
+	  /*bdense[ip] -= xdense[jp] * up_block[ up_size*ip + jp];*/
+	  bdense[ip] = taucs_sub(bdense[ip],
+				 taucs_mul(xdense[jp] , up_block[ up_size*ip + jp]));
+	}
+      }
+      /* back substitution, last row first */
+      for (ip=sn_size-1; ip>=0; ip--) {
+	for (jp=sn_size-1; jp>ip; jp--) {
+	  /*bdense[ip] -= bdense[jp] * sn_block[ sn_size*ip + jp];*/
+	  bdense[ip] = taucs_sub(bdense[ip],
+				 taucs_mul(bdense[jp] , sn_block[ sn_size*ip + jp]));
+	}
+	/*bdense[ip] = bdense[ip] / sn_block[ sn_size*ip + ip];*/
+	bdense[ip] = taucs_div(bdense[ip] , sn_block[ sn_size*ip + ip]);
+      }
+
+      for (i=0; i<sn_size; i++)
+	x[ sn_struct[ sn][ i ] ]  = bdense[i];
+    
+    } else {
+      /* path 3: small supernode, work directly on x and b (b is modified) */
+      for (ip=sn_size-1; ip>=0; ip--) {
+	i = sn_struct[sn][ip];
+
+	for (jp=0; jp<up_size; jp++) {
+	  j = sn_struct[sn][sn_size + jp];
+	  /*b[i] -= x[j] * up_block[ up_size*ip + jp];*/
+	  b[i] = taucs_sub(b[i],
+			   taucs_mul(x[j] , up_block[ up_size*ip + jp]));
+	}
+
+	for (jp=sn_size-1; jp>ip; jp--) {
+	  j = sn_struct[sn][jp];
+	  /*b[i] -= x[j] * sn_block[ sn_size*ip + jp];*/
+	  b[i] = taucs_sub(b[i],
+			   taucs_mul(x[j] , sn_block[ sn_size*ip + jp]));
+	}
+	/*x[i] = b[i] / sn_block[ sn_size*ip + ip];*/
+	x[i] = taucs_div(b[i] , sn_block[ sn_size*ip + ip]);
+      }
+
+    }
+    taucs_free(sn_struct[sn]);
+    taucs_free(sn_block);
+    if (up_size > 0 && sn_size > 0) taucs_free(up_block);
+    sn_struct[sn] = NULL;
+    sn_block = NULL;
+    up_block = NULL;
+  }
+  /* pre-order: the children are visited only after sn has been solved */
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    recursive_supernodal_solve_lt_ooc(child,
+				      FALSE,
+				      handle,
+				      n_sn,
+				      first_child,next_child,
+				      sn_struct,sn_sizes,sn_up_sizes,
+				      x,b,t);
+  }
+}
+
+/*
+ * Solve with an LL^T factor that is stored out of core.
+ *
+ * vL is used as an already-open taucs_io_handle*; this routine neither
+ * opens nor closes it.  vb is the right-hand side (read only) and vx
+ * receives the solution; both are treated as arrays of n taucs_datatype
+ * entries, where n is read from record 5 of the store.
+ *
+ * The supernode tree metadata (records 0-4) is loaded into a temporary
+ * factor descriptor.  b is copied into x, the forward pass reads x and
+ * writes the intermediate vector y, and the backward pass reads y and
+ * writes the final solution into x.
+ *
+ * Returns 0 on success and -1 if the descriptor or the work vectors
+ * cannot be allocated.  The temporary descriptor is released on every
+ * path past its creation.
+ */
+int taucs_dtl(ooc_solve_llt)(void* vL,
+			     void* vx, void* vb)
+{
+  taucs_io_handle* handle = (taucs_io_handle*) vL;
+  taucs_datatype* x = (taucs_datatype*) vx;
+  taucs_datatype* b = (taucs_datatype*) vb;
+  supernodal_factor_matrix* L;
+
+  taucs_datatype* y;
+  taucs_datatype* t; /* temporary vector */
+  int     i;
+
+  L = multifrontal_supernodal_create();
+  if (!L) return -1; /* nothing to release yet */
+
+  /* READ n, n_sn, first_child, next_child*/
+  taucs_io_read(handle,5,1,1,TAUCS_INT,&(L->n));
+  taucs_io_read(handle,0,1,1,TAUCS_INT,&(L->n_sn));
+  L->sn_struct = (int**)taucs_malloc((L->n_sn  )*sizeof(int*));
+  L->sn_blocks = (taucs_datatype**)taucs_malloc((L->n_sn  )*sizeof(taucs_datatype*));
+  L->up_blocks = (taucs_datatype**)taucs_malloc((L->n_sn  )*sizeof(taucs_datatype*));
+  L->sn_size   = (int*) taucs_malloc((L->n_sn+1)*sizeof(int));
+  L->sn_up_size   = (int*) taucs_malloc((L->n_sn+1)*sizeof(int));
+  L->first_child = (int*) taucs_malloc((L->n_sn+1)*sizeof(int));
+  L->next_child  = (int*) taucs_malloc((L->n_sn+1)*sizeof(int));
+  taucs_io_read(handle,1,1,L->n_sn+1,TAUCS_INT,L->first_child);
+  taucs_io_read(handle,2,1,L->n_sn+1,TAUCS_INT,L->next_child);
+  taucs_io_read(handle,3,1,L->n_sn,TAUCS_INT,L->sn_size);
+  taucs_io_read(handle,4,1,L->n_sn,TAUCS_INT,L->sn_up_size);
+
+  /* per-supernode data is loaded on demand by the recursive passes */
+  for(i=0;i<L->n_sn;i++){
+    L->sn_struct[i] = NULL;
+    L->sn_blocks[i] = NULL;
+    L->up_blocks[i] = NULL;
+  }
+  
+  y = (taucs_datatype*) taucs_malloc((L->n) * sizeof(taucs_datatype));
+  t = (taucs_datatype*) taucs_malloc((L->n) * sizeof(taucs_datatype));
+  if (!y || !t) {
+    taucs_free(y);
+    taucs_free(t);
+    /* release the descriptor too, so this path does not leak it */
+    ooc_supernodal_factor_free(L);
+    taucs_printf("leftlooking_supernodal_solve_llt: out of memory\n");
+    return -1;
+  }
+
+  /* work on a copy so that the caller's right-hand side stays intact */
+  for (i=0; i<L->n; i++) x[i] = b[i];
+
+  /* forward pass: right-hand side in x, result in y */
+  recursive_supernodal_solve_l_ooc (L->n_sn,
+				    TRUE,  /* this is the root */
+				    handle,
+				    L->n_sn,
+				    L->first_child, L->next_child,
+				    L->sn_struct,L->sn_size,L->sn_up_size,
+				    y, x, t);
+
+  /* backward pass: right-hand side in y, result in x */
+  recursive_supernodal_solve_lt_ooc(L->n_sn,
+				    TRUE,  /* this is the root */
+				    handle,
+				    L->n_sn,
+				    L->first_child, L->next_child,
+				    L->sn_struct,L->sn_size,L->sn_up_size,
+				    x, y, t);
+
+  taucs_free(y);
+  taucs_free(t);
+  ooc_supernodal_factor_free(L);
+  return 0;
+}
+/*******************************************************************/
+/**                     OOC Panelize Factor                       **/
+/*******************************************************************/
+
+/*
+ * Assign panel numbers to the out-of-core supernodes of the subtree
+ * rooted at sn and return a memory figure in bytes for sn.
+ *
+ * Children whose sn_in_core flag is set are skipped.  For the others the
+ * routine recurses and keeps the largest value returned.  For a non-root
+ * sn, with own = sn_size*sn_up_size*sizeof(taucs_datatype)
+ *                + sn_up_size*sizeof(int):
+ *   - if largest + own < global_mem, sn joins the current panel and
+ *     largest + own is returned;
+ *   - otherwise *curr_panel is incremented, sn is put in that new panel
+ *     and own is returned.
+ * For the root nothing is assigned and 0.0 is returned.
+ */
+static double
+recursive_smart_panelize_ooc_supernodes(int sn,       /* root of the subtree */
+                                        int is_root,  /* nonzero: sn gets no panel */
+                                        double global_mem,
+                                        int* curr_panel,
+                                        int* sn_in_core,
+                                        int* sn_to_panel_map,
+                                        supernodal_factor_matrix* L)
+{
+  int    kid;
+  double largest = 0.0;  /* biggest value returned by an out-of-core child */
+  double kid_mem;
+  double own_mem;
+
+  for (kid = L->first_child[sn]; kid != -1; kid = L->next_child[kid]) {
+    if (sn_in_core[kid])
+      continue;
+
+    kid_mem = recursive_smart_panelize_ooc_supernodes(kid,
+                                                      FALSE,
+                                                      global_mem,
+                                                      curr_panel,
+                                                      sn_in_core,
+                                                      sn_to_panel_map,
+                                                      L);
+    if (kid_mem > largest)
+      largest = kid_mem;
+  }
+
+  if (is_root)
+    return 0.0;
+
+  own_mem =
+    (double) (L->sn_size)[sn] * (double) (L->sn_up_size)[sn] * (double) sizeof(taucs_datatype)
+    + (double) (L->sn_up_size)[sn] * (double) sizeof(int);
+
+  if (largest+own_mem < global_mem) {
+    sn_to_panel_map[sn] = *curr_panel;
+    return (largest+own_mem);
+  }
+
+  *curr_panel += 1;
+  sn_to_panel_map[sn] = *curr_panel;
+  return own_mem;
+}
+
+/*
+ * Put every out-of-core supernode of the subtree rooted at sn into a
+ * panel of its own.
+ *
+ * Children whose sn_in_core flag is set are skipped; the others are
+ * visited recursively first.  For a non-root sn, *curr_panel is
+ * incremented and the new value is stored in sn_to_panel_map[sn].
+ * The return value is always 0.0.
+ */
+static double
+recursive_dumb_panelize_ooc_supernodes(int sn,       /* root of the subtree */
+                                       int is_root,  /* nonzero: sn gets no panel */
+                                       int* curr_panel,
+                                       int* sn_in_core,
+                                       int* sn_to_panel_map,
+                                       supernodal_factor_matrix* L)
+{
+  int kid;
+
+  for (kid = L->first_child[sn]; kid != -1; kid = L->next_child[kid]) {
+    if (sn_in_core[kid])
+      continue;
+
+    (void) recursive_dumb_panelize_ooc_supernodes(kid,
+                                                  FALSE,
+                                                  curr_panel,
+                                                  sn_in_core,
+                                                  sn_to_panel_map,
+                                                  L);
+  }
+
+  if (is_root)
+    return 0.0;
+
+  *curr_panel += 1;
+  sn_to_panel_map[sn] = *curr_panel;
+
+  return 0.0;
+}
+
+/*
+ * Greedy panel assignment for the out-of-core supernodes of the subtree
+ * rooted at sn.  Returns the memory still available in the current panel
+ * after the subtree has been placed.
+ *
+ * Starting from avail_mem, the remaining budget is threaded through the
+ * children in order (children whose sn_in_core flag is set are skipped).
+ * For a non-root sn, with need = sn_size*sn_up_size*sizeof(taucs_datatype)
+ *                              + sn_up_size*sizeof(int):
+ *   - if budget - need > 0, sn joins the current panel and the budget is
+ *     reduced by need;
+ *   - otherwise *curr_panel is incremented, sn is put in that new panel
+ *     and the budget becomes global_mem - need.
+ */
+static double
+recursive_panelize_ooc_supernodes(int sn,       /* root of the subtree */
+                                  int is_root,  /* nonzero: sn gets no panel */
+                                  double global_mem,
+                                  double avail_mem,
+                                  int* curr_panel,
+                                  int* sn_in_core,
+                                  int* sn_to_panel_map,
+                                  supernodal_factor_matrix* L)
+{
+  int    kid;
+  double budget = avail_mem;  /* memory left in the current panel */
+  double need;
+
+  for (kid = L->first_child[sn]; kid != -1; kid = L->next_child[kid]) {
+    if (sn_in_core[kid])
+      continue;
+
+    budget = recursive_panelize_ooc_supernodes(kid,
+                                               FALSE,
+                                               global_mem,
+                                               budget,
+                                               curr_panel,
+                                               sn_in_core,
+                                               sn_to_panel_map,
+                                               L);
+  }
+
+  if (is_root)
+    return budget;
+
+  need = (double)(L->sn_size)[sn]*(double)(L->sn_up_size)[sn]*sizeof(taucs_datatype)+(double)(L->sn_up_size)[sn]*sizeof(int);
+
+  if (budget-need > 0.0) {
+    sn_to_panel_map[sn] = *curr_panel;
+    budget = budget - need;
+  } else {
+    *curr_panel += 1;
+    sn_to_panel_map[sn] = *curr_panel;
+    budget = global_mem-need;
+  }
+
+  return budget;
+}
+
+static void
+recursive_leftlooking_supernodal_update_panel_ooc(int J,int K,
+						  int bitmap[],
+						  int* sn_to_panel_map,
+						  taucs_datatype* dense_update_matrix,
+						  taucs_io_handle* handle,
+						  taucs_ccs_matrix* A,
+						  supernodal_factor_matrix* L)
+{
+  int i,j,ir,ii;
+  int  child;
+  int* first_child   = L->first_child;
+  int* next_child    = L->next_child;
+  int sn_size_child = (L->sn_size)[K];
+  int sn_up_size_child = (L->sn_up_size)[K];
+  int exist_upd=0;
+  int first_row=0;
+  int row_count=0;
+  int updated_panel = sn_to_panel_map[J];
+  int curr_updated_sn = J;
+  int PK,M,N,LDA,LDB,LDC;
+ 
+  if(L->sn_up_size[K]-L->sn_size[K]>0){
+    
+    if(!(L->sn_struct)[K]){
+      L->sn_struct[K] = (int*)taucs_malloc(sn_up_size_child*sizeof(int));
+      taucs_io_read(handle,IO_BASE+K,1,sn_up_size_child,TAUCS_INT,L->sn_struct[K]);
+    }
+
+    for(i=sn_size_child;i<sn_up_size_child;i++){
+      /* We want to update only supernodes great than J with using of
+	 sort of indices */
+      if(L->col_to_sn_map[L->sn_struct[K][i]]<curr_updated_sn)
+	continue;
+
+      /* is this row index included in the columns of curr_updated_sn ? */
+      if(L->col_to_sn_map[L->sn_struct[K][i]]==curr_updated_sn){
+	if(!exist_upd) first_row = i;
+	row_count++;
+	exist_upd = 1;
+      }
+     
+      /* if end of update to curr_updated_sn or edge condition */
+      if(L->col_to_sn_map[L->sn_struct[K][i]]!=curr_updated_sn||
+	 i==sn_up_size_child-1)
+	if(exist_upd){ 
+	  if(!(L->up_blocks)[K]){
+	    (L->up_blocks)[K] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[K]-(L->sn_size)[K])
+							*((L->sn_size)[K]),
+							sizeof(taucs_datatype));
+	    taucs_io_read(handle,IO_BASE+L->n_sn+2*K+1,
+			  (L->sn_up_size)[K]-(L->sn_size)[K],
+			  (L->sn_size)[K] ,
+			  TAUCS_CORE_DATATYPE,(L->up_blocks)[K]);
+	  }
+	  LDA = LDB = (L->sn_up_size)[K]-(L->sn_size)[K];
+	  M  = sn_up_size_child - first_row ;
+	  LDC =  M;
+	  N  = row_count; 
+	  PK = L->sn_size[K];    
+	  
+	  /* The GEMM code computes on the upper triangle of the trapezoidal
+	     matrix, which is junk. */
+	  /*
+             taucs_gemm ("No Conjugate",
+		         "Conjugate",
+		         &M,&N,&PK,
+		         &taucs_one_const,
+		         &(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		         &(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+		         &taucs_zero_const,
+		         dense_update_matrix,&LDC);
+	  */
+
+	  /* This is the HERK+GEMM fix by Elad */
+	  taucs_herk ("Lower",
+		      "No Conjugate",
+		      &N,&PK,
+		      &taucs_one_real_const,
+		      &(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		      &taucs_zero_real_const,
+		      dense_update_matrix,&LDC);
+
+	  if(M-N > 0)
+	    {
+	      int newM = M - N;
+	      
+	      taucs_gemm ("No Conjugate",
+			  "Conjugate",
+			  &newM,&N,&PK,
+			  &taucs_one_const,
+			  &(L->up_blocks[K][first_row-sn_size_child+N]),&LDA,
+			  &(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+			  &taucs_zero_const,
+			  dense_update_matrix+N,&LDC);
+	    }
+	  /* end of GEMM/HERK+GEMM fix */ 
+
+	  if(!(L->sn_blocks)[curr_updated_sn])
+	    (L->sn_blocks)[curr_updated_sn] = 
+	      (taucs_datatype*)taucs_calloc(((L->sn_size)[curr_updated_sn])
+				      *((L->sn_size)[curr_updated_sn]),
+				      sizeof(taucs_datatype));
+    
+	  if(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn]>0)
+	    if(!(L->up_blocks)[curr_updated_sn]) 
+	      (L->up_blocks)[curr_updated_sn] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[curr_updated_sn]-(L->sn_size)[curr_updated_sn]) *((L->sn_size)[curr_updated_sn]),sizeof(taucs_datatype));
+
+	  if(!L->sn_struct[curr_updated_sn]){
+	    L->sn_struct[curr_updated_sn] = (int*)taucs_malloc((L->sn_up_size)[curr_updated_sn]*sizeof(int));
+	    taucs_io_read(handle,
+			  IO_BASE+curr_updated_sn,
+			  1,(L->sn_up_size)[curr_updated_sn],
+			  TAUCS_INT,L->sn_struct[curr_updated_sn]);
+	  }
+
+	  for(ii=0;ii<L->sn_size[curr_updated_sn];ii++) {
+	    bitmap[L->sn_struct[curr_updated_sn][ii]]=ii+1;
+	  }
+
+	  for(ii=L->sn_size[curr_updated_sn];ii<L->sn_up_size[curr_updated_sn];ii++){
+	    bitmap[L->sn_struct[curr_updated_sn][ii]] = ii-L->sn_size[curr_updated_sn]+1;
+	  }
+	   
+	  assert((double)row_count*(double)LDC < 2048.0*1024.0*1024.0);
+	  for(j=0;j<row_count;j++)
+	    for(ir=j;ir<row_count;ir++){
+	      /*
+		L->sn_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*L->sn_size[curr_updated_sn]+(bitmap[L->sn_struct[K][first_row+ir]]-1)] -= dense_update_matrix[j*LDC+ir];*/
+	      L->sn_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*L->sn_size[curr_updated_sn]+(bitmap[L->sn_struct[K][first_row+ir]]-1)] =
+		taucs_sub(L->sn_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*L->sn_size[curr_updated_sn]+(bitmap[L->sn_struct[K][first_row+ir]]-1)] , dense_update_matrix[j*LDC+ir]);
+
+	      /* to find overflows */
+	      assert((double)(bitmap[L->sn_struct[K][first_row+j]]-1)*(double)L->sn_size[curr_updated_sn] < 2048.0*1024.0*1024.0);
+	     }
+	  for(j=0;j<row_count;j++)
+	    for(ir=row_count;ir<M;ir++){
+	      /*L->up_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] -= dense_update_matrix[j*LDC+ir];*/
+
+	      L->up_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] =
+		taucs_sub(L->up_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] , dense_update_matrix[j*LDC+ir]);
+
+	      /* to find overflow */
+	      assert((double)(bitmap[L->sn_struct[K][first_row+j]]-1)*(double)(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn]) < 2048.0*1024.0*1024.0);
+	     }	
+	  for(ii=0;ii<L->sn_up_size[curr_updated_sn];ii++)
+	    bitmap[L->sn_struct[curr_updated_sn][ii]]=0;
+
+	  exist_upd = 0;
+	  row_count = 0;
+	}
+
+      /* is this row index included in the columns of sn in the same panel? */
+      if(L->col_to_sn_map[L->sn_struct[K][i]]!=curr_updated_sn)
+	if(sn_to_panel_map[L->col_to_sn_map[L->sn_struct[K][i]]]==updated_panel){
+	  curr_updated_sn = L->col_to_sn_map[L->sn_struct[K][i]];
+	  if(!exist_upd) first_row = i;
+	  row_count++;
+	  exist_upd = 1;
+	  if( i==sn_up_size_child-1)
+	    if(exist_upd){ 
+	      if(!(L->up_blocks)[K]){
+		(L->up_blocks)[K] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[K]-(L->sn_size)[K])*((L->sn_size)[K]),sizeof(taucs_datatype));
+		taucs_io_read(handle,IO_BASE+L->n_sn+2*K+1,
+			      (L->sn_up_size)[K]-(L->sn_size)[K],
+			      (L->sn_size)[K] ,
+			      TAUCS_CORE_DATATYPE,(L->up_blocks)[K]);
+	      }
+	      LDA = LDB = (L->sn_up_size)[K]-(L->sn_size)[K];
+	      M  = sn_up_size_child - first_row ;
+	      LDC =  M;
+	      N  = row_count; 
+	      PK = L->sn_size[K];    
+	      
+   
+	      /* The GEMM code computes on the upper triangle of the trapezoidal
+		 matrix, which is junk. */
+	      /*
+	      taucs_gemm ("No Conjugate",
+		     "Conjugate",
+		     &M,&N,&PK,
+		     &taucs_one_const,
+		     &(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+		     &(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+		     &taucs_zero_const,
+		     dense_update_matrix,&LDC);
+	      */
+
+	      /* This is the HERK+GEMM fix by Elad */
+	      taucs_herk ("Lower",
+			  "No Conjugate",
+			  &N,&PK,
+			  &taucs_one_real_const,
+			  &(L->up_blocks[K][first_row-sn_size_child]),&LDA,
+			  &taucs_zero_real_const,
+			  dense_update_matrix,&LDC);
+	      
+	      if(M-N > 0)
+		{
+		  int newM = M - N;
+		  
+		  taucs_gemm ("No Conjugate",
+			      "Conjugate",
+			      &newM,&N,&PK,
+			      &taucs_one_const,
+			      &(L->up_blocks[K][first_row-sn_size_child+N]),&LDA,
+			      &(L->up_blocks[K][first_row-sn_size_child]),&LDB,
+			      &taucs_zero_const,
+			      dense_update_matrix+N,&LDC);
+		}
+	      /* end of GEMM/HERK+GEMM fix */ 
+	      
+	      if(!(L->sn_blocks)[curr_updated_sn])
+		(L->sn_blocks)[curr_updated_sn] = 
+		  (taucs_datatype*)taucs_calloc(((L->sn_size)[curr_updated_sn])*((L->sn_size)[curr_updated_sn]),sizeof(taucs_datatype));
+    
+	      if(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn]>0)
+		if(!(L->up_blocks)[curr_updated_sn]) 
+		  (L->up_blocks)[curr_updated_sn] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[curr_updated_sn]-(L->sn_size)[curr_updated_sn]) *((L->sn_size)[curr_updated_sn]),sizeof(taucs_datatype));
+	      
+	      if(!L->sn_struct[curr_updated_sn]){
+		L->sn_struct[curr_updated_sn] = (int*)taucs_malloc((L->sn_up_size)[curr_updated_sn]*sizeof(int));
+		taucs_io_read(handle,IO_BASE+curr_updated_sn,1,(L->sn_up_size)[curr_updated_sn],TAUCS_INT,L->sn_struct[curr_updated_sn]);
+	      }
+
+	      for(ii=0;ii<L->sn_size[curr_updated_sn];ii++) {
+		bitmap[L->sn_struct[curr_updated_sn][ii]]=ii+1;
+	      }
+
+	      for(ii=L->sn_size[curr_updated_sn];ii<L->sn_up_size[curr_updated_sn];ii++){
+	    bitmap[L->sn_struct[curr_updated_sn][ii]] = ii-L->sn_size[curr_updated_sn]+1;
+	      }
+	  assert((double)row_count*(double)LDC < 2048.0*1024.0*1024.0);
+	      for(j=0;j<row_count;j++)
+		for(ir=j;ir<row_count;ir++){
+		  /*L->sn_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*L->sn_size[curr_updated_sn]+(bitmap[L->sn_struct[K][first_row+ir]]-1)] -= dense_update_matrix[j*LDC+ir];*/
+		  L->sn_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*L->sn_size[curr_updated_sn]+(bitmap[L->sn_struct[K][first_row+ir]]-1)] =
+		    taucs_sub(L->sn_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*L->sn_size[curr_updated_sn]+(bitmap[L->sn_struct[K][first_row+ir]]-1)] , dense_update_matrix[j*LDC+ir]);
+
+		  /* for find overflow */
+		  assert((double)(bitmap[L->sn_struct[K][first_row+j]]-1)*(double)L->sn_size[curr_updated_sn] < 2048.0*1024.0*1024.0);
+		}
+
+	      for(j=0;j<row_count;j++)
+		for(ir=row_count;ir<M;ir++){
+
+		  /*L->up_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] -= dense_update_matrix[j*LDC+ir];*/
+
+		  L->up_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] =
+		    taucs_sub(L->up_blocks[curr_updated_sn][(bitmap[L->sn_struct[K][first_row+j]]-1)*(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn])+(bitmap[L->sn_struct[K][ir+first_row]]-1)] , dense_update_matrix[j*LDC+ir]);
+
+		  /* for find overflow */
+		  assert((double)(bitmap[L->sn_struct[K][first_row+j]]-1)*(double)(L->sn_up_size[curr_updated_sn]-L->sn_size[curr_updated_sn]) < 2048.0*1024.0*1024.0);
+		}	
+	      for(ii=0;ii<L->sn_up_size[curr_updated_sn];ii++)
+		bitmap[L->sn_struct[curr_updated_sn][ii]]=0;
+
+	      exist_upd = 0;
+	      row_count = 0;
+
+	    }
+	}
+      
+    }
+    
+  }
+
+  /* free update sn from memory */
+  taucs_free((L->up_blocks   )[K]);
+  (L->up_blocks   )[K] = NULL;
+  taucs_free( L->sn_struct[K]);
+  L->sn_struct[K] = NULL;
+
+  if(first_row&&sn_to_panel_map[J]!=sn_to_panel_map[K]){   
+  
+    for (child = first_child[K]; child != -1; child = next_child[child]) {
+      recursive_leftlooking_supernodal_update_panel_ooc(J,child,
+							bitmap,
+							sn_to_panel_map,
+							dense_update_matrix,
+							handle,A,L);
+    }
+  } 
+
+}
+
+
+/*
+ * Factor supernode sn and everything below it in the supernodal tree.
+ *
+ * Children marked in sn_in_core are read, factored by the in-core
+ * left-looking routine and appended back to handle; every other child is
+ * handled by recursing into this function.  Unless is_root is set, sn
+ * itself is then allocated (its structure is read from handle if absent),
+ * updated from the children that belong to a different panel, factored,
+ * appended to handle and released again.
+ *
+ * Returns 0 on success, -1 as soon as any step reports a failure.
+ */
+static int
+recursive_leftlooking_supernodal_factor_panel_llt_ooc
+(int sn,    /* this supernode */
+ int father_sn,
+ int is_root,/* is sn the root?*/
+ int* map,
+ int* sn_in_core,
+ int* sn_to_panel_map,
+ int*  panel_max_size,
+ taucs_io_handle* handle,
+ taucs_ccs_matrix* A,
+ supernodal_factor_matrix* L)
+{
+  int  child;
+  int* first_child   = L->first_child;
+  int* next_child    = L->next_child;
+  taucs_datatype* dense_update_matrix = NULL;
+
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    if(sn_in_core[child]){
+      /* child is marked in-core: read it, factor it, append it */
+      if (recursive_read_L_cols(child,FALSE,handle,L))
+	return -1;
+      if (recursive_leftlooking_supernodal_factor_llt(child,FALSE,map,A,L))
+	return -1; /* failure */
+      if (recursive_append_L(child,FALSE,handle,L))
+	return -1;
+    }
+    else
+      if (recursive_leftlooking_supernodal_factor_panel_llt_ooc(child,sn,
+								FALSE,
+								map,
+								sn_in_core,
+								sn_to_panel_map,
+								panel_max_size,
+								handle,
+								A,L)) {
+	/* failure */
+	return -1;
+      }
+  }
+
+  if (!is_root) { 
+    if(!(L->sn_blocks)[sn])
+      (L->sn_blocks)[sn] = 
+	(taucs_datatype*)taucs_calloc(((L->sn_size)[sn])*((L->sn_size)[sn]),sizeof(taucs_datatype));
+
+    if(L->sn_up_size[sn]-L->sn_size[sn]>0)
+      if(!(L->up_blocks)[sn]) 
+	(L->up_blocks)[sn] = (taucs_datatype*)taucs_calloc(((L->sn_up_size)[sn]-(L->sn_size)[sn]) *((L->sn_size)[sn]),sizeof(taucs_datatype));
+
+    if(!L->sn_struct[sn]){
+      L->sn_struct[sn] = (int*)taucs_malloc((L->sn_up_size)[sn]*sizeof(int));
+      taucs_io_read(handle,IO_BASE+sn,1,(L->sn_up_size)[sn],TAUCS_INT,L->sn_struct[sn]);
+    }
+
+    /* scratch buffer for the dense updates, sized from sn's panel */
+    if (!dense_update_matrix) 
+      dense_update_matrix = (taucs_datatype*) taucs_calloc(panel_max_size[sn_to_panel_map[sn]],sizeof(taucs_datatype));
+
+    /* run the panel update for children that lie in a different panel */
+    for (child = first_child[sn]; child != -1; child = next_child[child]) {
+      if(sn_to_panel_map[sn]!=sn_to_panel_map[child])
+	recursive_leftlooking_supernodal_update_panel_ooc(sn,child,
+							  map,
+							  sn_to_panel_map,
+							  dense_update_matrix,
+							  handle,A,L);
+    }
+
+    if (leftlooking_supernodal_front_factor(sn,map,A,L)) {
+      /* nonpositive pivot: dense_update_matrix is local to this call,
+	 so it must be released here or it leaks on the failure path */
+      taucs_free(dense_update_matrix);
+      return -1;
+    }
+
+    /* append the sn_size x sn_size block, then the up_blocks part */
+    taucs_io_append(handle,IO_BASE+L->n_sn+2*sn,
+		    L->sn_size[sn],L->sn_size[sn],
+		    TAUCS_CORE_DATATYPE,L->sn_blocks[sn]);
+
+    taucs_io_append(handle,IO_BASE+L->n_sn+2*sn+1,
+		    L->sn_up_size[sn] - L->sn_size[sn],L->sn_size[sn],
+		    TAUCS_CORE_DATATYPE,L->up_blocks[sn]);
+
+    /* sn is in its father's panel: run the father's panel update now */
+    if(sn_to_panel_map[sn]==sn_to_panel_map[father_sn])
+      recursive_leftlooking_supernodal_update_panel_ooc(father_sn,sn,
+							map,
+							sn_to_panel_map,
+							dense_update_matrix,
+							handle,A,L);
+    /* sn has been appended to handle; drop the in-memory copies */
+    taucs_free(dense_update_matrix);
+    taucs_free((L->sn_blocks)[sn]);
+    taucs_free((L->up_blocks)[sn]); 
+    taucs_free((L->sn_struct)[sn]);
+    (L->sn_blocks   )[sn] = NULL;      
+    (L->up_blocks   )[sn] = NULL;
+    (L->sn_struct   )[sn] = NULL;
+  }
+
+  return 0;
+}
+
+/*
+ * Out-of-core supernodal left-looking LL^T factorization of A.
+ *
+ * The factor is appended to handle, which the caller must have created
+ * already; this routine neither creates nor closes it.  memory is the
+ * amount of main memory the factorization may use, in bytes (the
+ * diagnostics print it divided by 1048576 as MB).
+ *
+ * Steps: symbolic elimination, writing the supernodal tree description
+ * to the file, filling sn_in_core, grouping supernodes into panels with
+ * recursive_smart_panelize_ooc_supernodes, and the recursive numerical
+ * factorization.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int taucs_dtl(ooc_factor_llt)(taucs_ccs_matrix* A, 
+			      taucs_io_handle* handle,
+			      double memory)
+{
+  supernodal_factor_matrix* L;
+  int i;
+  int* map;
+  int* sn_in_core;
+  int* sn_to_panel_map;
+  int* panel_max_size;
+  int n_pn=0;
+  double wtime, ctime;
+
+  double memory_overhead;
+  double  max_multiple=0.0;
+  int ind_max_mult = 0;
+
+  /* compute fixed memory overhead */
+  
+  memory_overhead = 
+    4.0*(double)((A->n)*sizeof(int)) + /* integer vectors in L */
+    3.0*(double)((A->n)*sizeof(int)) + /* pointer arrays in L  */
+    2.0*(double)((A->n)*sizeof(int)) + /* integer vectors in program  */
+    4.0*3.0*(double)((A->n)*sizeof(int));  /* singlefile matrix arrays */
+
+  taucs_printf("\t\tOOC memory overhead bound %.0lf MB (out of %.0lf MB available)\n",
+	       memory_overhead/1048576.0,memory/1048576.0);
+
+  taucs_printf(">>> 1\n");
+
+  if ( memory - memory_overhead < 
+       2.0*(double)((A->n)*sizeof(taucs_datatype)) + 
+       2.0*(double)((A->n)*sizeof(int)) ) {
+    taucs_printf("\t\ttaucs_ccs_factor_llt_ll_ooc: not enough memory\n");
+    return -1;
+    }
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  L = multifrontal_supernodal_create();
+  /*
+  if(!MULTIFILE)
+    handle = taucs_io_create_singlefile("/tmp/taucs.L");
+  else
+    handle = taucs_io_create_multifile("/tmp/taucs.L");
+  */
+  taucs_io_append(handle,5,1,1,TAUCS_INT,&(A->n));
+
+  taucs_ccs_ooc_symbolic_elimination(A,L,
+				     TRUE /* sort row indices */,
+				     TRUE /* return col_to_sn_map */,
+				     (memory - memory_overhead)/3.0,
+				     ooc_sn_struct_handler,handle);
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  /* we now compute an exact memory overhead bound using n_sn */
+  memory_overhead = 
+    /*    1.0*(double)((L->n)*sizeof(int)) + */ /* integer vector in L */
+    4.0*(double)((L->n_sn)*sizeof(int)) + /* integer vectors in L */
+    3.0*(double)((L->n_sn)*sizeof(int)) + /* pointer arrays in L  */
+    2.0*(double)((L->n_sn)*sizeof(int)) + /* integer vectors in program  */
+    4.0*3.0*(double)((L->n_sn)*sizeof(int));  /* singlefile matrix arrays */
+
+  taucs_printf("\t\tOOC actual memory overhead %.0lf MB (out of %.0lf MB available)\n",
+	       memory_overhead/1048576.0,memory/1048576.0);
+ 
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  taucs_io_append(handle,0,1,1,TAUCS_INT,&(L->n_sn));
+  taucs_io_append(handle,1,1,L->n_sn+1,TAUCS_INT,L->first_child);
+  taucs_io_append(handle,2,1,L->n_sn+1,TAUCS_INT,L->next_child);
+  taucs_io_append(handle,3,1,L->n_sn,TAUCS_INT,L->sn_size);
+  taucs_io_append(handle,4,1,L->n_sn,TAUCS_INT,L->sn_up_size);
+  /*taucs_io_append(handle,5,1,1,TAUCS_INT,&(L->n));*/
+  taucs_io_append(handle,6,1,1,TAUCS_INT,&(A->flags));
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Prepare L = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  map  = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  sn_in_core = (int*)taucs_malloc((L->n_sn+1)*sizeof(int));
+  sn_to_panel_map = (int*)taucs_malloc((L->n_sn+1)*sizeof(int));
+  for(i=0;i<=L->n_sn;i++){
+    sn_in_core[i] = 0;
+    sn_to_panel_map[i]=-1;
+  }
+
+  for(i=0;i<L->n_sn;i++){
+    (L->sn_blocks)[i] = NULL;
+    (L->up_blocks)[i] = NULL;
+    (L->sn_struct)[i] = NULL;
+  }
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  if(recursive_compute_supernodes_ll_in_core(L->n_sn,
+					     TRUE,
+					     (memory - memory_overhead)/3.0,
+					     sn_in_core,
+					     L)<0.0) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(sn_in_core);
+    taucs_free(sn_to_panel_map);  
+    taucs_free(map);
+    return -1;
+  }
+  
+ 
+  taucs_printf("\t\tOOC Supernodal Left-Looking: panel-is-paged\n");
+
+  if(recursive_smart_panelize_ooc_supernodes(L->n_sn,
+					     TRUE,
+					     (memory - memory_overhead)/3.0,
+					     &n_pn,
+					     sn_in_core,
+					     sn_to_panel_map,
+					     L)<0.0){
+    ooc_supernodal_factor_free(L);
+    taucs_free(sn_in_core);
+    taucs_free(sn_to_panel_map);  
+    taucs_free(map);
+    return -1;
+    }
+
+  /* it will be at least one panel even empty */
+  n_pn++; 
+
+  /*for(i=0;i<L->n_sn;i++){
+    taucs_printf("sn_in_core[%d] = %d\n",i,sn_in_core[i]);
+    taucs_printf("sn_to_panel_map[%d] = %d\n",i,sn_to_panel_map[i]);
+    }*/
+  taucs_printf("\t\tOOC Supernodal Left-Looking: %d panels\n",n_pn);
+  /* compute max dense matrix size for every panel */
+  panel_max_size = (int*)taucs_calloc(n_pn,sizeof(int));
+  for(i=0;i<L->n_sn;i++){
+    if((double)L->sn_up_size[i]*(double)L->sn_size[i]>max_multiple){
+      max_multiple = (double)L->sn_up_size[i]*(double)L->sn_size[i]; 
+      ind_max_mult = i;
+      }
+    if(sn_to_panel_map[i]!=-1){
+      if(L->sn_up_size[i]*L->sn_size[i]>panel_max_size[sn_to_panel_map[i]]) 
+	panel_max_size[sn_to_panel_map[i]] = L->sn_up_size[i]*L->sn_size[i];
+    }
+  }
+  /*
+  taucs_printf("debug***: L->n_sn = %d max(sn_size*sn_up_size) = %lf sn_size[%d] = %d sn_up_size[%d] = %d\n ",L->n_sn,max_multiple,ind_max_mult,L->sn_size[ind_max_mult],ind_max_mult,L->sn_up_size[ind_max_mult]);
+  */
+
+  /*  for(i=0;i<n_pn;i++)
+      taucs_printf(" panel_max_size[%d] = %d\n",i, panel_max_size[i]);*/
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Scheduling = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+
+  if (recursive_leftlooking_supernodal_factor_panel_llt_ooc(L->n_sn,
+							    L->n_sn,  
+							    TRUE, 
+							    map,
+							    sn_in_core,
+							    sn_to_panel_map,
+							    panel_max_size,
+							    handle,
+							    A,L)) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(map);
+    taucs_free(sn_in_core);      /* the scheduling arrays are owned here */
+    taucs_free(sn_to_panel_map); /* and must not outlive a failed factor */
+    taucs_free(panel_max_size);
+    return -1;
+  }
+ 
+ taucs_printf("\t\tOOC Supernodal Left-Looking:\n");
+ taucs_printf("\t\t\tread count           = %.0f \n",handle->nreads);
+ taucs_printf("\t\t\tread volume (bytes)  = %.2e \n",handle->bytes_read);
+ taucs_printf("\t\t\tread time (seconds)  = %.0f \n",handle->read_time);
+ taucs_printf("\t\t\twrite count          = %.0f \n",handle->nwrites);
+ taucs_printf("\t\t\twrite volume (bytes) = %.2e \n",handle->bytes_written);
+ taucs_printf("\t\t\twrite time (seconds) = %.0f \n",handle->write_time);
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  taucs_free(map);
+  taucs_free(sn_in_core);
+  taucs_free(sn_to_panel_map);  
+  taucs_free(panel_max_size); /* allocated above, only read by the factor */
+  /*taucs_io_close(handle);*/
+  ooc_supernodal_factor_free(L);
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Cleanup = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  /*return (void*) "/tmp/taucs.L";*/
+  /*return (void*) "/rabani2/queen/taucs.L";*/
+
+  return 0;
+}
+
+/*************************************************************/
+/* SAME ROUTINE, WITH CHOICE OF PANELIZATION FOR TESTING     */
+/*************************************************************/
+
+/*
+ * Same factorization as taucs_dtl(ooc_factor_llt), with the panelization
+ * scheme selected by panelization_method:
+ *   0  recursive_smart_panelize_ooc_supernodes ("panel-is-paged")
+ *   1  recursive_panelize_ooc_supernodes       ("panel-in-memory")
+ *   2  recursive_dumb_panelize_ooc_supernodes  ("panel-is-supernode")
+ * NOTE(review): any other value skips panelization altogether, leaving
+ * sn_to_panel_map at -1 everywhere -- confirm callers never pass one.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int taucs_dtl(ooc_factor_llt_panelchoice)(taucs_ccs_matrix* A, 
+					  taucs_io_handle* handle,
+					  double memory,
+					  int panelization_method)
+{
+  supernodal_factor_matrix* L;
+  int i;
+  int* map;
+  int* sn_in_core;
+  int* sn_to_panel_map;
+  int* panel_max_size;
+  int n_pn=0;
+  double wtime, ctime;
+  double memory_overhead;
+  double  max_multiple=0.0;
+  int ind_max_mult = 0;
+
+  /* compute fixed memory overhead */
+  
+  memory_overhead = 
+    4.0*(double)((A->n)*sizeof(int)) + /* integer vectors in L */
+    3.0*(double)((A->n)*sizeof(int)) + /* pointer arrays in L  */
+    2.0*(double)((A->n)*sizeof(int)) + /* integer vectors in program  */
+    4.0*3.0*(double)((A->n)*sizeof(int));  /* singlefile matrix arrays */
+
+  taucs_printf("\t\tOOC memory overhead bound %.0lf MB (out of %.0lf MB available)\n",
+	       memory_overhead/1048576.0,memory/1048576.0);
+
+  taucs_printf("*** 1\n");
+
+  if ( memory - memory_overhead < 
+       2.0*(double)((A->n)*sizeof(taucs_datatype)) + 
+       2.0*(double)((A->n)*sizeof(int)) ) {
+    taucs_printf("\t\ttaucs_ccs_factor_llt_ll_ooc: not enough memory\n");
+    return -1;
+    }
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  taucs_printf("*** 2\n");
+
+  L = multifrontal_supernodal_create();
+  taucs_io_append(handle,5,1,1,TAUCS_INT,&(A->n));
+
+  taucs_printf("*** 3\n");
+
+  taucs_ccs_ooc_symbolic_elimination(A,L,
+				     TRUE /* sort row indices */,
+				     TRUE /* return col_to_sn_map */,
+				     (memory - memory_overhead)/3.0,
+				     ooc_sn_struct_handler,handle);
+  
+  taucs_printf("*** 4\n");
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  /* we now compute an exact memory overhead bound using n_sn */
+  memory_overhead = 
+    /*    1.0*(double)((L->n)*sizeof(int)) + */ /* integer vector in L */
+    4.0*(double)((L->n_sn)*sizeof(int)) + /* integer vectors in L */
+    3.0*(double)((L->n_sn)*sizeof(int)) + /* pointer arrays in L  */
+    2.0*(double)((L->n_sn)*sizeof(int)) + /* integer vectors in program  */
+    4.0*3.0*(double)((L->n_sn)*sizeof(int));  /* singlefile matrix arrays */
+
+  taucs_printf("\t\tOOC actual memory overhead %.0lf MB (out of %.0lf MB available)\n",
+	       memory_overhead/1048576.0,memory/1048576.0);
+ 
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  taucs_io_append(handle,0,1,1,TAUCS_INT,&(L->n_sn));
+  taucs_io_append(handle,1,1,L->n_sn+1,TAUCS_INT,L->first_child);
+  taucs_io_append(handle,2,1,L->n_sn+1,TAUCS_INT,L->next_child);
+  taucs_io_append(handle,3,1,L->n_sn,TAUCS_INT,L->sn_size);
+  taucs_io_append(handle,4,1,L->n_sn,TAUCS_INT,L->sn_up_size);
+  /*taucs_io_append(handle,5,1,1,TAUCS_INT,&(L->n));*/
+  taucs_io_append(handle,6,1,1,TAUCS_INT,&(A->flags));
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Prepare L = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  map  = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  sn_in_core = (int*)taucs_malloc((L->n_sn+1)*sizeof(int));
+  sn_to_panel_map = (int*)taucs_malloc((L->n_sn+1)*sizeof(int));
+  for(i=0;i<=L->n_sn;i++){
+    sn_in_core[i] = 0;
+    sn_to_panel_map[i]=-1;
+  }
+
+  for(i=0;i<L->n_sn;i++){
+    (L->sn_blocks)[i] = NULL;
+    (L->up_blocks)[i] = NULL;
+    (L->sn_struct)[i] = NULL;
+  }
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+  if(recursive_compute_supernodes_ll_in_core(L->n_sn,
+					     TRUE,
+					     (memory - memory_overhead)/3.0,
+					     sn_in_core,
+					     L)<0.0) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(sn_in_core);
+    taucs_free(sn_to_panel_map);  
+    taucs_free(map);
+    return -1;
+  }
+
+  if (panelization_method == 1) {
+    taucs_printf("\t\tOOC Supernodal Left-Looking: panel-in-memory\n");
+    if(recursive_panelize_ooc_supernodes(L->n_sn,
+					 TRUE,
+					 (memory - memory_overhead)/3.0,
+					 (memory - memory_overhead)/3.0,
+					 &n_pn,
+					 sn_in_core,
+					 sn_to_panel_map,
+					 L)<0.0){
+      ooc_supernodal_factor_free(L);
+      taucs_free(sn_in_core);
+      taucs_free(sn_to_panel_map);  
+      taucs_free(map);
+      return -1;
+    }
+  } 
+ 
+  if (panelization_method == 0) {
+    taucs_printf("\t\tOOC Supernodal Left-Looking: panel-is-paged\n");
+    if(recursive_smart_panelize_ooc_supernodes(L->n_sn,
+					       TRUE,
+					       (memory - memory_overhead)/3.0,
+					       &n_pn,
+					       sn_in_core,
+					       sn_to_panel_map,
+					       L)<0.0){
+      ooc_supernodal_factor_free(L);
+      taucs_free(sn_in_core);
+      taucs_free(sn_to_panel_map);  
+      taucs_free(map);
+      return -1;
+    }
+  }
+
+  if (panelization_method == 2) {
+    taucs_printf("\t\tOOC Supernodal Left-Looking: panel-is-supernode\n");
+    if (recursive_dumb_panelize_ooc_supernodes(L->n_sn,
+					       TRUE,
+					       &n_pn,
+					       sn_in_core,
+					       sn_to_panel_map,
+					       L)<0.0){
+      ooc_supernodal_factor_free(L);
+      taucs_free(sn_in_core);
+      taucs_free(sn_to_panel_map);  
+      taucs_free(map);
+      return -1;
+    }
+  }
+
+  /* it will be at least one panel even empty */
+  n_pn++; 
+
+  taucs_printf("\t\tOOC Supernodal Left-Looking: %d panels\n",n_pn);
+  /* compute max dense matrix size for every panel */
+  panel_max_size = (int*)taucs_calloc(n_pn,sizeof(int));
+  for(i=0;i<L->n_sn;i++){
+    if((double)L->sn_up_size[i]*(double)L->sn_size[i]>max_multiple){
+      max_multiple = (double)L->sn_up_size[i]*(double)L->sn_size[i]; 
+      ind_max_mult = i;
+      }
+    if(sn_to_panel_map[i]!=-1){
+      if(L->sn_up_size[i]*L->sn_size[i]>panel_max_size[sn_to_panel_map[i]]) 
+	panel_max_size[sn_to_panel_map[i]] = L->sn_up_size[i]*L->sn_size[i];
+    }
+  }
+  /*
+  taucs_printf("debug***: L->n_sn = %d max(sn_size*sn_up_size) = %lf sn_size[%d] = %d sn_up_size[%d] = %d\n ",L->n_sn,max_multiple,ind_max_mult,L->sn_size[ind_max_mult],ind_max_mult,L->sn_up_size[ind_max_mult]);
+  */
+
+  /*  for(i=0;i<n_pn;i++)
+      taucs_printf(" panel_max_size[%d] = %d\n",i, panel_max_size[i]);*/
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Scheduling = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+
+  if (recursive_leftlooking_supernodal_factor_panel_llt_ooc(L->n_sn,
+							    L->n_sn,  
+							    TRUE, 
+							    map,
+							    sn_in_core,
+							    sn_to_panel_map,
+							    panel_max_size,
+							    handle,
+							    A,L)) {
+    ooc_supernodal_factor_free(L);
+    taucs_free(map);
+    taucs_free(sn_in_core);      /* the scheduling arrays are owned here */
+    taucs_free(sn_to_panel_map); /* and must not outlive a failed factor */
+    taucs_free(panel_max_size);
+    return -1;
+  }
+ 
+ taucs_printf("\t\tOOC Supernodal Left-Looking:\n");
+ taucs_printf("\t\t\tread count           = %.0f \n",handle->nreads);
+ taucs_printf("\t\t\tread volume (bytes)  = %.2e \n",handle->bytes_read);
+ taucs_printf("\t\t\tread time (seconds)  = %.0f \n",handle->read_time);
+ taucs_printf("\t\t\twrite count          = %.0f \n",handle->nwrites);
+ taucs_printf("\t\t\twrite volume (bytes) = %.2e \n",handle->bytes_written);
+ taucs_printf("\t\t\twrite time (seconds) = %.0f \n",handle->write_time);
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  taucs_free(map);
+  taucs_free(sn_in_core);
+  taucs_free(sn_to_panel_map);  
+  taucs_free(panel_max_size); /* allocated above, only read by the factor */
+  /*taucs_io_close(handle);*/
+  ooc_supernodal_factor_free(L);
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tOOC Supernodal Left-Looking Cleanup = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+  
+  /*return (void*) "/tmp/taucs.L";*/
+  /*return (void*) "/rabani2/queen/taucs.L";*/
+
+  return 0;
+}
+
+#else /* TAUCS_CORE_GENERAL */
+
+/*************************************************************/
+/* generic interfaces to user-callable routines              */
+/*************************************************************/
+
+/* Generic entry point: hand the out-of-core LL^T factorization to the
+   routine compiled for the data type bit that is set in A->flags.
+   If no compiled-in type matches, asserts and returns -1. */
+int taucs_ooc_factor_llt(taucs_ccs_matrix* A,
+			 taucs_io_handle*  L,
+			 double memory)
+{
+#ifdef TAUCS_CONFIG_DREAL
+  if ((A->flags & TAUCS_DOUBLE) != 0)
+    return taucs_dooc_factor_llt(A,L,memory);
+#endif
+#ifdef TAUCS_CONFIG_SREAL
+  if ((A->flags & TAUCS_SINGLE) != 0)
+    return taucs_sooc_factor_llt(A,L,memory);
+#endif
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((A->flags & TAUCS_DCOMPLEX) != 0)
+    return taucs_zooc_factor_llt(A,L,memory);
+#endif
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((A->flags & TAUCS_SCOMPLEX) != 0)
+    return taucs_cooc_factor_llt(A,L,memory);
+#endif
+  /* none of the compiled-in data types matched */
+  assert(0);
+  return -1;
+}
+
+/* Generic entry point for the factorization with an explicit panelization
+   choice: forwards to the routine compiled for the data type bit set in
+   A->flags.  If no compiled-in type matches, asserts and returns -1. */
+int taucs_ooc_factor_llt_panelchoice(taucs_ccs_matrix* A,
+				     taucs_io_handle*  L,
+				     double memory,
+				     int panelchoice)
+{
+#ifdef TAUCS_CONFIG_DREAL
+  if ((A->flags & TAUCS_DOUBLE) != 0)
+    return taucs_dooc_factor_llt_panelchoice(A,L,memory,panelchoice);
+#endif
+#ifdef TAUCS_CONFIG_SREAL
+  if ((A->flags & TAUCS_SINGLE) != 0)
+    return taucs_sooc_factor_llt_panelchoice(A,L,memory,panelchoice);
+#endif
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((A->flags & TAUCS_DCOMPLEX) != 0)
+    return taucs_zooc_factor_llt_panelchoice(A,L,memory,panelchoice);
+#endif
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((A->flags & TAUCS_SCOMPLEX) != 0)
+    return taucs_cooc_factor_llt_panelchoice(A,L,memory,panelchoice);
+#endif
+  /* none of the compiled-in data types matched */
+  assert(0);
+  return -1;
+}
+
+/* 
+   This generic function retrieves the data type flags from the file
+   (record 6, where the factorization routines append A->flags) and
+   uses them to call the matching specialized solve routine.  If no
+   compiled-in data type matches, it asserts and returns -1.
+*/
+
+int taucs_ooc_solve_llt (void* L /* actual type: taucs_io_handle* */,
+			 void* x, void* b)
+{
+  /* start from 0: if the read below does not fill flags, no type bit
+     is set and we fall through to the error exit instead of
+     dispatching on an indeterminate value */
+  int flags = 0;
+
+  taucs_io_read((taucs_io_handle*)L,
+		6,1,1,TAUCS_INT,
+		&flags);
+#ifdef TAUCS_CONFIG_DREAL
+  if (flags & TAUCS_DOUBLE)
+    return taucs_dooc_solve_llt(L,x,b);
+#endif
+#ifdef TAUCS_CONFIG_SREAL
+  if (flags & TAUCS_SINGLE)
+    return taucs_sooc_solve_llt(L,x,b);
+#endif
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if (flags & TAUCS_DCOMPLEX)
+    return taucs_zooc_solve_llt(L,x,b);
+#endif
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if (flags & TAUCS_SCOMPLEX)
+    return taucs_cooc_solve_llt(L,x,b);
+#endif
+
+  assert(0);
+  return -1;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+/*************************************************************/
+/* end of file                                               */
+/*************************************************************/
+
+
+
+
diff --git a/contrib/taucs/src/taucs_ccs_ooc_lu.c b/contrib/taucs/src/taucs_ccs_ooc_lu.c
new file mode 100644
index 0000000000000000000000000000000000000000..80860d57d448777c72a8bf9d22558f0015ec33bf
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_ooc_lu.c
@@ -0,0 +1,3983 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*                                                       */
+/*********************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <math.h>
+#include <assert.h>
+
+#include "taucs.h"
+
+/* Record indices of the fixed header entries of the out-of-core LU
+   file (see the file layout description further down). */
+#define HEADER_NROWS   0
+#define HEADER_NCOLS   1
+#define HEADER_FLAGS   2
+#define HEADER_COLPERM 3
+#define HEADER_IPIVOTS 4
+#define HEADER_LCLEN   5
+#define HEADER_UCLEN   6
+/* Number of header records: the four per-column records of column j
+   are addressed as HEADER+4*j .. HEADER+4*j+3 by the column I/O
+   helpers below. */
+#define HEADER 7
+
+#ifndef TAUCS_CORE_GENERAL
+
+#ifdef OSTYPE_win32
+#include <io.h>
+#else
+#include <unistd.h>
+#include <sys/uio.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#define iabs(x) ((x) > 0 ? (x) : (-(x)))
+
+/* Memory budget still available to the routines of this file, in
+   bytes: skel_init reserves I/O buffers from it and divides what is
+   left by the size of an index pair to size its buffer. */
+static double remaining_memory;
+
+/*********************************************************/
+/* NEW IO ROUTINES                                       */
+/*********************************************************/
+
+/*
+  Layout of data structures in the file:
+  m       (number of rows)
+  n       (number of cols)
+  flags
+  colperm
+  ipivots
+  Lclen      (nonzeros per column in L)
+  Uclen      (nonzeros per column in U)
+  Lrowind[0]
+  Lvalues[0]
+  Urowind[0]
+  Uvalues[0]
+  Lrowind[1]
+  Lvalues[1]
+  Urowind[1]
+  Uvalues[1]
+  ...
+*/
+  
+/* Append column j of L to the factor file LU: the nnz row indices in
+   ind go to the first of the column's four records, the nnz values
+   in re to the second.  The results of the two appends are not
+   checked; the function always returns 0. */
+static int
+Lappendcol(taucs_io_handle* LU, int j, int nnz, int* ind, taucs_datatype* re)
+{
+  const int first_record = HEADER + 4*j; /* records of column j */
+
+  taucs_io_append(LU, first_record + 0, nnz, 1, TAUCS_INT, ind);
+  taucs_io_append(LU, first_record + 1, nnz, 1, TAUCS_CORE_DATATYPE, re);
+
+  return 0;
+}
+
+/* Append column j of U to the factor file LU: the nnz row indices in
+   ind go to the third of the column's four records, the nnz values
+   in re to the fourth.  The results of the two appends are not
+   checked; the function always returns 0. */
+static int
+Uappendcol(taucs_io_handle* LU, int j, int nnz, int* ind, taucs_datatype* re)
+{
+  const int first_record = HEADER + 4*j; /* records of column j */
+
+  taucs_io_append(LU, first_record + 2, nnz, 1, TAUCS_INT, ind);
+  taucs_io_append(LU, first_record + 3, nnz, 1, TAUCS_CORE_DATATYPE, re);
+
+  return 0;
+}
+
+/* Read column j of L from the factor file LU: nnz row indices are
+   read into ind from the first of the column's four records and nnz
+   values into re from the second.  The results of the two reads are
+   not checked; the function always returns 0. */
+static int
+Lreadcol(taucs_io_handle* LU, int j, int nnz, int* ind, taucs_datatype* re)
+{
+  const int first_record = HEADER + 4*j; /* records of column j */
+
+  taucs_io_read(LU, first_record + 0, nnz, 1, TAUCS_INT, ind);
+  taucs_io_read(LU, first_record + 1, nnz, 1, TAUCS_CORE_DATATYPE, re);
+
+  return 0;
+}
+
+/* Read column j of U from the factor file LU: nnz row indices are
+   read into ind from the third of the column's four records and nnz
+   values into re from the fourth.  The results of the two reads are
+   not checked; the function always returns 0. */
+static int
+Ureadcol(taucs_io_handle* LU, int j, int nnz, int* ind, taucs_datatype* re)
+{
+  const int first_record = HEADER + 4*j; /* records of column j */
+
+  taucs_io_read(LU, first_record + 2, nnz, 1, TAUCS_INT, ind);
+  taucs_io_read(LU, first_record + 3, nnz, 1, TAUCS_CORE_DATATYPE, re);
+
+  return 0;
+}
+
+/*********************************************************/
+/* ANALYSIS PHASE                                        */
+/*********************************************************/
+
+/* BUFFER SIZES AND MEMORY MANAGEMENT */
+
+/* Size in bytes of one I/O buffer: the minimal size of I/O buffers
+   for good performance. */
+static int get_iobufsize()
+{
+  enum { IOBUF_MIN_BYTES = 65536 };
+
+  return IOBUF_MIN_BYTES;
+}
+
+/* COLUMN-HEAP OPERATIONS */
+
+/* The last entry in each input run must be extracted last among
+   all the entries from that run, otherwise the heap might
+   overflow */
+
+/* Order the heap entries at positions i and j.  Every entry occupies
+   three consecutive ints of heap; the second int (the column) is the
+   primary key and the third (the run, and the order within the run)
+   breaks ties.  Returns -1, 0 or 1. */
+static int heap_compare(int* heap, int i, int j)
+{
+  const int* a = heap + 3*i;
+  const int* b = heap + 3*j;
+  int field;
+
+  /* field 1 is the column, field 2 the run tag */
+  for (field = 1; field <= 2; field++) {
+    if (a[field] != b[field])
+      return (a[field] < b[field]) ? -1 : 1;
+  }
+
+  return 0;
+}
+
+/* Restore the heap order below position p: the entry at p is sifted
+   down until neither of its children is smaller (in the sense of
+   heap_compare).  The children of position p are 2p and 2p+1, which
+   matches the parent = child/2 rule used by heap_insert. */
+static void heap_heapify(int* heap, int* heapsize,int p)
+{
+  for (;;) {
+    int left  = p * 2;
+    int right = left + 1;
+    int best  = p;
+    int f, saved;
+
+    if (left != 2*p || right != 2*p+1)
+      taucs_printf("heap: left-right computation failed\n");
+
+    if (left < *heapsize && heap_compare(heap,left,p) < 0)
+      best = left;
+    if (right < *heapsize && heap_compare(heap,right,best) < 0)
+      best = right;
+
+    if (best == p)
+      return; /* heap order holds at p */
+
+    /* exchange the three-int entries at p and best */
+    for (f = 0; f < 3; f++) {
+      saved          = heap[3*p+f];
+      heap[3*p+f]    = heap[3*best+f];
+      heap[3*best+f] = saved;
+    }
+
+    p = best; /* continue below the child that received the entry */
+  }
+}
+
+#if 0
+/* Disabled (compiled out) list-based variants of the heap operations:
+   x_heap_insert stores the new entry after the last one, and
+   x_heap_extract_min scans all entries for the smallest column and
+   closes the gap by shifting the following entries down. */
+static void x_heap_insert(int* heap, int* heapsize, int i, int j, int k)
+{
+  int l = *heapsize;
+
+  heap[3*l]   = i;
+  heap[3*l+1] = j;
+  heap[3*l+2] = k;
+
+  (*heapsize)++;
+}  
+
+static void x_heap_extract_min(int* heap, int* heapsize, int* i, int* j, int* k) 
+{
+  int l,m,mv;
+
+  if (*heapsize <= 0)
+    taucs_printf("heap: Trying to extract from an expty heap\n");
+
+  /* linear search for the entry with the smallest column */
+  mv = INT_MAX;
+  for (l=0; l<(*heapsize); l++) {
+    if (heap[3*l+1] < mv) {
+      m = l;
+      mv = heap[3*l+1];
+    }
+  }
+  
+  *i = heap[3*m];
+  *j = heap[3*m+1];
+  *k = heap[3*m+2];
+
+  /* remove entry m by moving the entries after it one slot down */
+  (*heapsize)--;
+  for (l=m; l<(*heapsize); l++) {
+    heap[3*l]   = heap[3*(l+1)];
+    heap[3*l+1] = heap[3*(l+1) + 1];
+    heap[3*l+2] = heap[3*(l+1) + 2];
+  }
+}
+#endif
+
+/* Insert the entry (i,j,k) into the heap and increment *heapsize.
+   Entries are ordered by j and, for equal j, by k.  The new entry
+   starts in the first free slot and moves towards the root while its
+   parent (slot/2) is larger.  The capacity of heap is not checked. */
+static void heap_insert(int* heap, int* heapsize, int i, int j, int k)
+{
+  int hole = *heapsize; /* slot that will receive the new entry */
+
+  *heapsize = hole + 1;
+
+  while (hole > 0) {
+    const int up    = hole / 2;
+    const int upcol = heap[3*up+1];
+
+    if (!(upcol > j || (upcol == j && heap[3*up+2] > k)))
+      break; /* parent is not larger: the hole is in place */
+
+    /* pull the larger parent down into the hole */
+    heap[3*hole]   = heap[3*up];
+    heap[3*hole+1] = heap[3*up+1];
+    heap[3*hole+2] = heap[3*up+2];
+    hole = up;
+  }
+
+  heap[3*hole]   = i;
+  heap[3*hole+1] = j;
+  heap[3*hole+2] = k;
+}
+
+/* Remove the smallest entry from the heap and return its three ints
+   through i, j and k.  The last entry is moved to the root and sifted
+   down with heap_heapify; *heapsize is decremented.
+
+   On an empty heap a message is printed and the function returns
+   without touching the heap, *heapsize or the output arguments
+   (previously it went on to read the ints in front of heap[0] and
+   drove *heapsize negative). */
+static void heap_extract_min(int* heap, int* heapsize, int* i, int* j, int* k)
+{
+  int last;
+
+  if (*heapsize <= 0) {
+    taucs_printf("heap: Trying to extract from an empty heap\n");
+    return;
+  }
+
+  *i = heap[0];
+  *j = heap[1];
+  *k = heap[2];
+
+  /* move the last entry to the root, then shrink the heap */
+  last = (*heapsize) - 1;
+  heap[0] = heap[3*last];
+  heap[1] = heap[3*last + 1];
+  heap[2] = heap[3*last + 2];
+
+  (*heapsize) --;
+
+  heap_heapify(heap,heapsize,0);
+}
+
+/* SYMMETRIC SKELETON GRAPH OPERATIONS */
+
+/* base name of the run files of the skeleton sort; set by skel_init
+   to "<basename>.ssort" */
+static char skel_basename[256];
+/* in-memory buffer of skeleton entries, two ints (i,j) per entry */
+static int* skel_buffer;
+/* capacity of skel_buffer, counted in entries (pairs of ints) */
+static int  skel_buffer_size;
+/* number of entries currently held in skel_buffer */
+static int  skel_buffer_ptr;
+/* number of sorted runs that currently exist as output files */
+static int  skel_outfiles;
+/* number of input runs of the current merge phase */
+static int  skel_infiles;
+/* descriptor of the run file read by skel_get_next; -1 until opened */
+static int  skel_outfile;
+
+/* one-character phase tags used in the run file names; the two are
+   exchanged at the start of every merge phase */
+static char skel_inphase;
+static char skel_outphase;
+
+/* set to -1 by skel_init; not otherwise used by the active code
+   visible in this part of the file */
+static int skel_get_lastcol;
+/* index of the next entry of skel_buffer returned by skel_get_next */
+static int skel_next;
+
+/* qsort comparison function for skeleton entries.  Every entry is a
+   pair of ints; entries are ordered by the second int (the column)
+   and, within one column, by the first int (the row).  Returns -1, 0
+   or 1. */
+static int skel_compare(const void* e1, const void* e2)
+{
+  const int* lhs = (const int*) e1;
+  const int* rhs = (const int*) e2;
+
+  /* we sort according to columns */
+  if (lhs[1] != rhs[1])
+    return (lhs[1] < rhs[1]) ? -1 : 1;
+
+  /* same column, compare rows */
+  if (lhs[0] != rhs[0])
+    return (lhs[0] < rhs[0]) ? -1 : 1;
+
+  return 0;
+}
+
+/* Prepare the skeleton sort: derive the base name of the run files
+   from basename, reserve I/O buffer space from the remaining memory
+   budget, allocate the entry buffer from what is left and reset the
+   sort state.  The result of the allocation is not checked here. */
+static void skel_init(char* basename)
+{
+  double budget;
+
+  /* The run file names "<skel_basename>.<phase>.<number>" are
+     formatted into 256-byte buffers elsewhere in this file, so the
+     base name is truncated to leave room for ".ssort" and for that
+     suffix instead of overflowing skel_basename on a long name. */
+  sprintf(skel_basename,"%.235s.ssort",basename);
+
+  /* adjust remaining memory.
+     We allocate 2 io buffers for the sort phase, plus an array
+     of file pointers that is guaranteed to be smaller.
+     We then free them and allocate another io buffer for the
+     stack phase, but we do not free the skel buffer first,
+     so 3 io buffers is a conservative estimate. */
+
+  remaining_memory -= (double) (3*get_iobufsize());
+
+  /* Number of (i,j) entries that fit into the remaining budget.  The
+     budget is clamped to [0, INT_MAX] bytes before the conversion:
+     converting an out-of-range double to int is undefined, which
+     happened for budgets of 2 GB and more. */
+  budget = remaining_memory;
+  if (budget < 0.0)
+    budget = 0.0;
+  if (budget > (double) INT_MAX)
+    budget = (double) INT_MAX;
+  skel_buffer_size = (int) (budget / (double) (2*sizeof(int)));
+
+  skel_buffer      = (int*)taucs_malloc(skel_buffer_size * 2 * sizeof(int));
+  skel_buffer_ptr  = 0;
+  skel_outfiles    = 0;
+
+  /* phase tags that distinguish output from input run files */
+  skel_outphase = 'e';
+  skel_inphase  = 'o';
+
+  skel_get_lastcol = -1;
+  skel_next = 0;
+  skel_outfile = -1; /* no run file open for reading yet */
+}
+
+/* Release the skeleton buffer and give the three I/O buffers that
+   skel_init subtracted back to the remaining-memory budget. */
+static void skel_finalize()
+{
+  const double reserved = (double) (3*get_iobufsize());
+
+  taucs_free(skel_buffer);
+  remaining_memory += reserved;
+}
+
+/* Add the entry (i,j) to the skeleton.  Entries are collected in
+   skel_buffer; when the buffer is full it is sorted with
+   skel_compare and written out as the run file
+   "<skel_basename>.<skel_outphase>.<skel_outfiles>", after which the
+   new entry is stored in the emptied buffer.  I/O failures are only
+   reported through taucs_printf. */
+static void skel_add(int i,int j)
+{
+  if (skel_buffer_ptr >= skel_buffer_size) {
+    int     file;
+    ssize_t io_size;
+    size_t  nbytes;
+    char    fname[256];
+
+    /* SORT THIS BUFFER */
+
+    qsort(skel_buffer, skel_buffer_ptr, 2*sizeof(int), &skel_compare);
+
+    /* WRITE OUT */
+    nbytes = (size_t) skel_buffer_ptr * 2 * sizeof(int);
+    sprintf(fname,"%s.%c.%d",skel_basename,skel_outphase,skel_outfiles);
+    taucs_printf("oocsp_colanalyze: Writing out skel sort buffer <%s> (3)\n",fname);
+    file = open(fname,O_WRONLY | O_CREAT,0644);
+    if (file == -1)
+      taucs_printf("oocsp_colanalyze: could not create skel sort file\n");
+    io_size = write(file,skel_buffer,nbytes);
+    if (io_size < 0 || (size_t) io_size != nbytes)
+      taucs_printf("oocsp_colanalyze: write to skel sort file failed\n");
+    close(file);
+
+    skel_outfiles++;
+    skel_buffer_ptr = 0;
+
+    /* the format has no conversion, so no argument is passed */
+    taucs_printf("oocsp_colanalyze: done (3)\n");
+  }
+
+  /* Store the entry.  This also runs after a flush: the entry that
+     found the buffer full used to be dropped, losing one skeleton
+     entry per run file.  The test only fails for a zero-sized
+     buffer. */
+  if (skel_buffer_ptr < skel_buffer_size) {
+    skel_buffer[2*skel_buffer_ptr]   = i;
+    skel_buffer[2*skel_buffer_ptr+1] = j;
+    skel_buffer_ptr++;
+  }
+}
+
+/* Sort the skeleton when all of its entries are still in skel_buffer.
+   If postorder (ncols ints) is not the identity permutation, the
+   column of every entry is first replaced by its position in
+   postorder; inv_postorder is a caller-provided work array that
+   receives the inverse permutation.  The entries are then sorted
+   with skel_compare. */
+static void skel_sort_incore(int* postorder, int ncols, int* inv_postorder)
+{
+  int pos, e;
+
+  /* look for the first column that the postorder moves */
+  pos = 0;
+  while (pos < ncols && postorder[pos] == pos)
+    pos++;
+
+  if (pos < ncols) { /* not the natural order: relabel the columns */
+    for (pos = 0; pos < ncols; pos++)
+      inv_postorder[ postorder[pos] ] = pos;
+
+    for (e = 0; e < skel_buffer_ptr; e++) {
+      int* column = skel_buffer + (2*e + 1);
+      *column = inv_postorder[ *column ];
+    }
+  }
+
+  qsort(skel_buffer, skel_buffer_ptr, 2*sizeof(int), &skel_compare);
+}
+
+/* Sort the skeleton when at least one run has already been written
+   to disk.
+
+   1. Entries still in skel_buffer are sorted and written as one
+      more run file.
+   2. If postorder (ncols ints) is not the identity permutation,
+      every run file is read back, the column of each entry is
+      replaced by its position in postorder (inv_postorder is a
+      caller-provided work array for the inverse permutation), and
+      the run is sorted again and rewritten.
+   3. While more than one run exists, groups of up to maxopenruns
+      input runs are merged into one output run each.  skel_buffer
+      serves as the merge heap; heap entries are (i,j,k) triples
+      where k is 2*run, or 2*run+1 for the last entry of the chunk
+      read from that run.  Extracting such a marked entry triggers
+      reading the next chunk of that run.  Exhausted input runs are
+      closed and removed.
+
+   I/O failures are only reported through taucs_printf. */
+static void skel_sort_outofcore(int* postorder, int ncols, int* inv_postorder)
+{
+  int  natural,i,j,k,e,f;
+  int  heapsize, iobufsize, openruns, maxopenruns, runstart;
+  int* infiles;
+  int  outfile;
+
+  int *inbuf;
+  int *outbuf;
+  int outbuf_ptr;
+
+  int     file;
+  ssize_t io_size;
+  ssize_t io_count;
+  char    fname[256];
+
+  int last_col, last_extracted;
+
+  /* FIRST, WRITE OUT THIS BUFFER */
+  if (skel_buffer_ptr > 0) {
+    /* SORT THIS BUFFER */
+
+    qsort(skel_buffer, skel_buffer_ptr, 2*sizeof(int), &skel_compare);
+
+    /* WRITE OUT */
+    sprintf(fname,"%s.%c.%d",skel_basename,skel_outphase,skel_outfiles);
+    taucs_printf("oocsp_colanalyze: Writing out skel sort buffer <%s> (1)\n",fname);
+    file = open(fname,O_WRONLY | O_CREAT,0644);
+    if (file == -1)
+      taucs_printf("oocsp_colanalyze: could not create skel sort file\n");
+    io_size = write(file,skel_buffer,skel_buffer_ptr * 2 * sizeof(int));
+    if (io_size != skel_buffer_ptr * 2 * sizeof(int))
+      taucs_printf("oocsp_colanalyze: write to skel sort file failed\n");
+    close(file);
+
+    skel_outfiles++;
+    skel_buffer_ptr = 0;
+  }
+
+  /* DO WE NEED TO SORT THE RUNS AGAIN IN POSTORDER? */
+  natural = 1;
+  for (i=0; i<ncols; i++) {
+    if (postorder[i] != i) {
+      natural = 0;
+      break;
+    }
+  }
+
+  if (natural == 0) {
+    for (i=0; i<ncols; i++)
+      inv_postorder[postorder[i]] = i;
+
+    for (f=0; f<skel_outfiles; f++) {
+      sprintf(fname,"%s.%c.%d",skel_basename,skel_outphase,f);
+      taucs_printf("oocsp_colanalyze: Resorting skel sort file <%s>\n",fname);
+      file = open(fname,O_RDWR);
+      if (file == -1)
+	taucs_printf("oocsp_colanalyze: could not open skel sort file\n");
+      /* read the run */
+      io_size = read(file,skel_buffer,skel_buffer_size*2*sizeof(int));
+      if (io_size == -1)
+	taucs_printf("oocsp_colanalyze: read from skel sort file failed\n");
+      io_count = io_size/(2*sizeof(int));
+      close(file);
+
+      /* sort again */
+
+      for (i=0; i<(int)io_count; i++) {
+	j = skel_buffer[2*i+1];
+	skel_buffer[ 2*i+1 ] = inv_postorder[j];
+      }
+
+      qsort(skel_buffer, io_count, 2*sizeof(int),
+	    &skel_compare);
+
+      /* rewind the file and write back */
+      /* (the file is reopened, which positions it at its start) */
+      file = open(fname,O_RDWR);
+      if (file == -1)
+	taucs_printf("oocsp_colanalyze: could not open skel sort file\n");
+      /*      lseek(file,0,SEEK_SET);*/
+      io_size = write(file,skel_buffer,io_count * 2 * sizeof(int));
+      if (io_size != io_count * 2 * sizeof(int))
+	taucs_printf("oocsp_colanalyze: write to skel sort file failed\n");
+      close(file);
+    }
+  }
+
+  /* WE NOW USE THE BUFFER AS A HEAP */
+  
+  heapsize = 0;
+
+  /* in blocks of 2*sizeof(int), not bytes! */
+  iobufsize   = get_iobufsize() / (2*sizeof(int)); 
+
+  /* 
+     each element in the skel_buffer is 2 ints, but 
+     each element in the heap is 3 ints 
+  */
+  maxopenruns = (2*skel_buffer_size) / (3*iobufsize);
+  if (maxopenruns < 2) {
+    maxopenruns = 2;
+    iobufsize   = (2*skel_buffer_size) / (3 * 2);
+  }
+  /* NOTE(review): the second value is labelled "bytes" but
+     iobufsize*2 is the number of ints in a buffer -- confirm the
+     intended unit. */
+  taucs_printf("oocsp_colanalyze: Using io buffers of %d elements (%d bytes), max runs = %d\n",
+	     iobufsize,iobufsize*2,maxopenruns);
+
+  inbuf  = (int*)taucs_malloc(iobufsize*2*sizeof(int));
+  outbuf = (int*)taucs_malloc(iobufsize*2*sizeof(int));
+  infiles  = (int*)taucs_malloc(skel_outfiles*sizeof(int));
+  /*
+  inbuf  = taucs_calloc(iobufsize,2*sizeof(int));
+  outbuf = taucs_calloc(iobufsize,2*sizeof(int));
+  infiles  = taucs_calloc(skel_outfiles,sizeof(int));
+  */
+  /* one iteration per merge phase, until a single run is left */
+  while (skel_outfiles > 1) {
+    char phase;
+    /*    int  i,j,k,runstart,openruns;*/
+
+    taucs_printf("oocsp_colanalyze: Starting another merge phase with %d input runs\n",
+	       skel_outfiles);
+
+    skel_infiles  = skel_outfiles;
+    skel_outfiles = 0;
+
+    /* the outputs of the previous phase become the inputs */
+    phase         = skel_inphase;
+    skel_inphase  = skel_outphase;
+    skel_outphase = phase;
+
+    /* merge each group of maxopenruns input runs into one output run */
+    for (runstart=0; runstart<skel_infiles; runstart += maxopenruns) {
+
+      sprintf(fname,"%s.%c.%d",
+	      skel_basename,skel_outphase,runstart/maxopenruns);
+      taucs_printf("oocsp_colanalyze: Opening output run <%s>\n",fname);
+      outfile = open(fname,O_WRONLY | O_CREAT,0644);
+      if (outfile == -1)
+	taucs_printf("oocsp_colanalyze: could not open skel sort output file\n");
+      skel_outfiles++;
+
+      /* open the runs of this group and load their first chunks */
+      for (openruns=0; 
+	   openruns < maxopenruns && runstart+openruns < skel_infiles;
+	   openruns++) {
+	
+	sprintf(fname,"%s.%c.%d",skel_basename,skel_inphase,runstart+openruns);
+	infiles[openruns] = open(fname,O_RDONLY);
+	taucs_printf("oocsp_colanalyze: Opening input run <%s> (%d)\n",fname,infiles[openruns]);
+	if (infiles[openruns] == -1)
+	  taucs_printf("oocsp_colanalyze: could not open skel sort input file\n");
+	io_size = read(infiles[openruns],inbuf,iobufsize*2*sizeof(int));
+	if (io_size == -1)
+	  taucs_printf("oocsp_colanalyze: read from skel sort file failed\n");
+	io_count = io_size/(2*sizeof(int));
+	if (io_count == 0) {
+	  close(infiles[openruns]);
+	  unlink(fname);
+	}
+
+	/* NOTE(review): infiles[1] is printed here even while only
+	   infiles[0] has been assigned (first pass of this loop). */
+	taucs_printf("oocsp_colanalyze: files %d %d\n",infiles[0],infiles[1]);
+
+	/* NOTE(review): io_count is an ssize_t but is printed with %d
+	   here and in the messages below -- confirm this is safe on
+	   the targeted platforms. */
+	taucs_printf("oocsp_colanalyze: Inserting %d elements from input run into heap\n",io_count);
+	last_col = -1;
+	for (e=0; e<(int)io_count; e++) {
+	  i = inbuf[2*e];
+	  j = inbuf[2*e+1];
+	  /*printf("ij = %d\t%d\n",i,j);*/
+	  if (last_col > j) {
+	    taucs_printf("oocsp_colanalyze: > last = %d col = %d, (%d %d)\n",last_col,j,
+		      e,io_count);
+	    taucs_printf("oocsp_colanalyze: input run not sorted!\n");
+	  }
+	  last_col = j;
+	  if (e == (int)io_count-1) /* end of inbuf marker */
+	    heap_insert(skel_buffer,&heapsize,i,j,2*openruns+1);
+	  else
+	    heap_insert(skel_buffer,&heapsize,i,j,2*openruns);
+	}
+      } 
+
+      taucs_printf("oocsp_colanalyze: files %d %d\n",infiles[0],infiles[1]);
+
+      taucs_printf("oocsp_colanalyze: heapsize = %d\n",heapsize);
+
+      outbuf_ptr = 0;
+
+      /* drain the heap in sorted order into the output run */
+      last_extracted = -1;
+      while (heapsize > 0) {
+	int end_of_run,run;
+
+	heap_extract_min(skel_buffer,&heapsize,&i,&j,&k);
+	if (last_extracted > j)
+	  taucs_printf("oocsp_colanalyze: heap order error!\n");
+	last_extracted = j;
+	outbuf[2*outbuf_ptr]   = i;
+	outbuf[2*outbuf_ptr+1] = j;
+	/* decode the tag: odd means last entry of the loaded chunk */
+	if (k % 2 == 1) end_of_run = 1;
+	else end_of_run = 0;
+	run = k / 2;
+	outbuf_ptr ++;
+	/*
+	printf("extracted (%d,%d,%d) heapsize = %d\n",i,j,k,heapsize);
+	*/
+
+
+	if (end_of_run) {
+	  /* the chunk of this run is used up: load the next one */
+	  taucs_printf("oocsp_colanalyze: Reading more from run %d\n",run);
+	  io_size = read(infiles[run],inbuf,iobufsize * 2 * sizeof(int));
+	  if (io_size == -1) {
+	    taucs_printf("oocsp_colanalyze: errno = %d (%d)\n",errno,infiles[run]);
+	    taucs_printf("oocsp_colanalyze: read from skel sort file failed\n");
+	  }
+	  io_count = io_size/(2*sizeof(int));
+	  if (io_count == 0) {
+	    sprintf(fname,"%s.%c.%d",skel_basename,skel_inphase,runstart+run);
+	    taucs_printf("oocsp_colanalyze: Closing input run %d <%s>\n",run,fname);
+	    close(infiles[run]);
+	    unlink(fname);
+	  }
+
+	  taucs_printf("oocsp_colanalyze: Extracted %d,%d,%d\n",i,j,k);
+	  taucs_printf("oocsp_colanalyze: heapsize = %d\n",heapsize);
+	  /*
+	  { 
+	    int ii; 
+	    printf("heap: \n");
+	    for (ii=0; ii<heapsize; ii++) printf("(%d,%d,%d) ",
+						    skel_buffer[3*ii+0],
+						    skel_buffer[3*ii+1],
+						    skel_buffer[3*ii+2]);
+	    printf("\n");
+	  }
+	  */
+	  taucs_printf("oocsp_colanalyze: Inserting %d elements from input run into heap\n",io_count);
+
+	  last_col = -1;
+	  for (e=0; e<(int)io_count; e++) {
+	    i = inbuf[2*e];
+	    j = inbuf[2*e+1];
+
+	    if (last_col > j) 
+	      taucs_printf("oocsp_colanalyze: input run not sorted!\n");
+	    last_col = j;
+
+	    if (e == (int)io_count-1) /* end of inbuf marker */
+	      heap_insert(skel_buffer,&heapsize,i,j,2*run+1);
+	    else
+	      heap_insert(skel_buffer,&heapsize,i,j,2*run);
+
+	    /* the overflow is only reported, after the insertion */
+	    if (3*heapsize >= 2*skel_buffer_size) {
+	      taucs_printf("oocsp_colanalyze: heapsize = %d, buffer_size = %d\n",
+			heapsize,2*skel_buffer_size);
+	      taucs_printf("oocsp_colanalyze: merge-heap overflow\n");
+	    }
+	  }
+
+	  taucs_printf("oocsp_colanalyze: heapsize = %d\n",heapsize);
+	}
+
+	if (outbuf_ptr >= iobufsize) {
+	  taucs_printf("oocsp_colanalyze: Writing to output run\n");
+	  taucs_printf("oocsp_colanalyze: heapsize = %d\n",heapsize);
+	  io_count = iobufsize;
+	  io_size = write(outfile,outbuf,io_count * 2 * sizeof(int));
+	  if (io_size != io_count * 2 * sizeof(int))
+	    taucs_printf("oocsp_colanalyze: write to skel sort file failed\n");
+	  outbuf_ptr = 0;
+	}
+      }
+
+      /* write the rest of the output */
+      if (outbuf_ptr > 0) {
+	taucs_printf("oocsp_colanalyze: Writing to output run and closing\n");
+	io_count = outbuf_ptr;
+	io_size = write(outfile,outbuf,io_count * 2 * sizeof(int));
+	if (io_size != io_count * 2 * sizeof(int))
+	  taucs_printf("oocsp_colanalyze: write to skel sort file failed\n");
+	outbuf_ptr = 0;
+      }
+      close(outfile);
+      skel_buffer_ptr = 0;
+    }
+  }
+
+
+  taucs_free(infiles);
+  taucs_free(inbuf);
+  taucs_free(outbuf);
+
+  /*
+  taucs_free(infiles);
+  taucs_free(inbuf);
+  taucs_free(outbuf);
+  */
+}
+
+/* nonzero when stack_finalize has to free stack_buffer */
+static int  stack_allocated;
+/* capacity of stack_buffer, counted in ints */
+static int  stack_buffer_size;
+/* in-memory part of the fill stack; assigned by skel_sort */
+static int* stack_buffer;
+
+/* Sort the skeleton and set up the buffer of the fill stack.
+
+   postorder  column postorder (ncols ints), passed to the sort
+   tmp        work array passed to the sort for the inverse postorder
+
+   If no run has been written to disk the entries are sorted in core.
+   When they fill at most half of skel_buffer, the unused tail of the
+   buffer becomes the stack buffer.  Otherwise the sorted entries are
+   written to run file 0, the whole buffer is handed over to the
+   stack and a new skeleton buffer of one I/O buffer is allocated.
+
+   If runs exist on disk the out-of-core sort is used and the buffer
+   is handed over to the stack in the same way.
+
+   Whenever the stack takes over the old skeleton buffer,
+   stack_allocated is set so that stack_finalize frees it. */
+static void skel_sort(int* postorder, int ncols,int* tmp)
+{
+  int  file;
+  char fname[256];
+  ssize_t io_size;
+  int iobufsize;
+
+  /* in entries (pairs of ints), not bytes */
+  iobufsize   = get_iobufsize() / (2*sizeof(int));
+
+  if (skel_outfiles == 0) {
+    skel_sort_incore(postorder,ncols,tmp);
+
+    if (skel_buffer_ptr <= skel_buffer_size / 2) {
+      /* the stack borrows the tail; skel_buffer still owns the block */
+      stack_buffer      = skel_buffer + (2*skel_buffer_ptr);
+      stack_buffer_size = 2*(skel_buffer_size - skel_buffer_ptr);
+      stack_allocated   = 0;
+      taucs_printf("oocsp_colanalyze: Using remainder of skeleton buffer for stack,\n");
+      taucs_printf("oocsp_colanalyze: size = %d ints\n",stack_buffer_size);
+    } else {
+      sprintf(fname,"%s.%c.%d",skel_basename,skel_outphase,0);
+      taucs_printf("oocsp_colanalyze: Writing out skel sort buffer <%s> (2)\n",fname);
+      file = open(fname,O_WRONLY | O_CREAT,0644);
+      if (file == -1)
+	taucs_printf("oocsp_colanalyze: could not create skel sort file\n");
+      io_size = write(file,skel_buffer,skel_buffer_ptr * 2 * sizeof(int));
+      if (io_size != skel_buffer_ptr * 2 * sizeof(int))
+	taucs_printf("oocsp_colanalyze: write to skel sort file failed\n");
+      close(file);
+
+      skel_outfiles++;
+      skel_buffer_ptr = 0;
+
+      stack_buffer      = skel_buffer;
+      stack_buffer_size = 2*skel_buffer_size;
+      stack_allocated   = 1; /* we need to free it */
+
+      skel_buffer       = (int*)taucs_malloc(iobufsize*2*sizeof(int));
+      skel_buffer_size  = iobufsize;
+      taucs_printf("oocsp_colanalyze: Using skeleton buffer for stack, allocating \n");
+      taucs_printf("oocsp_colanalyze: new skeleton buffer\n\n");
+    }
+  }
+  else {
+    skel_sort_outofcore(postorder,ncols,tmp);
+
+    stack_buffer      = skel_buffer;
+    stack_buffer_size = 2*skel_buffer_size;
+    /* The stack now owns the old skeleton buffer, exactly as in the
+       in-core spill case above; without this flag stack_finalize
+       would not free it and the buffer would leak. */
+    stack_allocated   = 1;
+
+    skel_buffer       = (int*)taucs_malloc(iobufsize*2*sizeof(int));
+    skel_buffer_size  = iobufsize;
+    taucs_printf("oocsp_colanalyze: Using skeleton buffer for stack, allocating \n");
+    taucs_printf("oocsp_colanalyze: new skeleton buffer\n");
+  }
+}
+
+/* Return the row of the next skeleton entry if that entry belongs to
+   column j, and advance past it; return -1 otherwise.  -1 is also
+   returned when no entry is left or when the run file cannot be
+   read.
+
+   Entries are taken from skel_buffer.  When the buffer is used up
+   and a run file exists, the next chunk is read from run file
+   "<skel_basename>.<skel_outphase>.0", which is opened on first use
+   and closed and removed once its end is reached. */
+static int skel_get_next(int j)
+{
+  int row, col;
+  char fname[256];
+  ssize_t io_size;
+
+  if (skel_next >= skel_buffer_ptr) {
+    if (skel_outfiles > 0) {
+      if (skel_outfile == -1) {
+	sprintf(fname,"%s.%c.%d",skel_basename,skel_outphase,0);
+	taucs_printf("oocsp_colanalyze: Opening skel sort buffer <%s> (2)\n",fname);
+	skel_outfile = open(fname,O_RDONLY);
+	if (skel_outfile == -1)
+	  taucs_printf("oocsp_colanalyze: could not open skel sort file\n");
+      }
+      io_size = read(skel_outfile,
+		     skel_buffer,skel_buffer_size * 2 * sizeof(int));
+      if (io_size == -1) {
+	taucs_printf("oocsp_colanalyze: I/O error while trying to read skel sort file\n");
+	/* Nothing was read.  Do not turn the -1 into an entry count
+	   (the unsigned division below would yield a bogus value);
+	   report "no entry" with an empty buffer instead. */
+	skel_next       = 0;
+	skel_buffer_ptr = 0;
+	return -1;
+      }
+      if (io_size == 0) { /* end of file */
+	taucs_printf("oocsp_colanalyze: Closing and removing skel file, col=%d\n",j);
+	sprintf(fname,"%s.%c.%d",skel_basename,skel_outphase,0);
+	close(skel_outfile);
+	unlink(fname);
+	skel_outfiles   = 0;
+	skel_next       = 0;
+	skel_buffer_ptr = 0;
+	return -1;
+      } else {
+	skel_next       = 0;
+	skel_buffer_ptr = io_size / (2*sizeof(int));
+	taucs_printf("oocsp_colanalyze: read %d elements from skel sort file\n",skel_buffer_ptr);
+      }
+    } else
+      return -1;
+  }
+
+  /* a chunk shorter than one entry leaves the buffer empty */
+  if (skel_next >= skel_buffer_ptr)
+    return -1;
+
+  row = skel_buffer[2*skel_next];
+  col = skel_buffer[2*skel_next+1];
+  if (col == j) {
+    skel_next++;
+    return row;
+  }
+  else
+    return -1; /* next entry belongs to another column; keep it */
+}
+    
+
+/* Collect the rows of skeleton column j.  Rows are fetched with
+   skel_get_next until it returns -1.  A row is appended to rowind
+   (and counted in *nnz) only if found[row] < flag, and found[row] is
+   then set to flag, so every row is reported at most once per flag
+   value. */
+static void skel_get_postordercol(int* found, int flag,
+				  int j,
+				  int* nnz, int* rowind)
+{
+  int row;
+
+  *nnz = 0;
+  for (row = skel_get_next(j); row != -1; row = skel_get_next(j)) {
+    if (found[row] >= flag)
+      continue; /* already marked with this flag */
+
+    found[row] = flag;
+    rowind[ *nnz ] = row;
+    (*nnz)++;
+  }
+}
+
+
+/*
+static void skel_get_postordercol(int* found, int flag,
+				  int j,
+				  int* nnz, int* rowind)
+{
+  int row;
+  int i;
+  int next;
+
+  next = skel_get_next;
+  while (skel_buffer[2*next + 1] < j && next < skel_buffer_ptr) {
+    next++;
+  }
+
+  if (skel_buffer[2*next + 1] == j && next < skel_buffer_ptr) {
+    if (next != skel_get_lastcol+1)
+      taucs_printf("oocsp_colanalyze: internal error in get_postordercol\n");
+  }
+
+  *nnz = 0;
+  for (i = next;
+       skel_buffer[2*i + 1] == j && i < skel_buffer_ptr;
+       i++) {
+    row = skel_buffer[2*i];
+    if (found[row] < flag) {
+      found[row] = flag;
+      rowind[ *nnz ] = row;
+      (*nnz)++;
+    }
+  }
+
+  skel_get_next = i;
+  skel_get_lastcol = i - 1;
+}
+*/
+
+/* UNION FIND ROUTINES */
+
+/* Make i the representative of its own set; returns i. */
+static int uf_makeset(int* uf, int i)
+{
+  uf[i] = i;
+  return i;
+}
+/* Merge two sets by making t the parent of s; returns t. */
+static int uf_union  (int* uf, int s, int t)
+{
+  uf[s] = t;
+  return t;
+}
+/* Return the representative of the set that contains i.  Every
+   element visited on the way is re-linked directly to the
+   representative (path compression). */
+static int uf_find   (int* uf, int i)
+{
+  int rep = i;
+  int next;
+
+  /* first pass: walk up to the representative */
+  while (uf[rep] != rep)
+    rep = uf[rep];
+
+  /* second pass: point every element of the path at it */
+  while (uf[i] != rep) {
+    next  = uf[i];
+    uf[i] = rep;
+    i     = next;
+  }
+
+  return rep;
+}
+
+/* FILL STACK ROUTINES */
+
+/* number of full stack buffers currently written out to files */
+static int  stack_files;
+/* base name of those files; set by stack_init to "<basename>.fstack" */
+static char stack_basename[256];
+
+/*
+static int  stack_buffer_size;
+static int* stack_buffer;
+*/
+/* number of ints currently held in stack_buffer */
+static int  stack_buffer_ptr;
+
+/* index of the top entry of the column stack, -1 when it is empty */
+static int  stack_top;
+
+/* stack size statistics: stack_max_size is printed by stack_finalize,
+   but the statements that update both values are commented out */
+static double stack_size;
+static double stack_max_size;
+
+/* Reset the fill stack.
+
+   basename  prefix of the files that receive full stack buffers
+   colptr    ncols ints; colptr[j] is set to -1 ("column j has no
+             entries on the stack")
+   colstack  not used by this function
+   ncols     number of columns
+
+   The stack buffer itself is not allocated here; it is provided by
+   skel_sort. */
+static void stack_init(char* basename,
+		       int* colptr, int* colstack, 
+		       int ncols)
+{
+  int j;
+
+  (void) colstack; /* kept in the signature for the callers */
+
+  /* The file names "<stack_basename>.<number>" are formatted into
+     256-byte buffers by stack_push and stack_pop, so the base name
+     is truncated to leave room for ".fstack" and for that suffix
+     instead of overflowing stack_basename on a long name. */
+  sprintf(stack_basename,"%.236s.fstack",basename);
+  stack_files      = 0;
+  stack_buffer_ptr = 0;
+  stack_top        = -1;
+
+  for (j=0; j<ncols; j++) colptr[j] = -1;
+
+  stack_size     = 0.0;
+  stack_max_size = 0.0;
+}
+
+/* Finish the use of the fill stack: report if entries are left on
+   it (in files or in the buffer), free the stack buffer if the
+   stack owns it, and print the recorded maximal stack size. */
+static void stack_finalize()
+{
+  const int drained = (stack_files == 0) && (stack_buffer_ptr == 0);
+
+  if (!drained)
+    taucs_printf("oocsp_colanalyze: fill stack did not get empty\n");
+
+  if (stack_allocated != 0)
+    taucs_free(stack_buffer);
+
+  taucs_printf("oocsp_colanalyze: max stack size = %.0lf\n",stack_max_size);
+}
+
+/* Push row i onto the fill stack as an entry of column j.
+
+   If j is not the column on top of the column stack, j is pushed
+   onto colstack and colptr[j] records the current stack position
+   (full buffers already written to files included).  When the
+   buffer becomes full it is written to the file
+   "<stack_basename>.<stack_files>" and emptied.  Failures are only
+   reported through taucs_printf. */
+static void stack_push(int* colptr, int* colstack, int i, int j)
+{
+  int     file;
+  ssize_t io_size;
+  char    fname[256];
+
+  /* open a new column unless j is already the topmost one */
+  if (!(stack_top >= 0 && colstack[stack_top] == j)) {
+    if (colptr[j] != -1)
+      taucs_printf("oocsp_colanalyze: fill stack internal error (push)\n");
+
+    colstack[++stack_top] = j;
+    colptr[j] = (stack_buffer_size*stack_files) + stack_buffer_ptr;
+    if (colptr[j] > (INT_MAX/2))
+      taucs_printf("oocsp_colanalyze: Warning! Pointers to fill stack may overflow\n");
+  }
+
+  stack_buffer[stack_buffer_ptr++] = i;
+
+  if (stack_buffer_ptr < stack_buffer_size)
+    return; /* still room in the buffer */
+
+  /* the buffer is full: write it out as the next stack file */
+  sprintf(fname,"%s.%d",stack_basename,stack_files);
+  taucs_printf("oocsp_colanalyze: Writing out fill stack buffer <%s>\n",fname);
+  file = open(fname,O_WRONLY | O_CREAT,0644);
+  if (file == -1)
+    taucs_printf("oocsp_colanalyze: could not create stack file\n");
+  io_size = write(file,stack_buffer,stack_buffer_size * sizeof(int));
+  if (io_size != stack_buffer_size * sizeof(int))
+    taucs_printf("oocsp_colanalyze: write to stack file failed\n");
+  close(file);
+
+  stack_files++;
+  stack_buffer_ptr = 0;
+}
+
+/* Pop all entries of column j from the fill stack.
+
+   If j is not the column on top of the column stack, the column is
+   empty and *nnz is set to 0 (a message is printed if j is found
+   deeper in the stack).  Otherwise entries are removed from the top
+   of the stack down to the position stored in colptr[j], reading
+   back and removing stack files as needed.  A popped row is appended
+   to rowind (and counted in *nnz) only if found[row] < flag, and
+   found[row] is then set to flag.  Finally the column is removed
+   from the column stack and colptr[j] is reset to -1. */
+static void stack_pop(int* colptr, int* colstack, 
+		      int* found, int flag,
+		      int j, int* nnz, int* rowind)
+{
+  int row;
+  int s;
+
+  *nnz = 0;
+
+  /* empty fill column */
+  if (stack_top < 0 || colstack[stack_top] != j) {
+    for (s = 0; s <= stack_top; s++) {
+      if (colstack[s] == j)
+	taucs_printf("oocsp_colanalyze: fill stack internal error (pop)\n");
+    }
+    return;
+  }
+
+  for (;;) {
+    /* current stack position, full files included */
+    const int position = (stack_buffer_size*stack_files) +stack_buffer_ptr;
+
+    if (colptr[j] >= position)
+      break; /* all entries of column j have been removed */
+
+    if (stack_buffer_ptr == 0) {
+      /* the buffer is empty: reload the most recent stack file */
+      int     file;
+      ssize_t io_size;
+      char    fname[256];
+
+      stack_files--;
+      stack_buffer_ptr = stack_buffer_size;
+      sprintf(fname,"%s.%d",stack_basename,stack_files);
+      taucs_printf("oocsp_colanalyze: Reading a fill stack buffer <%s>\n",fname);
+      file = open(fname,O_RDONLY);
+      if (file == -1)
+	taucs_printf("oocsp_colanalyze: could not open stack file\n");
+      io_size = read(file,stack_buffer,stack_buffer_size * sizeof(int));
+      if (io_size != stack_buffer_size * sizeof(int))
+	taucs_printf("oocsp_colanalyze: read from stack file failed\n");
+      close(file);
+      unlink(fname);
+    }
+
+    row = stack_buffer[ --stack_buffer_ptr ];
+
+    if (found[row] < flag) {
+      found[row] = flag;
+      rowind[ *nnz ] = row;
+      (*nnz)++;
+    }
+  }
+
+  stack_top--;
+  colptr[j] = -1;
+}
+
+/* MAIN ROUTINE */
+
+static
+void oocsp_colanalyze(taucs_ccs_matrix* matrix,
+		      char* basename,
+		      int*  colperm,
+		      int** ptrparent,
+		      int** ptrpostorder,
+		      int** ptrlcolcount,
+		      int** ptrucolcount)
+{
+  int i,j,ip,p,jp;
+  int nnz,cset,rset,rroot,fcol;
+  
+  int  postnum, depth;
+  int* first_kid;
+  int* next_kid;
+  int* stack_vertex;
+  int* stack_child;
+
+  /*int* colptr;*/
+  int* rowind;
+  int* firstcol;
+  int* uf;
+  int* found;
+  int* root;
+  int* stack_colptr;
+  int* stack_colstk;
+  int* tmp_col;
+  
+  int* parent;
+  int* postorder;
+  int* lcolcount;
+  int* ucolcount;  
+  
+  int*    nrows;
+  int*    ncols;
+
+  taucs_printf("oocsp_colanalyze: In colanalyze\n");
+  taucs_printf("oocsp_colanalyze: using %.0lf MBytes of memory\n",(remaining_memory)/1048576.0);
+  
+  nrows     = &matrix->m;
+  ncols     = &matrix->n;
+
+  /* START THE ANALYSIS */
+
+  skel_init(basename);
+
+  (remaining_memory) -= (double) ( 4 * (*ncols) * 4 /* sizeof(int32) */);
+
+  /* +1 for stack_vertex, stack_child */
+  parent          = (int*)taucs_malloc((*ncols+1)*sizeof(int));
+  *ptrparent = parent;
+  lcolcount       = (int*)taucs_malloc((*ncols+1)*sizeof(int));
+  *ptrlcolcount = lcolcount;
+  ucolcount       = (int*)taucs_malloc((*ncols+1)*sizeof(int));
+  *ptrucolcount = ucolcount;
+  postorder       = (int*)taucs_malloc((*ncols+1)*sizeof(int));
+  *ptrpostorder = postorder;
+
+  (remaining_memory) -= (double) ( 2 * ((*ncols)+1) * sizeof(int));
+  (remaining_memory) -= (double) ( 2 * (*nrows) * sizeof(int));
+
+  uf        = (int*)taucs_malloc((*ncols+1)*sizeof(int));
+  root      = (int*)taucs_malloc((*ncols+1)*sizeof(int));
+  firstcol  = (int*)taucs_malloc((*nrows)*sizeof(int));
+
+  tmp_col   = (int*)taucs_malloc((*nrows)*sizeof(int));
+
+  /* we can reuse the same space */
+  first_kid = uf;
+  next_kid  = root;
+
+  found        = firstcol;
+  stack_colptr = uf;
+  stack_colstk = root;
+
+  /* we use the output arrays before they are used. */
+  stack_vertex = lcolcount; 
+  stack_child  = ucolcount;
+
+  for (i=0; i < (*nrows); i++) {
+    firstcol[i] = (*ncols);
+  }
+
+  for (j=0; j < (*ncols); j++) {
+
+#ifdef OLD
+    {
+      int   file;
+      off_t offset;
+      ssize_t io_size;
+	
+      nnz = clen[j];
+      /* taucs_printf("oocsp_colanalyze: Debug_1: j= %d nnz= %d\n",j,nnz);*/
+      file   = dfile_fid[ ind_fid[j] ];
+      offset = ind_off[j];
+      if (nnz != 0 && file != -1 && offset != -1) {
+	if (lseek(file,offset,SEEK_SET) == -1) taucs_printf("oocsp_colanalyze: lseek failed\n");
+	io_size = read(file, tmp_col, nnz * sizeof(int));
+	if (io_size != nnz * sizeof(int)) taucs_printf("oocsp_colanalyze: Error reading data.\n");
+      }
+      rowind = tmp_col;
+
+    }
+#else
+    /*
+    nnz = clen[j];
+    oocsp_readcol_structure(matrix,j,tmp_col);
+    rowind = tmp_col;
+    */
+
+    /* new code: Sivan 28 Feb 2002 */
+    nnz = (matrix->colptr)[colperm[j]+1] - (matrix->colptr)[colperm[j]];
+    rowind = (matrix->rowind) + (matrix->colptr)[colperm[j]];
+#endif
+
+    cset       = uf_makeset(uf, j);
+    root[cset] = j;
+    parent[j]  = (*ncols);
+    
+    for (ip=0; ip<nnz; ip++) {
+      
+      i = rowind[ip];
+      fcol = firstcol[i];
+      if (fcol >= j) {
+	firstcol[i] = j;
+	fcol        = j;
+      } else {
+	rset = uf_find(uf,fcol);
+	rroot = root[rset]; 
+	if (rroot != j) {
+	  parent[rroot] = j;
+	  cset          = uf_union(uf,cset,rset);
+	  root[cset]    = j;
+	}
+      }
+      /* ADD (j,fcol) TO SKELETON */
+      skel_add(j,fcol);
+    }
+    /*
+    mxDestroyArray(output_args[0]);
+    */
+  }
+
+  /* COMPUTE POSTORDER OF ETREE */
+
+  /* create linked lists of children */
+  for (j=0; j <= *ncols; j++) first_kid[j] = -1;
+  for (j = (*ncols)-1; j >= 0; j--) {
+    p              = parent[j];
+    next_kid[j]    = first_kid[p];
+    first_kid[p] = j;
+  }
+
+  
+  /* do dfs in a loop */
+  postnum = 0;
+  depth = 0;
+  stack_vertex[depth] = *ncols; /* root */
+  stack_child [depth] = first_kid[ stack_vertex[depth] ];
+  while (depth >= 0) {
+    if ( stack_child[depth] != -1 ) {
+      stack_vertex[depth+1] = stack_child[depth];
+      stack_child [depth+1] = first_kid[  stack_vertex[depth+1] ];
+      depth++;
+    } else {
+      if ( stack_vertex[depth] != (*ncols) ) { /* not root */
+	if (stack_vertex[depth] >= *ncols) 
+	  taucs_printf("oocsp_colanalyze: internal error in dfs (0)\n");
+	postorder[ postnum ] = stack_vertex[depth];
+	postnum++;
+      }
+      depth--;
+      if (depth >= 0) /* sivan June 30, to avoid crash, seems to be right */
+	stack_child[depth] = next_kid[  stack_child[depth] ];
+    }
+    if (depth > *ncols) {
+      int i;
+      taucs_printf("oocsp_colanalyze: depth=%d, ncols=%d\n",depth,*ncols);
+      for (i=0; i<(*ncols); i++) {
+	taucs_printf("oocsp_colanalyze: %d: [%d %d]\n",i,first_kid[i],next_kid[i]);
+      }
+      taucs_printf("oocsp_colanalyze: internal error in dfs (1)\n");
+    }
+  }
+
+  
+  if (postnum != *ncols) taucs_printf("oocsp_colanalyze: internal error in dfs (2)\n");
+    
+  /* SORT THE SKELETON MATRIX */
+
+  skel_sort(postorder, *ncols, 
+	    found /* temporary */);
+
+
+  
+  /* SECOND PHASE, COMPUTE COLCOUNTS */
+
+  /* we reuse the space of uf and root */
+  stack_init(basename,stack_colptr,stack_colstk,*ncols); 
+
+  for (i=0; i < (*nrows); i++) {
+    found[i] = -1;
+  }
+
+  for (j=0; j<(*ncols); j++) {
+    lcolcount[j] = 1;
+    ucolcount[j] = 1;
+  }
+
+  for (jp=0; jp<(*ncols); jp++) {
+    j = postorder[jp];
+    found[j] = jp;
+    p = parent[j];
+
+    if (p < (*ncols)) {
+      found[p] = jp;
+      lcolcount[j]++;
+      ucolcount[p]++;
+    }
+
+    stack_pop(stack_colptr,stack_colstk,
+	      found, jp,
+	      j,
+	      &nnz,tmp_col);
+    rowind = tmp_col;
+    for (ip=0; ip<nnz; ip++) {
+      i = rowind[ip];
+
+      lcolcount[j]++;
+      ucolcount[i]++;
+      if (p < *ncols) stack_push(stack_colptr,stack_colstk,i,p);
+    }
+
+    skel_get_postordercol(found,jp,
+			  jp, /* use postorder column index */
+			  &nnz,tmp_col);
+    rowind = tmp_col;
+    for (ip=0; ip<nnz; ip++) {
+      i = rowind[ip];
+
+      lcolcount[j]++;
+      ucolcount[i]++;
+
+      if (lcolcount[j] > *(ncols))
+	taucs_printf("oocsp_colanalyze: Internal error while producing lcolcounts\n");
+      if (ucolcount[i] > *(ncols))
+	taucs_printf("oocsp_colanalyze: Internal error while producing ucolcounts\n");
+
+      if (p < *ncols) stack_push(stack_colptr,stack_colstk,i,p);
+    }
+  }
+
+  stack_finalize();
+
+  /*
+  mxDestroyArray(tmp1_array);
+  mxDestroyArray(tmp2_array);
+  mxDestroyArray(tmp3_array);
+  mxDestroyArray(tmp4_array);
+  */
+
+  taucs_free(uf);
+  taucs_free(firstcol);
+  taucs_free(root);
+  taucs_free(tmp_col);
+
+  (remaining_memory) += (double) ( 2 * ((*ncols)+1) * sizeof(int));
+  (remaining_memory) += (double) ( 2 * (*nrows) * sizeof(int));
+
+  skel_finalize();
+
+  /* MAKE AND POSTORDER PARENT 1-BASED AND MARK ROOTS WITH A ZERO */
+
+  /*for (j=0; j < (*ncols); j++) {
+    postorder[j]++;
+    if (parent[j] == (*ncols))
+      parent[j] = 0;
+    else 
+      parent[j]++;
+      }  */
+  /* fix up by Vladi */
+
+  for (j=0; j < (*ncols); j++) {
+    if (parent[j] == (*ncols))
+      parent[j] = -1;
+    /*lcolcount[j]--;
+      ucolcount[i]--;*/
+  }
+
+  taucs_printf("oocsp_colanalyze: done\n");
+}
+
+/*********************************************************/
+/* PANELIZATION                                          */
+/*********************************************************/
+
+
+/* There seems to be a confusion here between spawidth and remaining memory; sivan */
+
+/*
+ * Partition the columns, visited in postorder, into consecutive panels and
+ * fill the panels/schedstart/schedend/fetchnext/ejectnext arrays.
+ * *maxsn is grown in steps of 4 (first value tried is 8) until the panel
+ * width that fits into remaining_memory is at most 4*(*maxsn); *spawidth is
+ * then raised to at least 8.  A panel is closed once it holds *spawidth
+ * columns, or once adding the next column would push the sum of its L+U
+ * column counts above nrows*(*spawidth).
+ * The five output arrays (ncols ints each) are allocated here with
+ * taucs_malloc, are not checked for NULL, and are returned through the
+ * ptr* arguments.  The parents argument is not used.
+ */
+static void oocsp_panelize_simple(
+    int   nrows,           /* input  */
+    int   ncols,           /* input  */
+    int*  postorder,       /* input  */
+    int*  l_colcounts,     /* input  */
+    int*  u_colcounts,     /* input  */
+    int*  parents,         /* input (unused) */
+    int*  spawidth,        /* output: panel width chosen here */
+    int*  maxsn,           /* output: supernode bound chosen here */
+    int** ptrpanels,       /* output */
+    int** ptrschedstart,   /* output */
+    int** ptrschedend,     /* output */
+    int** ptrfetchnext,    /* output */
+    int** ptrejectnext)    /* output */
+{
+  int i,j,c;
+  int panelcols, panelnumber;
+  int stop,eject,newpanel;
+  int colcount;
+  int* panels;
+  int* schedstart;
+  int* schedend;
+  int* fetchnext;
+  int* ejectnext;
+  double panelsize, maxpanelsize; /* doubles: both may exceed INT_MAX */
+  double memuse, width_multiplier;
+  int  maxcolcount;
+
+  maxcolcount = 0;
+  for (j=0; j<ncols; j++)
+    maxcolcount = max( maxcolcount, l_colcounts[j]+u_colcounts[j] );
+
+  taucs_printf("oocsp_panelize: max col count = %d, nrows = %d\n",
+               maxcolcount, nrows);
+
+  /*
+    memory usage in numfact, width > 0, snodes:
+           8*nrows*I + 2*nrows*D + 2*nrows*C + 3*nrows*P  +
+           2*ncols*I +
+           1*w*nrows*I + 1*w*nrows*D + 1*w*I +
+           5*w*maxc *I + 2*w*maxc *D +
+           1*s*I +
+           1*s*maxc *I + 2*s*maxc *D
+
+    we also account for the memory required for the in-core representation of A, L, U,
+    which is 2*ncols*sizeof(char)+3*ncols*sizeof(int)
+  */
+
+  *maxsn = 8 - 4; /* 8 is the minimum value we use here */
+  do {
+    *maxsn += 4;
+    memuse =
+      8.0*nrows*sizeof(int) + 2.0*nrows*sizeof(taucs_datatype)
+      + 2.0*nrows*sizeof(char) + 3.0*nrows*sizeof(void*)
+      + 2.0*ncols*sizeof(int)
+      + 1.0*(*maxsn)*sizeof(int)
+      + 1.0*(*maxsn)*maxcolcount*sizeof(int) + 2.0*(*maxsn)*maxcolcount*sizeof(taucs_datatype);
+
+    memuse +=
+      3 * ( 3.0*ncols*sizeof(int) + 2.0*ncols*sizeof(char) );
+
+    width_multiplier =
+      1.0*nrows*sizeof(int) + 1.0*nrows*sizeof(taucs_datatype) + 1.0*sizeof(int)
+      + 5.0*maxcolcount*sizeof(int) + 2.0*maxcolcount*sizeof(taucs_datatype);
+    *spawidth = (int) floor( (remaining_memory - memuse) / width_multiplier );
+  } while (*spawidth > 4*(*maxsn));
+
+  if (*spawidth < 8) *spawidth = 8; /* it might go over the limit */
+
+  /* multiply as doubles: the int product nrows * *spawidth can overflow and wrap negative */
+  maxpanelsize = (double) nrows * (double) (*spawidth);
+
+  taucs_printf("oocsp_panelize: spawidth = %d, max supernode = %d\n",*spawidth,*maxsn);
+
+  panels         = (int*)taucs_malloc(ncols*sizeof(int)); (*ptrpanels)     = panels;
+  schedstart     = (int*)taucs_malloc(ncols*sizeof(int)); (*ptrschedstart) = schedstart;
+  schedend       = (int*)taucs_malloc(ncols*sizeof(int)); (*ptrschedend)   = schedend;
+  fetchnext      = (int*)taucs_malloc(ncols*sizeof(int)); (*ptrfetchnext)  = fetchnext;
+  ejectnext      = (int*)taucs_malloc(ncols*sizeof(int)); (*ptrejectnext)  = ejectnext;
+
+  panelnumber = 1;
+  j           = 0;
+  eject = 0;
+  while (j<ncols) {
+    panelsize = 0.0;
+    panelcols = 0;
+    stop = 0;
+
+    while (stop == 0) {
+      c = postorder[j];
+      colcount = l_colcounts[c] + u_colcounts[c]; /* was only L, don't remember why, sivan */
+      newpanel = 0;
+      if (*spawidth > 0) {
+        if (panelcols >= *spawidth)
+          newpanel=1;
+        else if (panelsize + colcount > maxpanelsize)
+          newpanel=1;
+      }
+
+      if (newpanel == 0) {
+        /* add c to panel */
+        panelsize = panelsize + colcount;
+        panelcols = panelcols + 1;
+        panels[c] = panelnumber;
+        ejectnext[j] = c;
+        schedstart[c] = panelnumber;
+        schedend[c]   = panelnumber;
+        j = j+1;
+        if (j >= ncols) stop = 1;
+      } else {
+        panelnumber = panelnumber + 1;
+        stop = 1;
+      }
+    }
+
+    /* now reverse panel, compute fetchnext */
+#ifdef VLADIMIR
+    if(j<ncols)
+      for (i=0;i<j-eject;i++)
+        fetchnext[eject+i] = ejectnext[j-i];
+    else {
+      printf("j >= ncols???\n");
+      exit(1);
+      for (i=0;i<j-eject;i++)
+        fetchnext[eject+i] = ejectnext[j-i-1];
+    }
+#else
+    for (i=0;i<j-eject;i++)
+      fetchnext[eject+i] = ejectnext[j-i-1];
+#endif
+    eject = j;
+  }
+}
+
+
+/*********************************************************/
+/* NUMERICAL PHASE                                       */
+/*********************************************************/
+
+#define SNODES
+#define SNODE_THRESHOLD 4
+#define SNODE_BLOCK 8
+#define SIMPLE_COL_COL_no
+#define SPA_ONEARRAY
+#define USE_BLAS
+#define BLAS_THRESHOLD 10
+#define BLOCK 16
+
+#ifdef DETAILED_TIMING
+static double flops_extra = 0.0;
+static double flops_dense = 0.0;
+#endif
+
+/*
+  Out-of-core sparse LU
+
+  Numerical factorization.
+
+  Memory management
+    ipivots              nrows*I                            ! not freed, returned to caller
+    rowlists             nrows*I + 3*width*maxlucols*I      ! heads,colind,next,prev
+    heap                 nrows*I                            !                        
+
+    spa                  nrows*I + nrows*D                  ! if spawidth < 0
+    spa (width>0)        width*nrows*I + width*nrows*D      !
+
+    lindices             nrows*C                            ! L or U 
+    nnzmap               nrows*C                            ! ?
+
+    panel                2*ncols*I                          ! id, nnz
+                         + 3*ncols*P                        ! ind, inrowlist, re
+
+    panel_compressed     sum_j lucols_j*(D+2*I), j=1:width  ! re, ind, inrowlist
+                         <= width*maxlucols*(D+2*I)
+                         
+    snodes               2*ncols*I                          ! pivots, snode_index
+                         +   MAX_SNODE*I                    ! pivrows
+			 + 2*nrows*I                        ! ind, map
+			 +   nrows*D                        ! re ???
+			 +   MAX_SNODE*maxcolcount*I        ! lu_ind
+			 +   MAX_SNODE*maxcolcount*D        ! lu_re
+
+    snodes_dense         MAX_SNODE*maxcolcount*D            ! S
+                         + width*maxcolcount*D              ! P
+                         + width*I                          ! updcols
+
+    no_snodes            nrows*I + nrows*D                  ! lu_ind, lu_re
+
+    total: width > 0, snodes
+           8*nrows*I + 2*nrows*D + 2*nrows*C + 3*nrows*P  +
+           2*ncols*I + 
+           1*w*nrows*I + 1*w*nrows*D + 1*w*I +
+           5*w*maxc *I + 2*w*maxc *D +
+           1*s*I +
+           1*s*maxc *I + 2*s*maxc *D 
+
+    total: width < 0, snodes
+           8*nrows*I + 2*nrows*D + 2*nrows*C + 3*nrows*P  +
+           2*ncols*I + 
+           1*w*I +
+           5*w*maxc *I + 2*w*maxc *D +
+           1*s*I +
+           1*s*maxc *I + 2*s*maxc *D 
+
+    total: width > 0, no snodes
+           7*nrows*I + 2*nrows*D + 2*nrows*C + 3*nrows*P  +
+           1*w*nrows*I + 1*w*nrows*D +
+           5*w*maxc *I + 1*w*maxc *D +
+
+    total: width < 0, no snodes
+           7*nrows*I + 2*nrows*D + 2*nrows*C + 3*nrows*P  +
+           5*w*maxc *I + 1*w*maxc *D +
+
+    where:
+      I=sizeof(int) D=sizeof(taucs_datatype) C=sizeof(char) P=sizeof(void*)
+      maxc=maxcolcount w=spawidth (=active panelwidth) s=max_snode
+*/
+
+
+
+static double time_total;
+
+#ifdef DETAILED_TIMING
+static double time_colcol;
+static double time_colcol_1;
+static double time_colcol_2;
+static double time_factor;
+static double time_scatter;
+static double time_gather;
+static double time_append;
+static double time_read;
+#ifdef SNODES
+static double time_snode_tmp;
+static double time_snode_1 = 0;
+static double time_snode_2 = 0;
+static double time_snode_21 = 0;
+static double time_snode_3 = 0;
+static double time_snode_4 = 0;
+static double time_snode_detect;
+static double time_snode_prepare;
+static double time_snode_dense;
+#endif
+
+static double bytes_read;
+static double bytes_appended;
+static double col_ooc_updates;
+static double col_read;
+
+static double  flops;
+static double  scatters;
+static double  gathers;
+static double  rowlist_ops;
+static double  num_heap_ops;
+#endif /* DETAILED_TIMING */
+
+/****************************************************/
+/*                                                  */
+/* Heap operations                                  */
+/*                                                  */
+/****************************************************/
+
+/* HEAP OPERATIONS */
+
+
+/* Sift the entry at position p down until the subtree rooted there is a
+   min-heap again; heap[] holds indices ordered by their ipivots[] key. */
+static void num_heap_heapify(int* heap, int* heapsize, int* ipivots, int p)
+{
+  int left, right, best, swap;
+
+  for (;;) {
+#ifdef DETAILED_TIMING
+    num_heap_ops += 1.0;
+#endif
+
+    left  = 2*p + 1;
+    right = left + 1;
+
+#ifdef INTERNAL_CHECKS
+    if ((left-1)/2 != p || (right-1)/2 != p)
+      taucs_printf("oocsp_numfact: left-right computation in heap failed\n");
+#endif
+
+    /* pick the smallest key among p and its (existing) children */
+    best = p;
+    if (left < *heapsize && ipivots[heap[left]] < ipivots[heap[best]])
+      best = left;
+    if (right < *heapsize && ipivots[heap[right]] < ipivots[heap[best]])
+      best = right;
+
+    if (best == p) return; /* heap property holds here */
+
+    swap       = heap[p];
+    heap[p]    = heap[best];
+    heap[best] = swap;
+    p          = best;     /* continue one level down */
+  }
+}
+
+/* Insert index i into the min-heap keyed by ipivots[]: grow the heap by one
+   slot and shift larger-keyed ancestors down until i's position is found. */
+static void num_heap_insert(int* heap, int* heapsize, int* ipivots, int i)
+{
+  int hole, up;
+
+  hole = (*heapsize)++;       /* the new slot is the old heap size */
+
+#ifdef DETAILED_TIMING
+  num_heap_ops += 1.0;
+#endif
+
+  while (hole > 0) {
+    up = (hole-1) / 2;
+    if (ipivots[heap[up]] <= ipivots[i]) break;
+    heap[hole] = heap[up];
+    hole = up;
+#ifdef DETAILED_TIMING
+    num_heap_ops += 1.0;
+#endif
+  }
+
+  heap[hole] = i;
+}
+
+/* Remove and return the root of the heap (heap[0]), or return -1 when the
+   heap is empty. */
+static int num_heap_extractmin(int* heap, int* heapsize, int* ipivots)
+{
+  int top;
+
+#ifdef DETAILED_TIMING
+  num_heap_ops += 1.0;
+#endif
+
+  if (*heapsize < 1)
+    return -1;
+
+  /* the last entry takes over the root, then is sifted back down */
+  top = heap[0];
+  *heapsize -= 1;
+  heap[0] = heap[*heapsize];
+  num_heap_heapify(heap,heapsize,ipivots,0);
+  return top;
+}
+
+/****************************************************/
+/*                                                  */
+/* Row lists                                        */
+/*                                                  */
+/****************************************************/
+
+static int* rowlists_head;   /* one head per row */
+static int* rowlists_colind; 
+static int* rowlists_next;
+static int* rowlists_prev;
+static int  rowlists_size;
+static int  rowlists_freehead;
+/*static int  rowlists_freenext;*/
+
+static void rowlists_finalize() /* release the four rowlists_* arrays */
+{
+  taucs_free(rowlists_prev);
+  taucs_free(rowlists_next);
+  taucs_free(rowlists_colind);
+  taucs_free(rowlists_head);
+}
+
+/* Allocate the row lists: one head per row (all initially -1, i.e. empty)
+   and a pool of `size` nodes chained through rowlists_next as the freelist.
+   The freelist is singly linked and leaves rowlists_prev unset. */
+static void rowlists_init(int size, int nrows)
+{
+  int i;
+
+  rowlists_size = size;
+
+  rowlists_head   = (int*)taucs_malloc(nrows*sizeof(int));
+  rowlists_colind = (int*)taucs_malloc(rowlists_size*sizeof(int));
+  rowlists_next   = (int*)taucs_malloc(rowlists_size*sizeof(int));
+  rowlists_prev   = (int*)taucs_malloc(rowlists_size*sizeof(int));
+  assert(rowlists_head && rowlists_colind && rowlists_next && rowlists_prev); 
+
+  for (i=0; i<nrows; i++) rowlists_head[i] = -1;
+
+  /* An empty pool has no node 0 and no last node to terminate, so start
+     with an empty freelist instead of writing rowlists_next[-1]. */
+  rowlists_freehead = (rowlists_size > 0) ? 0 : -1;
+  for (i=0; i<rowlists_size; i++) {
+    rowlists_next[i] = (i+1 < rowlists_size) ? i+1 : -1;
+  }
+}
+
+/* Take a node from the freelist, store panelcol in it and push it onto the
+   front of the list of `row`.  Returns the node's index.  Prints a message
+   and exits the program when the freelist is empty. */
+static int rowlists_insert(int row, int panelcol)
+{
+  int node, old_head;
+
+#ifdef DETAILED_TIMING
+  rowlist_ops += 1.0;
+#endif /* DETAILED_TIMING */
+
+  node = rowlists_freehead;
+  if (node == -1) {
+    taucs_printf("oocsp_numfact: Out of rowlist memory\n");
+    exit(1);
+  }
+
+  /* unlink from the freelist, which is singly linked (no prev pointers) */
+  rowlists_freehead = rowlists_next[node];
+
+  /* the node becomes the new head of the row's doubly linked list */
+  old_head = rowlists_head[row];
+  rowlists_colind[node] = panelcol;
+  rowlists_prev[node]   = -1;
+  rowlists_next[node]   = old_head;
+  if (old_head != -1)
+    rowlists_prev[old_head] = node;
+
+  rowlists_head[row] = node;
+
+  return node;
+}
+
+/* Unlink node `index` from the list of `row` and return it to the freelist. */
+static void rowlists_delete(int row, int index)
+{
+  int after  = rowlists_next[index];
+  int before = rowlists_prev[index];
+
+#ifdef DETAILED_TIMING
+  rowlist_ops += 1.0;
+#endif /* DETAILED_TIMING */
+
+  if (rowlists_head[row] == index) rowlists_head[row] = after;
+  if (after  != -1) rowlists_prev[after]  = before;
+  if (before != -1) rowlists_next[before] = after;
+
+  /* push the node onto the front of the freelist */
+  rowlists_next[index] = rowlists_freehead;
+  rowlists_freehead    = index;
+}
+
+/* Return 1 when every pool node is on the freelist (no row list holds a
+   node), else 0.  Counts the freelist by walking it from rowlists_freehead. */
+static int rowlists_isempty()
+{
+  int node;
+  int nfree = 0;
+
+  for (node = rowlists_freehead; node != -1; node = rowlists_next[node]) {
+    nfree++;
+  }
+
+  /* all rowlists_size nodes free <=> nothing is linked into a row list */
+  return (nfree == rowlists_size) ? 1 : 0;
+}
+  
+/****************************************************/
+/*                                                  */
+/* Spa routines                                     */
+/*                                                  */
+/****************************************************/
+
+#if 0
+static
+int intcmp(const void* v1, const void* v2)
+{
+  const int* i1 = (const int*) v1;
+  const int* i2 = (const int*) v2;
+
+  if      (*i1 < *i2) { return -1; }
+  else if (*i1 > *i2) { return  1; }
+  else                  return  0;
+}
+#endif
+
+static taucs_datatype*  spa;
+static char*            spamap;
+
+static void spa_finalize() /* frees the file-level spa and spamap arrays */
+{
+  taucs_free(spamap);
+  taucs_free(spa);
+}
+
+/* Allocate the file-level spa/spamap arrays (nrows entries each) and clear them. */
+static void spa_init(int nrows)
+{
+  int row;
+
+  spa    = (taucs_datatype*) taucs_malloc(nrows*sizeof(taucs_datatype));
+  spamap = (char*)  taucs_malloc(nrows*sizeof(char));
+  assert(spa && spamap);
+  for (row = 0; row < nrows; row++) { spa[row] = taucs_zero; spamap[row] = 0; }
+}
+
+/* Copy the spa entries listed in a_ind[0..a_nnz-1] into a_re (same order)
+   and reset each visited spa slot and its spamap flag to zero.
+   The spa/spamap parameters shadow the file-level arrays of the same name. */
+static void
+gather(int             a_nnz,
+       taucs_datatype* a_re,
+       int*            a_ind,
+       taucs_datatype* spa,
+       char*           spamap)
+{
+  int k;
+  int row;
+#ifdef DETAILED_TIMING
+  double t_start;
+
+  gathers += ((double) a_nnz);
+  t_start = taucs_wtime();
+#endif
+
+  for (k = 0; k < a_nnz; k++) {
+    row         = a_ind[k];
+    a_re[k]     = spa[row];
+    spamap[row] = 0;
+    spa[row]    = taucs_zero;
+  }
+#ifdef DETAILED_TIMING
+  time_gather += (taucs_wtime() - t_start);
+#endif
+}
+
+/* Store a_re[k] into spa[a_ind[k]] and set the matching spamap flag to 1,
+   for k = 0..a_nnz-1. */
+static void
+scatter(int             a_nnz,
+        taucs_datatype* a_re,
+        int*            a_ind,
+        taucs_datatype* spa,
+        char*           spamap)
+{
+  int k, row;
+#ifdef DETAILED_TIMING
+  double t_start;
+
+  scatters += ((double) a_nnz);
+  t_start = taucs_wtime();
+#endif
+
+  for (k = 0; k < a_nnz; k++) {
+    row         = a_ind[k];
+    spa[row]    = a_re[k];
+    spamap[row] = 1;
+  }
+
+#ifdef DETAILED_TIMING
+  time_scatter += (taucs_wtime() - t_start);
+#endif
+}
+
+/****************************************************/
+/*                                                  */
+/* Column updates                                   */
+/*                                                  */
+/****************************************************/
+
+/* For each panel column q = panelcols[c] whose spa entry in row pivotindex is
+   nonzero: spa[q*nrows + l_ind[k]] -= spa[q*nrows + pivotindex] * l_re[k].  Rows
+   seen for the first time are flagged, zeroed, listed in a_ind[q] and row-listed. */
+static
+void spcol_spa_update(int pivotindex,
+                      taucs_datatype* l_re,
+                      int*    l_ind,
+                      int     l_nnz,
+                      int*    panelcols,
+                      int     panelcols_n,
+                      int     nrows,
+                      int**   a_inrowlist,
+                      taucs_datatype* spa,
+                      char*   spamap,
+                      int**   a_ind,
+                      int*    a_nnz)
+{
+  int k, c, q, row, off;   /* off: start of column q inside spa / spamap */
+
+  for (c = 0; c < panelcols_n; c++) {
+    q   = panelcols[c];
+    off = q*nrows;
+    if (   taucs_re(spa[off + pivotindex]) == 0.0
+        && taucs_im(spa[off + pivotindex]) == 0.0) continue;
+    for (k = 0; k < l_nnz; k++) {
+      row = l_ind[k];
+      if (!spamap[off + row]) {
+        spamap[off + row] = 1;
+        spa   [off + row] = taucs_zero;
+        a_ind      [q][ a_nnz[q] ] = row;
+        a_inrowlist[q][ a_nnz[q] ] = rowlists_insert(row,q);
+        (a_nnz[q])++;
+      }
+      /* the pivot entry is deliberately re-read from spa on every iteration */
+      spa[off + row] = taucs_sub(spa[off + row],
+                                 taucs_mul(spa[off + pivotindex] , l_re[k]));
+    }
+  }
+}
+
+#ifdef SIMPLE_COL_COL
+static void
+spcol_spcol_update(int pivotindex,
+		   taucs_datatype* l_re,
+		   int*    l_ind,
+		   int     l_nnz,
+		   int     panelcol,
+		   int*    a_inrowlist,
+		   taucs_datatype* spa,
+		   char*  spamap,
+		   int*    a_ind,
+		   int*    a_nnz,
+		   int     lu_colcount)
+{
+  int i,ip;
+  taucs_datatype pv;
+
+  pv = spa[pivotindex];
+  
+  if (taucs_iszero(pv)) return;
+
+#ifdef DETAILED_TIMING
+  flops += 2.0 * ((double) l_nnz);
+#endif /* DETAILED_TIMING */
+
+  for (ip=0; ip<l_nnz; ip++) {
+    i = l_ind[ip];
+    if (spamap[i] == 0) {
+      spamap[i] = 1;
+      spa[i] = taucs_zero;
+      a_ind[ *a_nnz ] = i;
+      a_inrowlist[ *a_nnz ] = rowlists_insert(i,panelcol);
+      (*a_nnz)++;
+    }
+    /*spa[i] -= (spa[pivotindex] * l_re[ip]);*/
+    spa[i] = taucs_sub(spa[i],taucs_mul(spa[pivotindex] , l_re[ip]));
+  }
+
+  /*
+  if ((*a_nnz) > lu_colcount) {
+    taucs_printf("oocsp_numfact: prediction=%d, size now=%d\n",lu_colcount,*a_nnz);
+    taucs_printf("oocsp_numfact: Column expands beyond prediction\n");
+  }
+  */
+
+}
+#else /* simple col col */
+
+static int oocsp_spcol_n1 = 0;
+static int oocsp_spcol_n2 = 0;
+
+/* Subtract pv * l_re[k] from spa[l_ind[k]] for all k < l_nnz, where pv is the
+   value of spa[pivotindex] on entry; nothing is done when pv is zero.
+   Work proceeds in chunks of BLOCK entries.  Each chunk is first updated
+   unconditionally while AND-ing the spamap flags; only if some flag was
+   clear is the chunk rescanned to register the new rows (spamap, a_ind,
+   rowlists_insert) and to overwrite their value with -(pv * l_re[k]).
+   lu_colcount is not used. */
+static void
+spcol_spcol_update(int pivotindex,
+                   taucs_datatype* l_re,
+                   int*    l_ind,
+                   int     l_nnz,
+                   int     panelcol,
+                   int*    a_inrowlist,
+                   taucs_datatype* spa,
+                   char*  spamap,
+                   int*    a_ind,
+                   int*    a_nnz,
+                   int     lu_colcount)
+{
+  register int row;
+  register int k;
+  register int first;
+  register int last;
+  register char all_present;
+  register taucs_datatype pv;
+
+  pv = spa[pivotindex];
+  if (taucs_iszero(pv)) return;
+
+#ifdef DETAILED_TIMING
+  flops += 2.0 * ((double) l_nnz);
+#endif /* DETAILED_TIMING */
+
+  for (first = 0; first < l_nnz; first += BLOCK) {
+    last = min(first + BLOCK,l_nnz);
+
+    all_present = 1;
+    oocsp_spcol_n1++;
+    for (k = first; k < last; k++) {
+      row = l_ind[k];
+      all_present &= spamap[row];
+      spa[row] = taucs_sub(spa[row], taucs_mul(pv , l_re[k]));
+    }
+
+    if (all_present) continue;
+
+    oocsp_spcol_n2++;
+    for (k = first; k < last; k++) {
+      row = l_ind[k];
+      if (spamap[row] != 0) continue;
+      spamap[row] = 1;
+      a_ind[ *a_nnz ] = row;
+      a_inrowlist[ *a_nnz ] = rowlists_insert(row,panelcol);
+      (*a_nnz)++;
+      /* new row: its previous spa content is discarded (zero, then update) */
+      spa[row] = taucs_neg(taucs_mul(pv , l_re[k]));
+    }
+  }
+}
+#endif
+
+#if 0
+static void spcol_panel_update(int pivotindex,
+			       taucs_datatype* l_re,
+			       int*    l_ind,
+			       int     l_nnz,
+			       int*    subpanel, int subpanel_size, 
+			       taucs_datatype* subpanel_tmp,
+			       int**   a_inrowlist,
+			       taucs_datatype* spa,
+			       char*   spamap,
+			       int**   a_ind,
+			       int*    a_nnz,
+			       int     nrows)
+{
+  int i,ii,ip,q,j;
+  taucs_datatype x;
+
+  assert(0);
+
+#ifdef DETAILED_TIMING
+  flops += ((double) subpanel_size) * 2.0 * ((double) l_nnz);
+#endif /* DETAILED_TIMING */
+
+  for (j=0; j<subpanel_size; j++) {
+    q = subpanel[j];
+    subpanel_tmp[q] = spa[q*nrows+pivotindex];
+  }
+
+  for (ip=0; ip<l_nnz; ip++) {
+    i = l_ind[ip];
+    x = l_re[ip];
+    for (j=0; j<subpanel_size; j++) {
+      q = subpanel[j];
+      ii = (q*nrows) + i;
+      if (spamap[ii] == 0) {
+	spamap[ii] = 1;
+	spa[ii] = taucs_zero;
+	a_ind[q][ a_nnz[q] ] = i;
+	a_inrowlist[q][ a_nnz[q] ] = rowlists_insert(i,q);
+	(a_nnz[q])++;
+      }
+      /*spa[ii] -= (subpanel_tmp[q] * x);*/
+      spa[ii] = taucs_sub(spa[ii], taucs_mul(subpanel_tmp[q] , x));
+    }
+  }
+}
+#endif
+
+/****************************************************/
+/*                                                  */
+/* OLD STUFF                                        */
+/*                                                  */
+/****************************************************/
+
+#if 0
+void x_heap_insert( int* heap, int* heapsize,  int* ipivots, int value) 
+{
+  heap[ *heapsize ] = value;
+  (*heapsize)++;
+
+  taucs_printf("oocsp_numfact: heap insert %d\n",value);
+}
+
+int x_heap_extractmin( int* heap, int* heapsize, int* ipivots) 
+{
+  int i,m,mindex;
+
+  if (*heapsize == 0) return -1;
+
+  m = INT_MAX;
+
+  for (i=0; i<(*heapsize); i++) 
+    if (ipivots[heap[i]] < ipivots[m]) {m = heap[i]; mindex=i;}
+
+  if (m == INT_MAX) return -1;
+
+  for (i=mindex; i<(*heapsize)-1; i++) 
+    heap[ i ] = heap[i+1];
+
+  (*heapsize)--;
+ 
+  taucs_printf("oocsp_numfact: heap extractmin %d (%d)\n",m,ipivots[m]);
+
+  return m;
+}
+#endif
+
+/****************************************************/
+/*                                                  */
+/* NUMERICAL FACTORIZATION MAIN ROUTINE             */
+/*                                                  */
+/****************************************************/
+
+static
+void oocsp_numfact (taucs_ccs_matrix* A, int* colperm,
+		    taucs_io_handle* LU,
+		    int* panels,
+		    int* schedstart,
+		    int* schedend,
+		    int* fetchnext,
+		    int* ejectnext,
+		    int* lcolcount,
+		    int* ucolcount,
+		    int spawidth,
+		    int maxsn
+		    )
+{
+  int i,j,k,ip,p,q,qp,ii,ks;/* ip_next,jp_next,jp omer*/
+
+  int nrows, ncols;
+
+  
+  /*char    fname[256]; 
+  int     file; 
+  int     mode; 
+  mode_t  perm; omer*/
+
+  /*int     len,status; omer*/
+
+  /*
+  double* pr;
+  */
+
+  /* NEW VARS (Sivan, for this function) */
+
+  int fn;
+  int en;
+  int nsteps;
+  int step;
+
+  /*int nnz; omer*/
+  /*off_t offset; omer*/
+  /*ssize_t io_size; omer*/
+
+  int  heapsize;
+  int* heap;
+
+  char* nnzmap;   /* bit vector */
+  char* lindices; /* bit vector */
+  /* char* uindices; */ /* bit vector */
+
+  int* panel_id;
+  int* panel_nnz;
+  int**    panel_ind;
+  int**    panel_inrowlist;
+  taucs_datatype** panel_re;
+
+  int* Lclen;
+  int* Uclen;
+
+  /*
+  taucs_datatype* update_tmp;
+  int* update_vec;
+  int  update_vec_next;
+  */
+
+#ifdef SPA_ONEARRAY
+  taucs_datatype*  panel_spa = NULL; /* warning */
+  char*    panel_spamap = NULL; /* warning */
+#else
+  taucs_datatype**  panel_spa;
+  char**    panel_spamap;
+#endif
+
+  int     unext,lnext;
+  int*    lu_ind;
+  taucs_datatype* lu_re;
+
+  int* ipivots;
+
+  int    maxind;
+  double maxval, absval;
+  int    pivotindex;
+
+#ifdef SNODES
+  int*    snode_ind;
+  taucs_datatype* snode_re;
+  /*char*   snode_map;*/ /* sivan changed to int to support m2 */
+  int*    snode_map;
+  int*    snode_pivrows;
+  int*    snode_index;
+  int*    pivots;
+
+  int     snode_hash=0; /* warning */
+  int     snode_size, snode_flag, snode_nnz, snode_lastcol;
+  int     snode_last;
+  int     hash;
+  int     snode_id;
+
+  taucs_datatype* S;
+  taucs_datatype* P;
+  int*    spa_updcols;
+  int     spa_n;
+  int*    m2;
+
+  int*    srows;       /* indices of rows in the supernode */
+  int     srows_n;     /* number of rows in the supernode  */
+  int     srow_next;
+
+  int     dense_flag;
+  int     tmp;
+#endif
+
+  int maxcolcount;
+  
+  /*double time_tmp; omer*/
+
+  /* READ GLOBALS */
+
+  taucs_printf("oocsp_numfact: Using %.0lf MBytes of memory\n",
+	     remaining_memory/1048576.0);
+
+    
+  /* START THE FACTORIZATION */
+  /*
+  nrows = A->nrows;
+  ncols = A->ncols;
+  */
+  nrows = A->m;
+  ncols = A->n;
+
+#ifdef DETAILED_TIMING
+  flops       = 0.0;
+  scatters    = 0.0;
+  gathers     = 0.0;
+  num_heap_ops    = 0.0;
+  rowlist_ops = 0.0;
+  time_append = 0.0;
+  time_read   = 0.0;
+  time_colcol = 0.0;
+  time_colcol_1 = 0.0;
+  time_colcol_2 = 0.0;
+  time_factor = 0.0;
+  time_scatter= 0.0;
+  time_gather = 0.0;
+#ifdef SNODES
+  time_snode_detect  = 0.0;
+  time_snode_prepare = 0.0;
+  time_snode_dense   = 0.0;
+#endif
+  bytes_read  = 0.0;
+  bytes_appended = 0.0;
+  col_read    = 0.0;
+  col_ooc_updates = 0.0;
+#endif /* DETAILED_TIMING */
+
+  time_total  = taucs_wtime();
+  
+  maxcolcount = 0;
+  for (j=0; j<ncols; j++)
+    maxcolcount = max( maxcolcount, lcolcount[j]+ucolcount[j] );
+  taucs_printf("oocsp_numfact: maxcolcount = %d, nrows = %d, spawidth = %d, maxsn = %d\n",
+	     maxcolcount, nrows, spawidth, maxsn);
+
+  Lclen            = (int*) taucs_calloc(ncols,sizeof(int));
+  Uclen            = (int*) taucs_calloc(ncols,sizeof(int));
+  assert(Uclen && Lclen);
+
+  ipivots          = (int*)taucs_malloc(ncols*sizeof(int));
+  assert(ipivots);
+  for (i=0; i<nrows; i++) ipivots[i] = INT_MAX;
+
+  /* create row lists */
+
+  rowlists_init(maxcolcount * iabs(spawidth),nrows);
+  
+  fn = 0;
+  en = 0;
+  
+  lindices = (char*)taucs_malloc(nrows*sizeof(char));
+  /* uindices = (char*)taucs_malloc(nrows*sizeof(char)); */
+
+  for (i=0; i<nrows; i++) {
+    lindices[i] = 1;
+    /*uindices[i] = 0;*/
+  }
+
+  nnzmap   = (char*)taucs_malloc(nrows*sizeof(char));
+  heap     = (int*) taucs_malloc(nrows*sizeof(int));
+
+#ifdef SNODES
+  /* the supernodes consists of contiguous columns and we need to know */
+  /* the corresponding pivot rows */
+  pivots           = (int*)taucs_malloc(ncols*sizeof(int));
+  assert(pivots);
+  for (i=0; i<ncols; i++) pivots[i] = INT_MAX;
+
+  snode_lastcol = snode_nnz = snode_size = 0;
+  snode_pivrows = (int*)   taucs_malloc(maxsn*sizeof(int)); /* size was ncols */
+  snode_ind     = (int*)   taucs_malloc(nrows*sizeof(int));
+  snode_re      = (taucs_datatype*)taucs_malloc(nrows*sizeof(taucs_datatype));
+  snode_map     = (int*)   taucs_malloc(nrows*sizeof(int));
+
+  snode_id      = 0;
+  snode_last    = 0;
+  snode_index   = (int*)taucs_malloc(ncols*sizeof(int));
+  for (i=0; i<ncols; i++) snode_index[i] = -1;
+  for (i=0; i<nrows; i++) snode_map[i]   = -1;
+
+  S = (taucs_datatype*) taucs_malloc( maxcolcount * maxsn  * sizeof(taucs_datatype) );
+  P = (taucs_datatype*) taucs_malloc( maxcolcount * spawidth   * sizeof(taucs_datatype) );
+  srows = (int*) taucs_malloc( maxcolcount * sizeof(int) );
+  spa_updcols = (int*) taucs_malloc( spawidth * sizeof(int) );
+  assert(spa_updcols);
+  assert(srows);
+  assert(S);
+  assert(P);
+
+  /*
+  lu_re    = (taucs_datatype*)taucs_malloc(maxsn * nrows * sizeof(taucs_datatype) );
+  lu_ind   = (int*)   taucs_malloc(maxsn * nrows * sizeof(int)    );
+  */
+  lu_re    = (taucs_datatype*)taucs_malloc(maxsn * maxcolcount * sizeof(taucs_datatype) );
+  lu_ind   = (int*)   taucs_malloc(maxsn * maxcolcount * sizeof(int)    );
+  taucs_printf("lu_re  = %08x -> %08x\n",lu_re,lu_re+(maxsn*maxcolcount));
+  taucs_printf("lu_ind = %08x -> %08x\n",lu_ind,lu_ind+(maxsn*maxcolcount));
+#else
+  lu_re    = (taucs_datatype*)taucs_malloc(maxcolcount * sizeof(taucs_datatype));
+  lu_ind   = (int*)   taucs_malloc(maxcolcount * sizeof(int));
+  /*
+  lu_re    = (taucs_datatype*)taucs_malloc(nrows*sizeof(taucs_datatype));
+  lu_ind   = (int*)taucs_malloc(nrows*sizeof(int));
+  */
+#endif
+
+  /* These two can be smaller */
+
+  /*
+  update_vec = (int*)taucs_malloc(ncols*sizeof(int));
+  update_tmp = (taucs_datatype*)taucs_malloc(ncols*sizeof(taucs_datatype));
+  */
+
+  panel_id  = (int*)taucs_malloc(ncols*sizeof(int));
+  panel_nnz = (int*)taucs_malloc(ncols*sizeof(int));
+  panel_ind = (int**)taucs_malloc(ncols*sizeof(int*));
+  panel_inrowlist = (int**)taucs_malloc(ncols*sizeof(int*));
+  panel_re  = (taucs_datatype**)taucs_malloc(ncols*sizeof(taucs_datatype*));
+  for (i=0; i<ncols; i++) panel_id[i] = -1;
+
+  if (spawidth > 0) {
+#ifdef SPA_ONEARRAY
+    panel_spa = (taucs_datatype*)taucs_malloc(spawidth*nrows*sizeof(taucs_datatype));
+    panel_spamap = (char*)taucs_malloc(spawidth*nrows*sizeof(char) );
+    assert(panel_spa && panel_spamap);
+#else
+    panel_spa = (taucs_datatype**) taucs_malloc(spawidth*sizeof(taucs_datatype*));
+    panel_spamap = (char**) taucs_malloc(spawidth*sizeof(char*) );
+    assert(panel_spa && panel_spamap);
+#endif
+  } 
+    
+  spa_init( nrows );
+
+  nsteps=0; 
+  for (i=0; i<ncols; i++) {
+    if ((int) (schedstart[i]) > nsteps) nsteps = (int) schedstart[i];
+  }
+
+
+  /* INITIALIZE HEAP */
+
+  heapsize = 0;
+  for (i=0; i<nrows; i++) nnzmap[i] = 0;
+
+  taucs_printf("oocsp_numfact: Starting numerical factorization (%d steps)\n",nsteps);
+
+  p = 0;
+
+  for (step=1; step<=nsteps; step++) {
+
+    taucs_printf("oocsp_numfact: Starting step %d/%d\r",step,nsteps);
+
+    if (p==0) {
+      /*taucs_printf("oocsp_numfact: (new panel)\n");*/
+#ifdef SNODES
+      snode_last = -1;
+#endif
+
+      if (!rowlists_isempty())
+	taucs_printf("oocsp_numfact: Internal Error (row lists not empty)\n");
+      if (heapsize) 
+	taucs_printf("oocsp_numfact: Internal Error (heap not empty; 1)\n");
+      for (i=0; i<nrows; i++) 
+	if (nnzmap[i]) taucs_printf("oocsp_numfact: Internal Error (heap not empty; 2)\n");
+    } else {
+      taucs_printf("oocsp_numfact: (same panel)\n");
+    }
+    
+    /* LOAD A PARTIAL PANEL */
+    
+    while (fn < ncols && 
+            (schedstart[fetchnext[fn]]) == step) {
+      j = fetchnext[fn];
+
+      /*
+      if (A->clen[j] > lcolcount[j]+ucolcount[j]) {
+	taucs_printf("oocsp_numfact: Column %d in matrix larger than L+U estimate\n",j);
+	taucs_printf("oocsp_numfact: Size %d > %d+%d\n",A->clen[j],lcolcount[j],ucolcount[j]); 
+	taucs_printf("oocsp_numfact: Aborting\n");
+      }
+
+      panel_nnz[p] = A->clen[j];
+      */
+
+      /* New code: Sivan 28 Feb 2002 */
+
+      panel_nnz[p] = (A->colptr)[colperm[j]+1] - (A->colptr)[colperm[j]];
+      panel_id[p]  = j;
+      if (lcolcount[j] < 1 
+	  || lcolcount[j] > nrows
+	  || ucolcount[j] < 1 
+	  || ucolcount[j] > nrows) {
+	taucs_printf("oocsp_numfact: Column %d: l,u colcounts = (%d+%d)\n",
+		  j,lcolcount[j],ucolcount[j]);
+	taucs_printf("oocsp_numfact: Invalid column nonzero count\n\n");
+      }	
+
+      panel_inrowlist[p] = (int*)    taucs_malloc((lcolcount[j] + ucolcount[j]) * sizeof(int));
+      panel_ind[p]       = (int*)    taucs_malloc((lcolcount[j] + ucolcount[j]) * sizeof(int));
+      panel_re[p]        = (taucs_datatype*) taucs_malloc((lcolcount[j] + ucolcount[j]) * sizeof(taucs_datatype));
+      if (!panel_inrowlist[p] || 
+	  !panel_ind[p] || 
+	  !panel_re[p]) {
+	fprintf(stderr,"out of memory for panel compressed vector\n");
+	fprintf(stderr,"j = %d lcolcount = %d ucolcount = %d\n",
+		j,lcolcount[j],ucolcount[j]);
+	exit(1);
+      }
+      assert (panel_inrowlist[p] && panel_ind[p] && panel_re[p]);
+
+      if (spawidth > 0) {
+#ifdef SPA_ONEARRAY
+#else
+	panel_spa[p]     = (taucs_datatype*) taucs_malloc(nrows * sizeof(taucs_datatype));
+	panel_spamap[p]  = (char*)   taucs_malloc(nrows * sizeof(char));
+	assert(panel_spa[p] && panel_spamap[p]);
+#endif
+      }
+
+      /*
+      time_tmp = taucs_wtime();
+      oocsp_readcol(A,j,panel_ind[p],panel_re[p]);
+      time_read += (taucs_wtime() - time_tmp);
+      bytes_read += (double) (panel_nnz[p] * (sizeof(taucs_datatype)+sizeof(int)));
+      */
+
+      /* new code: Sivan 28 Feb 2002 */
+      {
+	int jp,ip,i;
+	jp = colperm[j];
+	for (i=0, ip=(A->colptr)[jp]; ip<(A->colptr)[jp+1]; i++, ip++) {
+	  (panel_ind[p])[i] = (A->rowind)[ip];
+	  (panel_re [p])[i] = (A->taucs_values)[ip];
+	  /*
+	  printf(">>> reading column number %d (index %d), row %d value %.2e\n",
+		 j,jp,(panel_ind[p])[i],(panel_re[p])[i]);
+	  */
+	}
+      }
+
+      if (spawidth > 0) {
+	if (p >= spawidth) {
+	  taucs_printf("oocsp_numfact: p=%d spawidth=%lg\n",p,spawidth);
+	  taucs_printf("oocsp_numfact: Panel wider than spawidth\n");
+	} 
+	scatter(panel_nnz[p],panel_re[p],panel_ind[p],
+#ifdef SPA_ONEARRAY
+		panel_spa+(p * nrows),
+		panel_spamap+(p * nrows)
+#else
+		panel_spa[p],
+		panel_spamap[p]
+#endif
+		);
+      }
+
+      for (i=0; i<panel_nnz[p]; i++) {
+	panel_inrowlist[p][i] = rowlists_insert(panel_ind[p][i],
+						p);
+
+	/*if (nnzmap[ panel_ind[p][i] ] == 0 && uindices[ panel_ind[p][i] ]) {*/
+	if (nnzmap[ panel_ind[p][i] ] == 0 && !lindices[ panel_ind[p][i] ]) {
+	  nnzmap[ panel_ind[p][i] ] = 1;
+	  num_heap_insert( heap, &heapsize, ipivots, panel_ind[p][i] );
+	}
+      }
+
+      p++;
+      fn++;
+    }
+
+    /*taucs_printf("oocsp_numfact: Updating (step %d)\n",step);*/
+
+#ifndef SNODES
+    /* we need to prevent panel cols from being loaded! */
+
+    while ((i = num_heap_extractmin( heap, &heapsize, ipivots)) != -1) {
+      if (i == INT_MAX) continue;
+
+      nnzmap[ i ] = 0;
+      
+      k = ipivots[i];
+      if (k == INT_MAX) continue;
+      
+      /*taucs_printf("oocsp_numfact: extracted row %d (col %d) from heap. %d\n",i,k,uindices[i]);*/
+
+      if (panels[k] == panels[ panel_id[0] ]) {
+	/*taucs_printf("oocsp_numfact: skipping column from this panel (col %d, panel %lg)\n",
+	       k,panels[k]);*/
+	continue;
+      }
+      
+      /*if (lindices[i]) continue;*/   /*if (!uindices[i]) continue;*/
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+      /* Sivan: replaced 2 March 2002 */
+      /*oocsp_readcol(L,k,lu_ind,lu_re);*/
+      Lreadcol(LU,k,Lclen[k],lu_ind,lu_re);
+
+#ifdef DETAILED_TIMING
+      bytes_read += (double) (Lclen[k] * (sizeof(taucs_datatype)+sizeof(int)));
+      col_read += 1.0;
+#endif /* DETAILED_TIMING */
+
+      /*
+      for (ks = k+1; ks<ncols && snode_index[ks]==snode_index[k]; ks++) {
+	oocsp_readcol(L,ks,lu_ind+((ks-k)*nrows),lu_re+((ks-k)*nrows));
+	bytes_read += (double) ((L.clen[ks]) * (sizeof(taucs_datatype)+sizeof(int)));
+#ifdef DETAILED_TIMING
+	col_read += 1.0;
+#endif
+      }
+      taucs_printf("oocsp_numfact: Read supernode, %d cols\n",ks-k+1);
+      */
+
+#ifdef DETAILED_TIMING
+      time_read += (taucs_wtime() - time_tmp);
+#endif
+
+
+      /*
+      taucs_printf("oocsp_numfact: col %d (panel %lg, now %lg; read from disk) for row %d, length %d\n",
+	     k,panels[k],panels[panel_id[0]],i,(L.clen)[k]);
+	     */
+
+      if (Lclen[k] < 0) {
+	taucs_printf("oocsp_numfact: k=%d len=%d\n",k,Lclen[k]);
+	taucs_printf("oocsp_numfact: Internal Error\n");
+      }
+
+      for (ii=0; ii<Lclen[k]; ii++) {
+	/*if (nnzmap[ lu_ind[ii] ] == 0 && uindices[ lu_ind[ii] ]*/
+	if (nnzmap[ lu_ind[ii] ] == 0 && !lindices[ lu_ind[ii] ]
+	    && !taucs_iszero(lu_re[ii])) {
+	  nnzmap[ lu_ind[ii] ] = 1;
+	  num_heap_insert( heap, &heapsize, ipivots, lu_ind[ii] );
+	  /*taucs_printf("oocsp_numfact: inserting row %d into heap\n",lu_ind[ii]);*/
+	}
+      }
+
+      for(qp = rowlists_head[i]; qp != -1; qp = rowlists_next[qp]) {
+	q = rowlists_colind[qp];
+#ifdef DETAILED_TIMING
+	time_tmp = taucs_wtime();
+#endif
+	if (spawidth <= 0) {
+	  scatter(panel_nnz[q],panel_re[q],panel_ind[q],
+		  spa,spamap);
+	  spcol_spcol_update(i,
+			     lu_re,lu_ind,Lclen[k],
+			     q,panel_inrowlist[q],
+			     spa,
+			     spamap,
+			     panel_ind[q],&(panel_nnz[q]),
+			     lcolcount[panel_id[q]]+ucolcount[panel_id[q]]);
+	  gather(panel_nnz[q],panel_re[q],panel_ind[q],
+		 spa,spamap);
+	} else {
+	  spcol_spcol_update(i,
+			     lu_re,lu_ind,Lclen[k],
+			     q,panel_inrowlist[q],
+#ifdef SPA_ONEARRAY
+			     panel_spa+(q * nrows),
+			     panel_spamap+(q * nrows),
+#else
+			     panel_spa[q],
+			     panel_spamap[q],
+#endif
+			     panel_ind[q],&(panel_nnz[q]),
+			     lcolcount[panel_id[q]]+ucolcount[panel_id[q]]);
+	}
+#ifdef DETAILED_TIMING
+	time_colcol += (taucs_wtime() - time_tmp);
+	col_ooc_updates += 1.0;
+#endif
+      }
+
+      /*
+      update_vec_next = 0;
+      for(qp = rowlists_head[i]; qp != -1; qp = rowlists_next[qp]) {
+	q = rowlists_colind[qp];
+	col_ooc_updates += 1.0;
+	update_vec[update_vec_next] = q;
+	update_vec_next++;
+      }
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+
+#ifdef SPA_ONEARRAY
+      spcol_panel_update(i,
+			 lu_re,lu_ind,Lclen[k],
+			 update_vec,update_vec_next,update_tmp,
+			 panel_inrowlist,
+			 panel_spa,
+			 panel_spamap,
+			 panel_ind,panel_nnz,
+			 nrows);
+#endif
+
+#ifdef DETAILED_TIMING
+      time_colcol += (taucs_wtime() - time_tmp);
+#endif
+      */
+    }
+#else  /* with SNODES */
+    /* we need to prevent panel cols from being loaded! */
+
+    while ((i = num_heap_extractmin( heap, &heapsize, ipivots)) != -1) {
+      if (i == INT_MAX) continue;
+
+      nnzmap[ i ] = 0;
+      
+      k = ipivots[i];
+      if (k == INT_MAX) continue;
+      
+      /*printf("last supernode %d this column is %d its supernode %d\n",snode_last,k,snode_index[k]);*/
+      if (snode_index[k] == snode_last) continue; /* skip rest of supernode */
+      snode_last = snode_index[k]; /* mark for next time */
+      
+      /*taucs_printf("oocsp_numfact: extracted row %d (col %d) from heap. %d\n",i,k,uindices[i]);*/
+
+      if (panels[k] == panels[ panel_id[0] ]) {
+	/*taucs_printf("oocsp_numfact: skipping column from this panel (col %d, panel %lg)\n",
+	  k,panels[k]);*/
+	continue;
+      }
+      
+      /*if (lindices[i]) continue;*/ /*if (!uindices[i]) continue;*/
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+
+      for (ks = k; ks<ncols && snode_index[ks]==snode_index[k]; ks++) {
+	/*printf(">>> %d %d\n",ks,snode_index[ks]);*/
+	/*oocsp_readcol(L,ks,lu_ind+((ks-k)*maxcolcount),lu_re+((ks-k)*maxcolcount));*/
+	Lreadcol(LU,ks,Lclen[ks],lu_ind+((ks-k)*maxcolcount),lu_re+((ks-k)*maxcolcount));
+#ifdef DETAILED_TIMING
+	bytes_read += (double) (Lclen[ks] * (sizeof(taucs_datatype)+sizeof(int)));
+	col_read += 1.0;
+#endif /* DETAILED_TIMING */
+      }
+
+#ifdef DETAILED_TIMING
+      time_read += (taucs_wtime() - time_tmp);
+#endif
+      /*taucs_printf("oocsp_numfact: Read supernode, %d cols %d:%d\n",ks-k,k,ks-1);*/
+      
+      if (Lclen[k] < 0) {
+	taucs_printf("oocsp_numfact: k=%d len=%d\n",k,Lclen[k]);
+	taucs_printf("oocsp_numfact: Internal Error\n");
+      }
+
+      for (ii=0; ii<Lclen[k]; ii++) {
+	/*if (nnzmap[ lu_ind[ii] ] == 0 && uindices[ lu_ind[ii] ]*/
+	if (nnzmap[ lu_ind[ii] ] == 0 && !lindices[ lu_ind[ii] ]
+	    && !taucs_iszero(lu_re[ii])) {
+	  nnzmap[ lu_ind[ii] ] = 1;
+	  num_heap_insert( heap, &heapsize, ipivots, lu_ind[ii] );
+	  /*taucs_printf("oocsp_numfact: inserting row %d into heap\n",lu_ind[ii]);*/
+	}
+      }
+
+      /* determine which panel cols need to be updated */
+      dense_flag = 1;
+      for (tmp = 0; tmp<1; tmp++) {
+	int jj,ii,kk,iip,jjp;
+
+	/*	printf("snode %d:%d\n",k,ks-1);*/
+
+	if (ks-k < SNODE_THRESHOLD) {dense_flag=0; break;}
+
+	assert(spawidth > 0); /* the code for the other case is not implemented yet; Sivan */
+
+#ifdef DETAILED_TIMING
+	time_tmp = taucs_wtime();
+#endif
+
+	srows_n = Lclen[k] + 1; /* the diagonal element in column k is not */
+                                /* represented explicitly in L, it's 1     */
+
+	if (srows_n < SNODE_THRESHOLD) {dense_flag=0; break;}
+
+	m2 = snode_map; /* reuse the vector */
+
+	/* create an array of row indices, with pivot rows first, in order */
+	/* so that the supernode array will be a trapezoidal matrix        */
+	/* we should keep this and restore the -1 invariant                */
+
+#ifdef DETAILED_TIMING
+	time_snode_tmp = taucs_wtime();
+#endif
+
+	for (jj=k; jj<ks; jj++) {
+	  ii = pivots[jj];
+	  srows[jj-k] = ii;
+	  m2[ii] = 1;
+	}
+	/* now the rest of the rows */
+	srow_next = ks-k;
+	for (iip=0; iip<Lclen[k]; iip++) {
+	  ii = lu_ind[iip];
+	  if (ii == pivots[k]) srows_n--; /* I have no idea why this row shows up; always with 0.0 */
+	  if (m2[ii] == -1) {
+	    srows[srow_next] = ii;
+	    srow_next++;
+	  } else {
+	    m2[ii] = -1; /* restore the invariant */
+	  }
+	}
+	assert(srow_next == srows_n);
+	
+	/* we begin by figuring out which columns of the panel are updated    */
+	/* by this supernode.                                                 */
+
+#ifdef DETAILED_TIMING
+	time_snode_tmp = taucs_wtime();
+#endif
+
+	spa_n = 0;
+	for (jj=k; jj<ks; jj++) {
+	  ii = pivots[jj];
+	  for(qp = rowlists_head[ii]; qp != -1; qp = rowlists_next[qp]) {
+	    int skip; /* don't add a column twice to spa_updcols */
+	    q = rowlists_colind[qp];
+	    for (skip=0, jjp=0; jjp<spa_n; jjp++) {
+	      if (spa_updcols[jjp] == q) {
+		skip = 1;
+		break;
+	      }
+	    }
+	    if (!skip) {
+	      /*if (jj-k > 4) printf("*** jj-k %d ks-k %d\n",jj-k,ks-k);*/
+
+#ifdef DETAILED_TIMING
+	      flops += 2.0 * ( (ks-jj) * (srows_n - (jj-k)) - 0.5*(ks-jj)*(ks-jj) );
+	      flops_extra += 2.0 * ( (jj-k) * (srows_n) - 0.5*(jj-k)*(jj-k) );
+#endif /* DETAILED_TIMING */
+
+	      spa_updcols[spa_n] = q;
+	      /*spa_updptrs[spa_n] = jj-k;*/
+	      spa_n++;
+	    }
+	  }
+	}
+#ifdef DETAILED_TIMING
+	time_snode_1 += (taucs_wtime()-time_snode_tmp);
+#endif
+
+	if (spa_n < SNODE_THRESHOLD) {
+	  for (jj=k; jj<ks; jj++) {
+	    ii = pivots[jj];
+	    m2[ii] = -1;
+	  }
+	  for (iip=0; iip<Lclen[k]; iip++) {
+	    ii = lu_ind[iip];
+	    m2[ii] = -1; /* restore the invariant */
+	  }
+	  dense_flag=0; 
+	  break;
+	}
+
+	/*printf("snode %d:%d (dense)\n",k,ks-1);*/
+
+	/* copy snode columns into a dense array */
+
+	for (jj=k; jj<ks; jj++) {
+	  for (iip=0; iip<Lclen[jj]; iip++)
+	    /* mark in m2 where each row is */
+	    m2[ lu_ind[(jj-k)*maxcolcount + iip] ] = iip;
+	  
+	  for (iip=0; iip<srows_n; iip++) {
+	    if (m2[ srows[iip] ] != -1) {
+	      S[ (jj-k)*srows_n + iip ] = lu_re[(jj-k)*maxcolcount + m2[ srows[iip] ]];
+	      m2[ srows[iip] ] = -1; /* restore invariant for next column */
+	    } else
+	      S[ (jj-k)*srows_n + iip ] = taucs_zero;
+	  }
+	}
+#ifdef DETAILED_TIMING
+	time_snode_3 += (taucs_wtime()-time_snode_tmp);
+#endif
+
+	/* now the snode is stored in a dense array S, with row indices srows */
+	/* and with the diagonal block of L on top.                           */
+
+	/* next we copy the columns of the panels that need to be updated     */
+	/* into the dense array P.                                            */
+
+	/* we then copy these columns into the dense array P, and if          */
+	/* fill occurs, we update the nonzero bitmap and row lists.           */
+
+#ifdef DETAILED_TIMING
+	time_snode_tmp = taucs_wtime();
+#endif
+#define OLD_1
+#ifdef OLD_1
+	for (jjp=0; jjp<spa_n; jjp++) {
+	  jj = spa_updcols[jjp];
+
+	  for (iip=0; iip<srows_n; iip++) {
+	  /*for (iip=spa_updptrs[jjp]; iip<srows_n; iip++) {*/
+	    if (panel_spamap[jj*nrows + srows[iip]] == 0) {
+	      /* fill will occur here */
+	      panel_spa[jj*nrows + srows[iip]] = taucs_zero;
+	      /*P[jjp*srows_n + iip] = taucs_zero;*/
+	      
+	      panel_spamap   [jj*nrows + srows[iip]] = 1;
+	      panel_ind      [jj][panel_nnz[jj] ] = srows[iip];
+	      panel_inrowlist[jj][panel_nnz[jj] ] = rowlists_insert(srows[iip],jj);
+	      (panel_nnz[jj])++;
+	    }
+#ifdef JUNK
+	    } else { 
+	      /* no fill, just copy the element */
+  	      P[jjp*srows_n + iip] = panel_spa[jj*nrows + srows[iip]];
+	    }
+#endif
+	  }
+	} 
+
+	{ 
+#ifdef DETAILED_TIMING
+	  double x = taucs_wtime();
+#endif
+	  for (jjp=0; jjp<spa_n; jjp++) {
+	    jj = spa_updcols[jjp];
+	    for (iip=0; iip<srows_n; iip++) {
+	      P[jjp*srows_n + iip] = panel_spa[jj*nrows + srows[iip]];
+	    }
+	  }
+#ifdef DETAILED_TIMING
+	  time_snode_21 += (taucs_wtime()-x);
+#endif
+        }
+#else
+	for (jjp=0; jjp<spa_n; jjp++) {
+	  int iip_block, loop_bound, flag;
+	  
+	  jj = spa_updcols[jjp];
+
+	  for (iip_block=0; iip_block<srows_n; iip_block += SNODE_BLOCK) {
+
+	    loop_bound = min(iip_block + SNODE_BLOCK,srows_n);
+	    
+	    flag = 1;
+	    for (iip=iip_block; iip<loop_bound; iip++)
+	      flag &= panel_spamap[jj*nrows + srows[iip]];
+	     
+	    if (!flag) {
+	      for (iip=iip_block; iip<loop_bound; iip++) {
+		if (panel_spamap[jj*nrows + srows[iip]] == 0) {
+		  panel_spamap   [jj*nrows + srows[iip]] = 1;
+		  panel_spa      [jj*nrows + srows[iip]] = taucs_zero;
+		  panel_ind      [jj][panel_nnz[jj] ] = srows[iip];
+		  panel_inrowlist[jj][panel_nnz[jj] ] = rowlists_insert(srows[iip],jj);
+		  (panel_nnz[jj])++;
+		}
+	      }
+	    }
+	  }
+	  
+	  { 
+#ifdef DETAILED_TIMING
+	    double x = taucs_wtime();
+#endif
+	    for (iip=0; iip<srows_n; iip++)
+	      P[jjp*srows_n + iip] = panel_spa[jj*nrows + srows[iip]];
+#ifdef DETAILED_TIMING
+	    time_snode_21 += (taucs_wtime()-x);
+#endif
+	  }
+	}
+#endif
+
+#ifdef DETAILED_TIMING
+	time_snode_2 += (taucs_wtime()-time_snode_tmp);
+#endif
+
+	/*printf("supernode update: col %d pivotrow %d updates col %d\n",jj,ii,panel_id[q]);*/
+
+#ifdef DETAILED_TIMING
+	time_snode_prepare += (taucs_wtime() - time_tmp);
+#endif
+
+#ifdef DETAILED_TIMING
+	time_tmp = taucs_wtime();
+#endif
+
+	/*flops += (2.0 * spa_n * srows_n * (ks-k) - 2.0); */ /* over estimate; sivan. */
+	/* we can subtract triangle in estimate, skip zero pivots in flops & code */
+#ifdef DETAILED_TIMING
+	flops_dense += (2.0 * spa_n * srows_n * (ks-k) -
+			1.0 * spa_n * (ks-k) * (ks-k)); 
+#endif /* DETAILED_TIMING */
+
+#ifdef JUNK
+	printf("TRSM's RHS:\n");
+	for (kk=k; kk<ks; kk++) {
+	  for (jj=0; jj<spa_n; jj++) {
+	    printf("%c", P[jj*srows_n + (kk-k)] ? '*' : 'o');
+	  }
+	  printf("\n");
+	}
+#endif
+
+#ifdef USE_BLAS
+	{ 
+	  int m = ks-k;
+	  int n = spa_n;
+	  int M = srows_n - m;
+	  int N = n;
+	  int K = m;
+	  
+	  if (m > BLAS_THRESHOLD && n > BLAS_THRESHOLD) {
+	    taucs_trsm("Left",
+		       "Lower",
+		       "No transpose",
+		       "Unit",
+		       &m,&n,
+		       &taucs_one_const,
+		       S,&srows_n,
+		       P,&srows_n
+		       );
+	  } else {
+	    /* TRSM */
+	    for (jj=0; jj<spa_n; jj++) {
+	      for (kk=k; kk<ks; kk++) {
+		for (ii=kk-k+1; ii<(ks-k); ii++) {
+		  /*P[jj*srows_n + ii] -= (P[jj*srows_n + (kk-k)] * S[(kk-k)*srows_n + ii]);*/
+		  P[jj*srows_n + ii] = 
+		  taucs_sub(P[jj*srows_n + ii],
+			    taucs_mul(P[jj*srows_n + (kk-k)] , S[(kk-k)*srows_n + ii]));
+		}
+	      }
+	    }
+	  }
+
+	  if (M > BLAS_THRESHOLD && N > BLAS_THRESHOLD && K > BLAS_THRESHOLD) {
+	    taucs_gemm("No transpose",
+		       "No transpose",
+		       &M,&N,&K,
+		       &taucs_minusone_const,
+		       S+m, &srows_n,
+		       P  , &srows_n,
+		       &taucs_one_const,
+		       P+m, &srows_n);
+	  } else {
+	    /* GEMM */
+	    for (jj=0; jj<spa_n; jj++) {
+	      for (kk=k; kk<ks; kk++) {
+		for (ii=(ks-k); ii<srows_n; ii++) {
+		  /*P[jj*srows_n + ii] -= (P[jj*srows_n + (kk-k)] * S[(kk-k)*srows_n + ii]);*/
+		  P[jj*srows_n + ii] =
+		    taucs_sub(P[jj*srows_n + ii],
+			      taucs_mul(P[jj*srows_n + (kk-k)] , S[(kk-k)*srows_n + ii]));
+		}
+	      }
+	    }
+	  }
+	}
+#else	   
+	/* TRSM */
+	for (jj=0; jj<spa_n; jj++) {
+	  for (kk=k; kk<ks; kk++) {
+	    for (ii=kk-k+1; ii<(ks-k); ii++) {
+	      P[jj*srows_n + ii] -= (P[jj*srows_n + (kk-k)] * S[(kk-k)*srows_n + ii]);
+	    }
+	  }
+	}
+
+	/* GEMM */
+	for (jj=0; jj<spa_n; jj++) {
+	  for (kk=k; kk<ks; kk++) {
+	    for (ii=(ks-k); ii<srows_n; ii++) {
+	      P[jj*srows_n + ii] -= (P[jj*srows_n + (kk-k)] * S[(kk-k)*srows_n + ii]);
+	    }
+	  }
+	}
+#endif
+#ifdef DETAILED_TIMING
+	time_snode_dense += (taucs_wtime() - time_tmp);
+	col_ooc_updates += 1.0;
+#endif
+
+#ifdef DETAILED_TIMING
+	time_tmp = taucs_wtime();
+#endif
+
+	/* now copy panel columns out of the dense P */
+
+#ifdef DETAILED_TIMING
+	time_snode_tmp = taucs_wtime();
+#endif
+	for (jjp=0; jjp<spa_n; jjp++) {
+	  jj = spa_updcols[jjp];
+	  for (iip=0; iip<srows_n; iip++) {
+	    /*for (iip=spa_updptrs[jjp]; iip<srows_n; iip++) {*/
+	    panel_spa[jj*nrows + srows[iip] ] = P[jjp*srows_n + iip];
+	  }
+	}
+#ifdef DETAILED_TIMING
+	time_snode_4 += (taucs_wtime()-time_snode_tmp);
+        time_snode_prepare += (taucs_wtime() - time_tmp);
+#endif
+      }
+      if (!dense_flag) { /* we didn't do it using the blas since m,n, or k were too small */
+	/* not worth copying into dense arrays etc */
+	/* determine which panel cols need to be updated */
+	int jj,ii;
+	for (jj=k; jj<ks; jj++) {
+	  ii = pivots[jj];
+	  if (ii == INT_MAX) {
+	    taucs_printf("oocsp_numfact: internal error (supernode update)\n");
+	    exit(1);
+	  }
+	  for(qp = rowlists_head[ii]; qp != -1; qp = rowlists_next[qp]) {
+	    q = rowlists_colind[qp];
+	    /*printf("supernode update: col %d pivotrow %d updates col %d\n",jj,ii,panel_id[q]);*/
+
+#ifdef DETAILED_TIMING
+	    time_tmp = taucs_wtime();
+#endif
+	    if (spawidth <= 0) {
+	      taucs_printf("oocsp_numfact: internal error (supernode without a spa)\n");
+	      exit(1);
+	    } else {
+	      spcol_spcol_update(ii,
+				 lu_re+((jj-k)*maxcolcount),lu_ind+((jj-k)*maxcolcount),Lclen[jj],
+				 q,panel_inrowlist[q],
+#ifdef SPA_ONEARRAY
+				 panel_spa+(q * nrows),
+				 panel_spamap+(q * nrows),
+#else
+				 panel_spa[q],
+				 panel_spamap[q],
+#endif
+				 panel_ind[q],&(panel_nnz[q]),
+				 lcolcount[panel_id[q]]+ucolcount[panel_id[q]]);
+	    }
+#ifdef DETAILED_TIMING
+	    time_colcol   += (taucs_wtime() - time_tmp);
+	    time_colcol_1 += (taucs_wtime() - time_tmp);
+	    col_ooc_updates += 1.0;
+#endif
+	  }
+	}
+      }
+    }
+
+#endif /* else SNODES */
+
+    
+    /*taucs_printf("oocsp_numfact: Factoring and Writing (step %d)\n",step);*/
+
+    while (en < ncols && 
+            (schedend[ejectnext[en]]) == step) {
+      p--;
+      j = ejectnext[en];
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+      
+      if (panel_id[p] < 0)  taucs_printf("oocsp_numfact: internal error (panel stack)\n");
+      if (panel_id[p] != j) {
+	taucs_printf("oocsp_numfact: en=%d p=%d, j=%d, panel_id[p]=%d\n",
+	       en,p,j,panel_id[p]);
+	taucs_printf("oocsp_numfact: internal error (panel id's)\n");
+      }
+
+      /* gather */
+
+      if (spawidth > 0) {
+	gather(panel_nnz[p],panel_re[p],panel_ind[p],
+#ifdef SPA_ONEARRAY
+	       panel_spa + (p * nrows),
+	       panel_spamap + (p * nrows));
+#else
+	       panel_spa[p],
+	       panel_spamap[p]);
+#endif
+      }
+
+      /* Find pivot */
+      
+      maxval = 0;
+      maxind = -1;
+      for (ip = 0; ip<panel_nnz[p]; ip++) {
+	absval = (double) taucs_abs(panel_re[p][ip]);
+	if (!lindices[panel_ind[p][ip]]) continue;
+	if (absval > maxval) {
+	  maxval = absval;
+	  maxind = ip;
+	}
+      }
+      
+      if ( maxind == -1 ) {
+	taucs_printf("oocsp_numfact: Zero Column!\n");
+      }
+      
+      if ( taucs_iszero(panel_re[p][maxind]) ) {
+	taucs_printf("oocsp_numfact: Zero Pivot!\n");
+      }
+      
+      pivotindex = panel_ind[p][maxind];
+
+      /*
+      taucs_printf("oocsp_numfact: pivot for column %d is %d (%lg)\n",j,pivotindex,
+	     panel_re[p][maxind]);
+	     */
+
+      if ( ipivots[ pivotindex ] != INT_MAX ) 
+	taucs_printf("oocsp_numfact: Pivoting twice on the same row\n");
+      
+      ipivots[ pivotindex ] = j;
+#ifdef SNODES
+      pivots[ j ] = pivotindex;
+#endif
+      /*printf("### pivot row for column %d is %d\n",j,pivotindex);*/
+
+      lindices[ pivotindex ] = 0;
+      /*uindices[ pivotindex ] = 1;*/
+      
+      /* copy to L, U */
+      
+      /*taucs_printf("oocsp_numfact: copying to l,u\n");*/
+      
+      lnext = 0;
+      /* unext = nrows-1; */
+      unext = maxcolcount-1;
+      for (ip = 0; ip<panel_nnz[p]; ip++) {
+	i = panel_ind[p][ip];
+	/*if (uindices[i]) {*/
+	if (!lindices[i]) {
+	  lu_re[unext]  = panel_re[p][ip];
+	  lu_ind[unext] = i;
+	  /*assert( unext > 0 && unext < maxsn*maxcolcount);*/
+	  unext--;
+	} else {
+	  /*lu_re[lnext]  = (panel_re[p][ip])/(panel_re[p][maxind]);*/
+	  lu_re[lnext]  = taucs_div(panel_re[p][ip] , panel_re[p][maxind]);
+	  lu_ind[lnext] = i;
+	  lnext++;
+	}	  
+      }
+
+#ifdef DETAILED_TIMING
+      flops += (double) lnext;
+#endif /* DETAILED_TIMING */
+      
+#ifdef DETAILED_TIMING
+      time_factor += (taucs_wtime() - time_tmp);
+#endif
+
+      /* Write out column of L, U */
+      /*
+      taucs_printf("oocsp_numfact: writing column %d of l,u (sizes %d %d)\n",
+	     j,lnext,nrows - 1 - unext);
+	     */
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+      /*
+      oocsp_appendcol(U,j,nrows - 1 - unext,
+		      lu_ind + (unext+1),
+		      lu_re  + (unext+1));
+      */
+
+      /* Sivan: replaced this code on 2 March 2002 */
+      /*
+      oocsp_appendcol(U,j,maxcolcount - 1 - unext,
+		      lu_ind + (unext+1),
+		      lu_re  + (unext+1));
+      */
+      assert(Uclen[j] == 0);
+      Uclen[j] = maxcolcount - 1 - unext;
+      Uappendcol(LU,j,maxcolcount - 1 - unext,
+		 lu_ind + (unext+1),
+		 lu_re  + (unext+1));
+#ifdef DETAILED_TIMING
+      time_append += (taucs_wtime() - time_tmp);
+#endif
+
+#ifdef DETAILED_TIMING
+      bytes_appended += (double) ((maxcolcount - 1 - unext) 
+				  * (sizeof(taucs_datatype)+sizeof(int)));
+#endif /* DETAILED_TIMING */
+
+#ifdef SNODES
+      /* detect supernodes */
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+      
+      snode_flag = 1; /* we assume so for now */
+
+      /* a quick check using a hash function to quickly rule out columns */
+
+      if (j>0 && panels[j] != panels[j-1]) snode_flag = 0;
+
+      hash = pivotindex;
+      for (ii=0; ii<snode_size; ii++)
+	hash ^= snode_pivrows[ii];
+      for (ii=0; ii<lnext; ii++)
+	hash ^= lu_ind[ii];
+
+      if (hash == snode_hash) {
+	/* The hash is identical, but is it in the same supernode? */
+
+	/* first, mark this column's structure in the snode_map    */
+	/* then make sure all the supernode's nonzeros are marked  */
+
+	/* for (ii=0; ii<nrows; ii++) assert( snode_map[ii] == -1 ); */
+
+	                                snode_map[pivotindex       ] = 1;
+	for (ii=0; ii<lnext; ii++)      snode_map[lu_ind[ii]       ] = 1;
+	for (ii=0; ii<snode_size; ii++) snode_map[snode_pivrows[ii]] = 1;
+	for (ii=0; ii<snode_nnz; ii++)
+	  if (snode_map[snode_ind[ii]] != 1) snode_flag = 0;
+
+	/* next, clear this column's structure from the snode_map  */
+	/* (pivot row, L indices, snode pivot rows are reset to -1)*/
+
+
+    	                                snode_map[pivotindex]        = -1;
+	for (ii=0; ii<lnext; ii++)	snode_map[lu_ind[ii]       ] = -1;
+	for (ii=0; ii<snode_size; ii++) snode_map[snode_pivrows[ii]] = -1;
+	
+	/* mark this supernodes nonzeros in the map                */
+	
+	for (ii=0; ii<snode_nnz; ii++) snode_map[snode_ind[ii]] = 1;
+
+	/* check that all the column's nonzeros are in the snode   */
+
+	for (ii=0; ii<lnext; ii++) if (snode_map[lu_ind[ii]] != 1) snode_flag = 0;
+
+	/* zero the bitmap for next time                           */
+
+	for (ii=0; ii<snode_nnz; ii++) snode_map[snode_ind[ii]] = -1;
+
+	/* for (ii=0; ii<nrows; ii++) assert( snode_map[ii] == -1 ); */
+
+      } else
+	snode_flag = 0;
+
+      if (snode_size >= maxsn) snode_flag = 0;
+
+      if (j != snode_lastcol+1) snode_flag = 0;
+
+      if (snode_flag) {
+
+	/* THIS COLUMN BELONGS TO AN EXISTING SUPERNODE */
+	
+	snode_pivrows[snode_size] = pivotindex;
+	snode_size++;
+
+	for (ii=0; ii<snode_size; ii++)
+	  spa[snode_pivrows[ii]] = taucs_zero;
+	for (ii=0; ii<lnext; ii++)
+	  spa[lu_ind[ii]] = lu_re[ii];
+
+	for (ii=0; ii<snode_nnz; ii++)
+	  snode_re[ii] = spa[snode_ind[ii]];
+
+#ifdef DETAILED_TIMING
+	time_snode_detect += (taucs_wtime() - time_tmp);
+	time_tmp = taucs_wtime();
+#endif
+
+	/* Sivan: replaced 2 March 2002 */
+	/* oocsp_appendcol(L,j,snode_nnz,snode_ind,snode_re);*/
+	assert(Lclen[j] == 0);
+	Lclen[j] = snode_nnz;
+	Lappendcol(LU,j,snode_nnz,snode_ind,snode_re);
+
+#ifdef DETAILED_TIMING
+	time_append += (taucs_wtime() - time_tmp);
+	bytes_appended += (double) (snode_nnz
+				    * (sizeof(taucs_datatype)+sizeof(int)));
+#endif
+
+	/*taucs_printf("oocsp_numfact: supernode size = %d (column %d)\n",snode_size,j);*/
+      } else {
+	/* THIS COLUMN BELONGS TO A NEW SUPERNODE */
+
+	snode_id ++;
+
+	/*taucs_printf("oocsp_numfact: new supernode, column %d row %d\n",j,pivotindex);*/
+
+#ifdef DETAILED_TIMING
+	time_snode_detect += (taucs_wtime() - time_tmp);
+	time_tmp = taucs_wtime();
+#endif
+      
+	/*oocsp_appendcol(L,j,lnext,lu_ind,lu_re);*/
+	assert(Lclen[j] == 0);
+	Lclen[j] = lnext;
+	Lappendcol(LU,j,lnext,lu_ind,lu_re);
+
+#ifdef DETAILED_TIMING
+	time_append += (taucs_wtime() - time_tmp);
+	bytes_appended += (double) (lnext 
+				    * (sizeof(taucs_datatype)+sizeof(int)));
+	time_tmp = taucs_wtime();
+#endif
+
+	snode_hash = pivotindex;
+	for (ii=0; ii<lnext; ii++) {
+	  snode_hash ^= lu_ind[ii];
+	  snode_ind[ii] = lu_ind[ii];
+	}
+	snode_pivrows[0] = pivotindex;
+	snode_nnz = lnext;
+	snode_size = 1;
+
+#ifdef DETAILED_TIMING
+	time_snode_detect += (taucs_wtime() - time_tmp);
+#endif
+      }
+
+      snode_index[j] = snode_id;
+
+      snode_lastcol = j;
+
+#else /* SNODES */
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+      /*oocsp_appendcol(L,j,lnext,lu_ind,lu_re);*/
+      assert(Lclen[j] == 0);
+      Lclen[j] = lnext;
+      Lappendcol(LU,j,lnext,lu_ind,lu_re);
+#ifdef DETAILED_TIMING
+      time_append += (taucs_wtime() - time_tmp);
+      bytes_appended += (double) (lnext 
+				  * (sizeof(taucs_datatype)+sizeof(int)));
+#endif /* DETAILED_TIMING */
+
+#endif /* SNODES */
+     
+      /* Update rest of panel */
+
+      /*taucs_printf("oocsp_numfact: updating\n");*/
+
+#ifdef DETAILED_TIMING
+      time_tmp = taucs_wtime();
+#endif
+      if (spawidth <= 0) {
+	for(qp = rowlists_head[pivotindex]; qp != -1; qp = rowlists_next[qp]) {
+	  q = rowlists_colind[qp];
+	  scatter(panel_nnz[q],panel_re[q],panel_ind[q],
+		  spa,spamap);
+	  spcol_spcol_update(pivotindex,
+			     lu_re,lu_ind,lnext,
+			     q,panel_inrowlist[q],
+			     spa,
+			     spamap,
+			     panel_ind[q],&(panel_nnz[q]),
+			     lcolcount[panel_id[q]]+ucolcount[panel_id[q]]);
+	  gather(panel_nnz[q],panel_re[q],panel_ind[q],
+		 spa,spamap);
+	}
+      } else {
+	int spa_n = 0;
+	for(qp = rowlists_head[pivotindex]; qp != -1; qp = rowlists_next[qp]) {
+	  q = rowlists_colind[qp];
+	  spa_updcols[spa_n] = q;
+	  spa_n++;
+	}
+	/*
+	for(qp = 0; qp < spa_n; qp++) {
+	  q = spa_updcols[qp];
+	  spcol_spcol_update(pivotindex,
+			     lu_re,lu_ind,lnext,
+			     q,
+			     panel_inrowlist[q],
+			     panel_spa    + (q*nrows),
+			     panel_spamap + (q*nrows),
+			     panel_ind[q],&(panel_nnz[q]),
+			     0);
+	}
+	*/
+
+	spcol_spa_update(pivotindex,
+			 lu_re,lu_ind,lnext,
+			 spa_updcols,spa_n,nrows,
+			 panel_inrowlist,
+			 panel_spa,
+			 panel_spamap,
+			 panel_ind,panel_nnz);
+      }
+#ifdef DETAILED_TIMING
+      time_colcol   += (taucs_wtime() - time_tmp);
+      time_colcol_2 += (taucs_wtime() - time_tmp);
+#endif
+
+      /*taucs_printf("oocsp_numfact: done updating\n");*/
+
+      /* Eject from panel */
+
+      for (ip = 0; ip<panel_nnz[p]; ip++) {
+	i = panel_ind[p][ip];
+	rowlists_delete(i,panel_inrowlist[p][ip]);
+      }
+
+      taucs_free(panel_inrowlist[p]); panel_inrowlist[p] = NULL;
+      taucs_free(panel_ind[p]); panel_ind[p] = NULL;
+      taucs_free(panel_re[p]);  panel_re[p]  = NULL;
+      if (spawidth > 0) {
+#ifndef SPA_ONEARRAY
+      taucs_free(panel_spa[p]); panel_spa[p] = NULL;
+      taucs_free(panel_spamap[p]);  panel_spamap[p]  = NULL;
+#endif
+      }
+      panel_id[p] = -1;
+
+      /*taucs_printf("oocsp_numfact: done with col\n");*/
+
+      en++;
+    }    
+  }
+
+#ifdef SNODESHIST
+#define HIST_SIZE 32
+#define HIST_INC  4
+  {
+    int i,j,k,s,last;
+    int hist[HIST_SIZE];
+    for (i=0; i<HIST_SIZE; i++) hist[i] = 0;
+
+    last = 0;
+    s = 0;
+    for (j=0; j<ncols; j++) {
+      if (snode_index[j] == last) {
+	s++;
+      } else {
+	i = s / HIST_INC;
+	if (i > HIST_SIZE-1) i=HIST_SIZE-1;
+	(hist[i])++;
+	/*printf("last snode index %d size %d hist %d\n",last,s,i);*/
+	last = snode_index[j];
+	s = 1;
+      }
+    }
+    /*printf("last snode index %d size %d\n",last,s);*/
+    i = s / HIST_INC;
+    if (i > HIST_SIZE-1) i=HIST_SIZE-1;
+    (hist[i])++;
+    taucs_printf("oocsp_numfact: snode histogram:\n");
+    for (i=0; i<HIST_SIZE-1; i++)
+      taucs_printf("oocsp_numfact:   %02d-%02d: %d snodes\n",i*HIST_INC+1,(i+1)*HIST_INC,hist[i]);
+    taucs_printf("oocsp_numfact:   %02d&up: %d snodes:\n",
+	       (HIST_SIZE-1)*HIST_INC+1,hist[HIST_SIZE-1]);
+  }
+#endif
+
+  taucs_io_append(LU, HEADER_NROWS  , 1,      1, TAUCS_INT, &(A->m) );
+  taucs_io_append(LU, HEADER_NCOLS  , 1,      1, TAUCS_INT, &(A->n) );
+  taucs_io_append(LU, HEADER_FLAGS  , 1,      1, TAUCS_INT, &(A->flags));
+  taucs_io_append(LU, HEADER_COLPERM, (A->n), 1, TAUCS_INT, colperm    );
+  taucs_io_append(LU, HEADER_IPIVOTS, (A->n), 1, TAUCS_INT, ipivots    );
+  taucs_io_append(LU, HEADER_LCLEN  , (A->n), 1, TAUCS_INT, Lclen      );
+  taucs_io_append(LU, HEADER_UCLEN  , (A->n), 1, TAUCS_INT, Uclen      );
+
+  (remaining_memory) += (double) ( 2 * (ncols+1) * sizeof(int));
+
+  {
+    double nnzL = 0;
+    double nnzU = 0;
+    for (j=0; j < (A->n); j++) nnzL += (double) (Lclen[j]);
+    for (j=0; j < (A->n); j++) nnzU += (double) (Uclen[j]);
+    taucs_printf("oocsp_numfact: nnz(L) = %.2le nnz(U) = %.2le nnz(L+U) = %.2leM\n",
+	       nnzL,nnzU,(nnzL+nnzU)/1e6);
+  }
+  
+  if (spawidth > 0) {
+    taucs_free(panel_spa);
+    taucs_free(panel_spamap);
+  }
+
+  spa_finalize();
+
+  rowlists_finalize();
+
+  taucs_free(ipivots);
+
+  taucs_free(Uclen);
+  taucs_free(Lclen);
+
+  taucs_free(panel_re);
+  taucs_free(panel_ind);
+  taucs_free(panel_inrowlist);
+  taucs_free(panel_nnz);
+  taucs_free(panel_id);
+
+/*
+  taucs_free(update_vec);
+  taucs_free(update_tmp);
+*/
+
+#ifdef SNODES
+  taucs_free(snode_index);
+  taucs_free(snode_pivrows);
+  taucs_free(snode_ind);
+  taucs_free(snode_re);
+  taucs_free(snode_map);
+
+  taucs_free(pivots);
+
+  taucs_free(spa_updcols);
+  taucs_free(P);
+  taucs_free(S);
+  taucs_free(srows);
+#endif
+
+  taucs_free(heap);
+  taucs_free(nnzmap);
+  taucs_free(lindices);
+  /*taucs_free(uindices);*/
+  taucs_free(lu_re);
+  taucs_free(lu_ind);
+
+  /*taucs_free(pivots);
+  taucs_free(ipivots);
+  */
+
+  /* OLD */
+
+  time_total = taucs_wtime() - time_total;
+  taucs_printf("oocsp_numfact: %lg sec total\n",time_total);
+
+#ifdef DETAILED_TIMING
+  taucs_printf("oocsp_numfact: %lg extra flops, %2.0lf %\n",flops_extra,100.0*flops_extra/flops);
+  taucs_printf("oocsp_numfact: %lg dense flops, %2.0lf %\n",flops_dense,100.0*flops_dense/flops);
+  taucs_printf("oocsp_numfact: %lg Mflop dense/s\n",(flops_dense*1e-6)/(time_snode_dense));
+ 
+  taucs_printf("oocsp_numfact: %lg flops\n",flops);
+  taucs_printf("oocsp_numfact: %lg scatter ops\n",scatters);
+  taucs_printf("oocsp_numfact: %lg gather  ops\n",gathers);
+  taucs_printf("oocsp_numfact: %lg heap    ops\n",num_heap_ops);
+  taucs_printf("oocsp_numfact: %lg rowlist ops\n",rowlist_ops);
+  taucs_printf("oocsp_numfact: %lg sec col/col ops\n",time_colcol);
+  taucs_printf("oocsp_numfact: %lg sec col/col ops (%lg+%lg=%lg)\n",time_colcol,
+	     time_colcol_1,time_colcol_2,(time_colcol_1+time_colcol_2));
+  taucs_printf("oocsp_numfact: %lg sec column factor ops\n",time_factor);
+  taucs_printf("oocsp_numfact: %lg sec scatter ops\n",time_scatter);
+  taucs_printf("oocsp_numfact: %lg sec gather  ops\n",time_gather);
+  taucs_printf("oocsp_numfact: %lg sec io read\n",time_read);
+  taucs_printf("oocsp_numfact: %lg sec io write\n",time_append);
+#ifdef SNODES
+  taucs_printf("oocsp_numfact: %lg sec snode preparations for dense ops\n",time_snode_prepare);
+  taucs_printf("oocsp_numfact: %lg sec snode dense ops\n",time_snode_dense);
+  taucs_printf("oocsp_numfact: %lg sec snode detection\n",time_snode_detect);
+  taucs_printf("oocsp_numfact: %lg sec snode 1\n",time_snode_1);
+  taucs_printf("oocsp_numfact: %lg sec snode 2 (%lg)\n",time_snode_2,
+		                                      time_snode_21);
+  taucs_printf("oocsp_numfact: %lg sec snode 3\n",time_snode_3);
+  taucs_printf("oocsp_numfact: %lg sec snode 4\n",time_snode_4);
+#endif
+  taucs_printf("oocsp_numfact: \n");
+  taucs_printf("oocsp_numfact: %lg Mflop/s\n",(flops*1e-6)/(time_total));
+  taucs_printf("oocsp_numfact: %lg MB/s IO read\n",(bytes_read*1e-6)/(time_read));
+  taucs_printf("oocsp_numfact: %lg MB/s IO write\n",(bytes_appended*1e-6)/(time_append));
+  taucs_printf("oocsp_numfact: %lg col reuse\n",col_ooc_updates/col_read);
+  taucs_printf("oocsp_numfact: %lg percent IO\n",(time_read+time_append)/time_total);
+  taucs_printf("oocsp_numfact: %lg percent col/col\n",(time_colcol)/time_total);
+#endif
+
+
+#ifndef SIMPLE_COL_COL
+  {
+    extern int oocsp_spcol_n1,oocsp_spcol_n2;
+    taucs_printf("oocsp_numfact: spcol counts %d %d\n",oocsp_spcol_n1,oocsp_spcol_n2);
+  }
+#endif
+}
+
+
+/*********************************************************/
+/* FACTOR                                                */
+/*********************************************************/
+
+/* Driver for the out-of-core sparse LU factorization of A_in.
+ * Runs three phases in sequence -- column analysis under the order
+ * colperm, panel scheduling, numeric factorization into LU -- and always
+ * returns 0; no status from the phases is inspected.                   */
+static int
+oocsp_factor(taucs_ccs_matrix* A_in,
+	     taucs_io_handle* LU,
+             int*  colperm)
+{
+  /* filled in by oocsp_colanalyze */
+  int* etree       = NULL;
+  int* postorder   = NULL;
+  int* l_colcounts = NULL;
+  int* u_colcounts = NULL;
+  /* filled in by oocsp_panelize_simple */
+  int* panels      = NULL;
+  int* schedstart  = NULL;
+  int* schedend    = NULL;
+  int* fetchnext   = NULL;
+  int* ejectnext   = NULL;
+  int  spawidth;
+  int  maxsn;
+  /* NOTE(review): none of these arrays is released in this function;
+   * whether oocsp_numfact takes ownership is not visible here.        */
+
+  taucs_printf("taucs_ooc_lu: starting\n");
+
+  /* phase 1: analysis */
+  taucs_printf("taucs_ooc_lu: calling colanalyze\n");
+  oocsp_colanalyze(A_in, taucs_io_get_basename(LU), colperm,
+		   &etree, &postorder, &l_colcounts, &u_colcounts);
+
+  /* phase 2: panel schedule */
+  taucs_printf("taucs_ooc_lu: calling panelize\n");
+  oocsp_panelize_simple(A_in->m, A_in->n, postorder,
+			l_colcounts, u_colcounts, etree,
+			&spawidth, &maxsn,
+			&panels, &schedstart, &schedend,
+			&fetchnext, &ejectnext);
+
+  /* phase 3: numeric factorization, written through LU */
+  taucs_printf("taucs_ooc_lu: calling numfact\n");
+  oocsp_numfact(A_in, colperm, LU,
+		panels, schedstart, schedend, fetchnext, ejectnext,
+		l_colcounts, u_colcounts,
+		spawidth, maxsn);
+
+  taucs_printf("taucs_ooc_lu: done, returning\n");
+
+  return 0;
+}
+
+void /* typed entry point; returns nothing, so failures are not reported */
+taucs_dtl(ooc_factor_lu)(taucs_ccs_matrix* A_in,
+		         int    colperm[],
+                         taucs_io_handle* LU,
+		         double memory)
+{
+  remaining_memory = memory; /* declared outside this function; logged below divided by 2^20 */
+  taucs_printf("taucs_ooc_factor_lu: using %.0lf MBytes of in-core memory\n",
+	     (remaining_memory)/1048576.0);
+  oocsp_factor(A_in,LU,colperm); /* its int result is discarded */
+}
+
+/*********************************************************/
+/* SOLVE                                                 */
+/*********************************************************/
+
+/* Solve A x = b using the out-of-core LU factors stored behind LU.
+ *
+ *   LU  handle providing the header records (row count, column lengths,
+ *       column permutation, pivot rows) and the columns of L and U
+ *   X   output: solution, n entries
+ *   B   input: right-hand side, n entries (only read)
+ *
+ * Returns 0 on success and -1 if the work arrays cannot be allocated.
+ * Columns are streamed one at a time through Lreadcol/Ureadcol.
+ */
+static int
+oocsp_solve(taucs_io_handle* LU,
+	    taucs_datatype* X,
+	    taucs_datatype* B)
+{
+  int i,ip,j,n;
+  taucs_datatype* Y;        /* work vector                               */
+  taucs_datatype  Aij;
+  taucs_datatype* values;   /* values of the column just read            */
+  int*    indices;          /* row indices of the column just read       */
+  int*    irowperm;         /* inverse of ipivots                        */
+  int*    Lclen;            /* number of stored entries per column of L  */
+  int*    Uclen;            /* number of stored entries per column of U  */
+  int*    colperm;
+  int*    ipivots;
+
+  int     found;
+  double  time_solve = taucs_wtime();
+  double  bytes_read = 0;
+
+  taucs_printf("oocsp_solve: starting\n");
+
+  taucs_io_read(LU, HEADER_NROWS, 1, 1, TAUCS_INT, &n);
+
+  Y       = (taucs_datatype*) taucs_malloc(n * sizeof(taucs_datatype));
+  values  = (taucs_datatype*) taucs_malloc(n * sizeof(taucs_datatype));
+  indices = (int*)    taucs_malloc(n * sizeof(int));
+  irowperm= (int*)    taucs_malloc(n * sizeof(int));
+
+  Lclen   = (int*)    taucs_malloc(n * sizeof(int));
+  Uclen   = (int*)    taucs_malloc(n * sizeof(int));
+
+  colperm = (int*)    taucs_malloc(n * sizeof(int));
+  ipivots = (int*)    taucs_malloc(n * sizeof(int));
+
+  /* Report failure to the caller instead of relying on assert(), which is
+   * compiled out under NDEBUG and would let the NULL pointers be used.  */
+  if (!Y || !values || !indices || !irowperm
+      || !Lclen || !Uclen || !colperm || !ipivots) {
+    taucs_printf("oocsp_solve: out of memory\n");
+    taucs_free(Y);
+    taucs_free(values);
+    taucs_free(indices);
+    taucs_free(irowperm);
+    taucs_free(Uclen);
+    taucs_free(Lclen);
+    taucs_free(ipivots);
+    taucs_free(colperm);
+    return -1;
+  }
+
+  taucs_io_read(LU, HEADER_LCLEN, n, 1, TAUCS_INT, Lclen);
+  taucs_io_read(LU, HEADER_UCLEN, n, 1, TAUCS_INT, Uclen);
+
+  taucs_io_read(LU, HEADER_COLPERM, n, 1, TAUCS_INT, colperm);
+  taucs_io_read(LU, HEADER_IPIVOTS, n, 1, TAUCS_INT, ipivots);
+
+  /* irowperm[r] == k  exactly when  ipivots[k] == r */
+  for(i=0; i<n; i++)
+     irowperm[ipivots[i]]=i;
+
+  /* debugging aid:
+  taucs_printf("colperm = [");
+  for(i=0; i<n; i++)
+    taucs_printf("%d, ",colperm[i]);
+  taucs_printf("\b\b]\n");
+  taucs_printf("ipivots = [");
+  for(i=0; i<n; i++)
+    taucs_printf("%d, ",ipivots[i]);
+  taucs_printf("\b\b]\n");
+  */
+
+  /* Forward sweep over the columns of L, in place in Y (a copy of B);
+   * column j scales the entry of Y in row irowperm[j].                 */
+  for (i=0; i<n; i++)
+    Y[i] = B[i];
+
+  for (j=0; j<n; j++) {
+    Lreadcol(LU,j,Lclen[j],indices,values);
+    bytes_read += Lclen[j] * (sizeof(int) + sizeof(taucs_datatype));
+    for (ip=0; ip < Lclen[j]; ip++) {
+      i = indices[ip];
+      Aij = values[ip];
+      /*Y[i] = Y[i] - Aij*Y[irowperm[j]];*/
+      Y[i] = taucs_sub(Y[i] , taucs_mul( Aij , Y[irowperm[j]] ));
+    }
+  }
+
+  for (i=0; i<n; i++) X[i] = Y[i];
+
+  /* Backward sweep over the columns of U, last column first. */
+  for (j=n-1; j>=0; j--) {
+    Ureadcol(LU,j,Uclen[j],indices,values);
+    bytes_read += Uclen[j] * (sizeof(int) + sizeof(taucs_datatype));
+
+    /* divide X[i] by the entry stored in row irowperm[j] of this column */
+    found = 0;
+    for (ip=0; ip < Uclen[j]; ip++) {
+      i = indices[ip];
+      if (i == irowperm[j]) {
+	found = 1;
+	Aij = values[ip];
+	/*X[i] = X[i] / Aij;*/
+	X[i] = taucs_div( X[i] , Aij );
+	values[ip] = taucs_zero; /* so we don't multiply in the daxpy below */
+      }
+    }
+    assert( found );
+
+    for (ip=0; ip < Uclen[j]; ip++) {
+      i = indices[ip];
+      Aij = values[ip];
+      /*X[i] = X[i] - Aij*X[irowperm[j]];*/
+      X[i] = taucs_sub( X[i] , taucs_mul( Aij , X[irowperm[j]] ));
+    }
+  }
+
+  /* scatter the result through the pivot order ... */
+  for (i=0; i<n; i++) Y[i] = X[i];
+  for (i=0; i<n; i++)
+    X[ ipivots[i] ] = Y[ i ];
+
+  /* ... and then through the column order */
+  for (i=0; i<n; i++) Y[i] = X[i];
+  for (i=0; i<n; i++)
+    X[ colperm[i] ] = Y[ i ];
+
+  taucs_free(Y);
+  taucs_free(values);
+  taucs_free(indices);
+  taucs_free(irowperm);
+  taucs_free(Uclen);
+  taucs_free(Lclen);
+  taucs_free(ipivots);
+  taucs_free(colperm);
+
+  time_solve = (taucs_wtime() - time_solve);
+  taucs_printf("oocsp_solve: done in %.0lf seconds, read %.0lf bytes (%.0lf MBytes)\n",
+	     time_solve,bytes_read,bytes_read/1048576.0);
+
+  return 0; /* success */
+}
+
+int taucs_dtl(ooc_solve_lu)(taucs_io_handle*   LU,
+			    taucs_datatype* x, taucs_datatype* b)
+{
+  /* typed entry point: forward to oocsp_solve and pass its status back */
+	return oocsp_solve(LU,x,b);
+}
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* USER CALLABLE ROUTINES                                */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+int taucs_ooc_factor_lu(taucs_ccs_matrix* A,
+			int*              colperm,
+                        taucs_io_handle*  LU,                       
+			double            memory)
+{ /* run the typed factorization selected by the datatype bit in A->flags */
+#ifdef TAUCS_CONFIG_DREAL
+  if (A->flags & TAUCS_DOUBLE) {
+    taucs_dooc_factor_lu(A,colperm,LU,memory);
+    return 0;
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if (A->flags & TAUCS_DCOMPLEX) {
+    taucs_zooc_factor_lu(A,colperm,LU,memory);
+    return 0;
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if (A->flags & TAUCS_SINGLE) {
+    taucs_sooc_factor_lu(A,colperm,LU,memory);
+    return 0;
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if (A->flags & TAUCS_SCOMPLEX) {
+    taucs_cooc_factor_lu(A,colperm,LU,memory);
+    return 0;
+  }
+#endif
+  /* no datatype compiled into this build matches A->flags */
+  assert(0);
+  return -1;
+}
+
+/* Solve with out-of-core LU factors: read the flags word stored in LU and
+ * run the typed solver for that datatype on x (output) and b (input).
+ * Returns the typed solver's status instead of an unconditional 0, or -1
+ * when no datatype compiled into this build matches the stored flags.   */
+int taucs_ooc_solve_lu (taucs_io_handle* LU,
+			void* x, void* b)
+{
+  int flags;
+
+  taucs_io_read(LU, HEADER_FLAGS, 1, 1, TAUCS_INT, &flags);
+
+  printf("taucs_ooc_solve_lu: starting, DZSC=%d%d%d%d\n",
+	 (flags & TAUCS_DOUBLE  ) != 0,
+	 (flags & TAUCS_DCOMPLEX) != 0,
+	 (flags & TAUCS_SINGLE  ) != 0,
+	 (flags & TAUCS_SCOMPLEX) != 0);
+  
+#ifdef TAUCS_CONFIG_DREAL
+  if (flags & TAUCS_DOUBLE) {
+    return taucs_dooc_solve_lu(LU,x,b);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if (flags & TAUCS_DCOMPLEX) {
+    return taucs_zooc_solve_lu(LU,x,b);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if (flags & TAUCS_SINGLE) {
+    return taucs_sooc_solve_lu(LU,x,b);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if (flags & TAUCS_SCOMPLEX) {
+    return taucs_cooc_solve_lu(LU,x,b);
+  }
+#endif
+
+  assert(0);
+  return -1;
+}
+
+#endif /*#ifdef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* END OF FILE                                           */
+/*********************************************************/
+
+
diff --git a/contrib/taucs/src/taucs_ccs_ops.c b/contrib/taucs/src/taucs_ccs_ops.c
new file mode 100644
index 0000000000000000000000000000000000000000..e7ab8a5404bf6400e57aef67caee00cf97b8ed19
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_ops.c
@@ -0,0 +1,500 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+#ifndef TAUCS_CORE
+#error "You must define TAUCS_CORE to compile this file"
+#endif
+
+/*********************************************************/
+/* split into left, right columns                        */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+void 
+taucs_ccs_split(taucs_ccs_matrix*  A, 
+		taucs_ccs_matrix** L, 
+		taucs_ccs_matrix** R, 
+		int p)
+{
+  /* forward to the typed split selected by the datatype bit in A->flags */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (A->flags & TAUCS_DOUBLE)
+    taucs_dccs_split(A,L,R,p);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (A->flags & TAUCS_SINGLE)
+    taucs_sccs_split(A,L,R,p);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_DCOMPLEX)
+    taucs_zccs_split(A,L,R,p);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_SCOMPLEX)
+    taucs_cccs_split(A,L,R,p);
+#endif
+  /* when no branch above is taken nothing is called here, and *L and *R
+     are left exactly as the caller passed them */
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+#ifndef TAUCS_CORE_GENERAL
+
+/* Split the lower-triangular CCS matrix A at column p:
+ *   *L  n-by-n, columns 0..p-1 of A followed by n-p empty columns
+ *   *R  (n-p)-by-(n-p), columns p..n-1 of A with row indices shifted by p
+ * Both are flagged TAUCS_SYMMETRIC | TAUCS_LOWER only.  If any allocation
+ * fails, everything allocated here is released and *L = *R = NULL.     */
+void 
+taucs_dtl(ccs_split)(taucs_ccs_matrix*  A, 
+		     taucs_ccs_matrix** L, 
+		     taucs_ccs_matrix** R, 
+		     int p)
+{
+  int i,n;
+  int Lnnz, Rnnz;
+
+  assert((A->flags & TAUCS_SYMMETRIC) || (A->flags & TAUCS_TRIANGULAR));
+  assert(A->flags & TAUCS_LOWER);
+  n = A->n;
+  *L = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  *R = (taucs_ccs_matrix*) taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (!(*L) || !(*R)) { 
+    taucs_printf("taucs_ccs_split: out of memory\n");
+    taucs_free(*L); taucs_free(*R);
+    *L = *R = NULL;
+    return; 
+  }
+
+  Lnnz = 0;
+  for (i=0; i<p; i++)
+    Lnnz += ( (A->colptr)[i+1] - (A->colptr)[i] );
+  /* assign (not |=): memory from taucs_malloc holds no defined flags yet */
+  (*L)->flags = TAUCS_SYMMETRIC | TAUCS_LOWER;
+  (*L)->n = n;
+  (*L)->m = n;
+  (*L)->colptr = (int*)    taucs_malloc((n+1) * sizeof(int));
+  (*L)->rowind = (int*)    taucs_malloc(Lnnz   * sizeof(int));
+  (*L)->taucs_values = (void*)   taucs_malloc(Lnnz   * sizeof(taucs_datatype));
+  if (!((*L)->colptr) || !((*L)->rowind) || !((*L)->taucs_values)) {
+    	taucs_printf("taucs_ccs_split: out of memory: n=%d nnz=%d\n",n,Lnnz);
+	taucs_free((*L)->colptr); taucs_free((*L)->rowind); taucs_free((*L)->taucs_values);
+	taucs_free(*L); taucs_free(*R);
+	*L = *R = NULL; /* never hand freed pointers back to the caller */
+	return; 
+  }
+
+  for (i=0; i<=p; i++)
+    ((*L)->colptr)[i] = (A->colptr)[i];   
+  for (i=p+1; i<n+1; i++)
+    ((*L)->colptr)[i] = ((*L)->colptr)[p]; /* other columns are empty */
+
+  for (i=0; i<Lnnz; i++) {
+    ((*L)->rowind)[i] = (A->rowind)[i];
+    ((*L)->taucs_values)[i] = (A->taucs_values)[i];
+  }
+
+  /* now copy the right part of the matrix into an (n-p)-by-(n-p) matrix */
+  Rnnz = 0;
+  for (i=p; i<n; i++)
+    Rnnz += ( (A->colptr)[i+1] - (A->colptr)[i] );
+    
+  (*R)->flags = TAUCS_SYMMETRIC | TAUCS_LOWER;
+  (*R)->n = n-p;
+  (*R)->m = n-p;
+  (*R)->colptr = (int*)    taucs_malloc((n-p+1) * sizeof(int));
+  (*R)->rowind = (int*)    taucs_malloc(Rnnz   * sizeof(int));
+  (*R)->taucs_values = (void*)   taucs_malloc(Rnnz   * sizeof(taucs_datatype));
+  if (!((*R)->colptr) || !((*R)->rowind) || !((*R)->taucs_values)) {
+    	taucs_printf("taucs_ccs_split: out of memory (3): p=%d nnz=%d\n",p,Rnnz);
+	taucs_free((*R)->colptr); taucs_free((*R)->rowind); taucs_free((*R)->taucs_values);
+	taucs_free((*L)->colptr); taucs_free((*L)->rowind); taucs_free((*L)->taucs_values);
+	taucs_free(*R); taucs_free(*L);
+	*L = *R = NULL;
+	return; 
+  }
+    
+  for (i=0; i<=(n-p); i++)
+    ((*R)->colptr)[i] = (A->colptr)[i+p] - Lnnz;   
+  for (i=0; i<Rnnz; i++) {
+    ((*R)->rowind)[i] = (A->rowind)[i + Lnnz] - p;
+    ((*R)->taucs_values)[i] = (A->taucs_values)[i + Lnnz];
+  }
+} 
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* permute symmetrically                                 */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+taucs_ccs_matrix* 
+taucs_ccs_permute_symmetrically(taucs_ccs_matrix* A, int* perm, int* invperm)
+{
+  /* return the typed routine's result for the datatype bit in A->flags */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (A->flags & TAUCS_DOUBLE)
+    return taucs_dccs_permute_symmetrically(A,perm,invperm);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (A->flags & TAUCS_SINGLE)
+    return taucs_sccs_permute_symmetrically(A,perm,invperm);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_permute_symmetrically(A,perm,invperm);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_permute_symmetrically(A,perm,invperm);
+#endif
+  /* reached only when no datatype compiled in matches A->flags */
+  assert(0);
+  return NULL;
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+#ifndef TAUCS_CORE_GENERAL
+
+/* Symmetric permutation of a symmetric or hermitian matrix held as its
+ * lower triangle.  Entry (i,j) of A moves to (invperm[i],invperm[j]) and
+ * is mirrored back into the lower triangle when it lands above the
+ * diagonal (conjugated in the hermitian case).  perm is not read.  The
+ * result inherits A->flags; NULL is returned if an allocation fails.  */
+taucs_ccs_matrix* 
+taucs_dtl(ccs_permute_symmetrically)(taucs_ccs_matrix* A, int* perm, int* invperm)
+{
+  taucs_ccs_matrix* PAPT;
+  int  n;
+  int  nnz;
+  int* cursor;  /* per result column: entry count, later the fill position */
+  int  col, k;
+  int  r, c, t;
+  taucs_datatype v;
+
+  assert(A->flags & TAUCS_SYMMETRIC || A->flags & TAUCS_HERMITIAN);
+  assert(A->flags & TAUCS_LOWER);
+
+  n   = A->n;
+  nnz = (A->colptr)[n];
+
+  PAPT = taucs_dtl(ccs_create)(n,n,nnz);
+  if (PAPT == NULL) return NULL;
+
+  /* the result carries the same flags as A */
+  PAPT->flags = A->flags;
+
+  cursor = (int*) taucs_malloc(n * sizeof(int));
+  if (cursor == NULL) {
+    taucs_printf("taucs_ccs_permute_symmetrically: out of memory\n");
+    taucs_ccs_free(PAPT);
+    return NULL;
+  }
+
+  /* pass 1: count the entries that end up in each result column */
+  for (col=0; col<n; col++) cursor[col] = 0;
+
+  for (col=0; col<n; col++) {
+    c = invperm[col];
+    for (k = (A->colptr)[col]; k < (A->colptr)[col+1]; k++) {
+      r = invperm[ (A->rowind)[k] ];
+      /* the smaller of the two new indices is the lower-triangle column */
+      if (r < c) {
+	cursor[r] ++;
+      } else {
+	cursor[c] ++;
+      }
+    }
+  }
+
+  /* prefix sums turn the counts into column pointers ... */
+  (PAPT->colptr)[0] = 0;
+  for (col=1; col<=n; col++)
+    (PAPT->colptr)[col] = (PAPT->colptr)[col-1] + cursor[col-1];
+
+  /* ... and every cursor restarts at the head of its column */
+  for (col=0; col<n; col++) cursor[col] = (PAPT->colptr)[col];
+
+  /* pass 2: drop each entry into the next free slot of its column */
+  for (col=0; col<n; col++) {
+    for (k = (A->colptr)[col]; k < (A->colptr)[col+1]; k++) {
+      v = (A->taucs_values)[k];
+      r = invperm[ (A->rowind)[k] ];
+      c = invperm[col];
+
+      if (r < c) {
+	/* landed above the diagonal: mirror it */
+	t = r;
+	r = c;
+	c = t;
+	if (A->flags & TAUCS_HERMITIAN) v = taucs_conj(v);
+      }
+
+      (PAPT->rowind)[ cursor[c] ] = r;
+      (PAPT->taucs_values)[ cursor[c] ] = v;
+
+      cursor[c] ++;
+    }
+  }
+
+  taucs_free(cursor);
+  return PAPT;
+}
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*********************************************************/
+/* compute B = A*X                                       */
+/* current restrictions: A must be square, real          */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+void 
+taucs_ccs_times_vec(taucs_ccs_matrix* m, 
+		    void* X,
+		    void* B)
+{
+  /* cast X and B to the datatype named in m->flags and run the typed product */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (m->flags & TAUCS_DOUBLE)
+    taucs_dccs_times_vec(m, (taucs_double*) X, (taucs_double*) B);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (m->flags & TAUCS_SINGLE)
+    taucs_sccs_times_vec(m, (taucs_single*) X, (taucs_single*) B);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (m->flags & TAUCS_DCOMPLEX)
+    taucs_zccs_times_vec(m, (taucs_dcomplex*) X, (taucs_dcomplex*) B);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (m->flags & TAUCS_SCOMPLEX)
+    taucs_cccs_times_vec(m, (taucs_scomplex*) X, (taucs_scomplex*) B);
+#endif
+  /* when no branch above is taken B is not written at all; nothing is
+     reported to the caller because the function returns void */
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+#ifndef TAUCS_CORE_GENERAL
+
+/* B = A*X for the typed matrix m, treated as n-by-n with n = m->n.
+ *   m  matrix; its colptr/rowind/values arrays and flags are read
+ *   X  input vector, n entries
+ *   B  output vector, n entries, overwritten
+ *
+ * When TAUCS_SYMMETRIC (tested first) or TAUCS_HERMITIAN is set, every
+ * stored off-diagonal entry is also applied mirrored across the diagonal
+ * (conjugated in the hermitian case); otherwise only the stored entries
+ * contribute.                                                           */
+void 
+taucs_dtl(ccs_times_vec)(taucs_ccs_matrix* m, 
+			 taucs_datatype* X,
+			 taucs_datatype* B)
+{
+  int row, col;
+  int k;                    /* position in rowind/values */
+  const int n         = m->n;
+  /* the flags do not change during the sweep; test them once */
+  const int symmetric = (m->flags & TAUCS_SYMMETRIC) != 0;
+  const int hermitian = (m->flags & TAUCS_HERMITIAN) != 0;
+  taucs_datatype a;
+
+  /* clear the result */
+  for (row=0; row < n; row++) B[row] = taucs_zero;
+
+  /* one sweep over the stored entries, column by column */
+  for (col=0; col<n; col++) {
+    for (k = (m->colptr)[col]; k < (m->colptr)[col+1]; k++) {
+      row = (m->rowind)[k];
+      a   = (m->taucs_values)[k];
+
+      /* contribution of the stored entry A(row,col) */
+      B[row] = taucs_add(B[row],taucs_mul(X[col],a));
+
+      if (row == col) continue;   /* a diagonal entry has no mirror */
+
+      /* contribution of the implied entry A(col,row) */
+      if (symmetric) {
+	B[col] = taucs_add(B[col],taucs_mul(X[row],a));
+      } else if (hermitian) {
+	B[col] = taucs_add(B[col],taucs_mul(X[row],
+					    taucs_conj(a)));
+      }
+    }
+  }
+} 
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+#ifdef TAUCS_CORE_SINGLE
+/* Symmetric single-precision B = A*X with the sums kept in a temporary
+ * taucs_double vector and converted to taucs_single once at the end; the
+ * operands of each product are taucs_single.  Falls back to
+ * taucs_sccs_times_vec when the temporary cannot be allocated.         */
+void 
+taucs_sccs_times_vec_dacc(taucs_ccs_matrix* m, 
+			 taucs_single* X,
+			 taucs_single* B)
+{
+  int row, col, k, n;
+  taucs_single a;
+  taucs_double* acc;
+
+  assert(m->flags & TAUCS_SYMMETRIC);
+  assert(m->flags & TAUCS_LOWER);
+  assert(m->flags & TAUCS_SINGLE);
+
+  n   = m->n;
+  acc = (taucs_double*) taucs_malloc(n * sizeof(taucs_double));
+  if (!acc) {
+    taucs_sccs_times_vec(m,X,B);
+    return;
+  }
+
+  for (row=0; row < n; row++) acc[row] = 0.0;
+
+  for (col=0; col<n; col++)
+    for (k = (m->colptr)[col]; k < (m->colptr)[col+1]; k++) {
+      row = (m->rowind)[k];
+      a   = (m->taucs_values)[k];
+      acc[row] += X[col] * a;
+      if (row != col) acc[col] += X[row] * a;  /* mirrored entry */
+    }
+
+  for (row=0; row < n; row++) B[row] = (taucs_single) acc[row];
+  taucs_free(acc);
+} 
+#endif
+
+/*********************************************************/
+/* augment diagonals to diagonal dominance               */
+/* current restrictions: A must be square, real          */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+taucs_ccs_matrix* 
+taucs_ccs_augment_nonpositive_offdiagonals(taucs_ccs_matrix* A)
+{
+  /* return the typed routine's matrix for the datatype bit in A->flags */
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (A->flags & TAUCS_DOUBLE)
+    return taucs_dccs_augment_nonpositive_offdiagonals(A);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (A->flags & TAUCS_SINGLE)
+    return taucs_sccs_augment_nonpositive_offdiagonals(A);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_augment_nonpositive_offdiagonals(A);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (A->flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_augment_nonpositive_offdiagonals(A);
+#endif
+  /* reached only when no datatype compiled in matches A->flags; with the
+     returns above a successful typed result is no longer dropped here */
+  assert(0);
+  return NULL;
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+#ifndef TAUCS_CORE_GENERAL
+
+taucs_ccs_matrix* 
+taucs_dtl(ccs_augment_nonpositive_offdiagonals)(taucs_ccs_matrix* A)
+{ /* builds a 2n-by-2n matrix from the n-by-n A in two passes; NULL on error */
+#ifdef TAUCS_CORE_COMPLEX
+  assert(0); /* complex builds do no work and end at the final return NULL */
+#else
+  int n,i,j;
+  int *tmp;
+  taucs_ccs_matrix* A_tmp;
+  /* tmp holds per-column entry counts first, per-column fill positions later */
+  if (!(A->flags & TAUCS_SYMMETRIC) || !(A->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_augment_nonpositive_offdiagonal: matrix not symmetric or not lower\n");
+    return NULL;
+  }
+
+  n=A->n;
+
+  tmp = (int *)taucs_calloc((2*n+1),sizeof(int));
+  if (!tmp) {
+    taucs_printf("taucs_ccs_augment_nonpositive_offdiagonal: out of memory\n");
+    return NULL;
+  }
+
+  A_tmp = taucs_dtl(ccs_create)(2*n,2*n,2*(A->colptr[n]));
+  if (A_tmp == NULL) {
+    taucs_free(tmp);
+    return NULL;
+  }
+  A_tmp->flags |= TAUCS_SYMMETRIC | TAUCS_LOWER;
+  /* pass 1, counting: a diagonal or negative entry of column i is counted
+     for columns i and i+n; any other entry for column i and for its row */
+  for(i=0;i<n;i++) {
+    for(j=A->colptr[i];j<A->colptr[i+1];j++) {
+      if ((i == A->rowind[j])||(A->taucs_values[j] < 0)) {
+	tmp[i]++;
+	tmp[i+n]++;
+      } else {
+	tmp[i]++;
+	tmp[A->rowind[j]]++;
+      }
+    }
+  }
+
+  A_tmp->colptr[0]=0;
+  for(i=0;i<2*n;i++) A_tmp->colptr[i+1] = A_tmp->colptr[i] + tmp[i];
+  for(i=0;i<2*n;i++) tmp[i] = A_tmp->colptr[i];
+  /* pass 2, filling: same test as pass 1, now storing indices and values */
+  for(i=0;i<n;i++) {
+    for(j=A->colptr[i];j<A->colptr[i+1];j++) {
+      if ((i == A->rowind[j])||(A->taucs_values[j] < 0)) {
+	A_tmp->rowind[tmp[i]]=A->rowind[j]; /* copy 1: same row, column i */
+	A_tmp->taucs_values[tmp[i]++]=A->taucs_values[j];
+	A_tmp->rowind[tmp[i+n]]=A->rowind[j]+n; /* copy 2: row+n, column i+n */
+	A_tmp->taucs_values[tmp[i+n]++]=A->taucs_values[j];
+      } else {
+	A_tmp->rowind[tmp[i]]=A->rowind[j]+n; /* negated, row+n in column i */
+	A_tmp->taucs_values[tmp[i]++]=-A->taucs_values[j];
+	A_tmp->rowind[tmp[A->rowind[j]]]=i+n; /* negated, row i+n in column `row` */
+	A_tmp->taucs_values[tmp[A->rowind[j]]++]=-A->taucs_values[j];
+      }
+    }
+  }
+  taucs_free(tmp);
+
+  return A_tmp;
+#endif
+	/* only the TAUCS_CORE_COMPLEX branch falls through to here */
+	return NULL;
+}
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
diff --git a/contrib/taucs/src/taucs_ccs_order.c b/contrib/taucs/src/taucs_ccs_order.c
new file mode 100644
index 0000000000000000000000000000000000000000..a7b6b97bbd3e5e94fded760b9fccfebf679db7d5
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_order.c
@@ -0,0 +1,843 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+/*********************************************************/
+/* Interface to COLAMD                                   */
+/*********************************************************/
+
+#include "../external/src/colamd.h"
+
+/* COLAMD column ordering of the unsymmetric matrix m.
+ * On success *perm and *invperm receive newly allocated arrays with
+ * (*invperm)[(*perm)[i]] == i for 0 <= i < m->n; on every failure path
+ * both are NULL and nothing stays allocated.  `which` is not used.   */
+static void
+taucs_ccs_colamd(taucs_ccs_matrix* m, 
+		 int** perm, int** invperm,
+		 char* which)
+{
+#ifndef TAUCS_CONFIG_COLAMD
+  taucs_printf("taucs_ccs_colamd: COLAMD routines not linked.\n");
+  *perm    = NULL;
+  *invperm = NULL;
+  return;
+#else
+  double knobs[COLAMD_KNOBS];
+  int    Alen;
+  int*   A;
+  int*   p;
+  int*   ip;
+  int    k,nnz;
+  int i;
+  
+  /* clear the outputs before any early return can leave them unset */
+  *perm    = NULL;
+  *invperm = NULL;
+
+  if (m->flags & TAUCS_SYMMETRIC || m->flags & TAUCS_HERMITIAN) {
+    taucs_printf("taucs_ccs_colamd: not applicable for symmetric or hermitian matrices\n");
+    return;
+  }
+
+  taucs_printf("taucs_ccs_colamd: starting\n");
+  nnz = (m->colptr)[m->n];
+  p  = (int*) taucs_malloc((m->n + 1) * sizeof(int));
+  ip = (int*) taucs_malloc((m->n + 1) * sizeof(int));
+  Alen = colamd_recommended(nnz, m->m, m->n);
+  A = (int*) taucs_malloc(Alen * sizeof(int));
+  if (!p || !ip || !A) { /* explicit check: assert() disappears under NDEBUG */
+    taucs_printf("taucs_ccs_colamd: out of memory\n");
+    taucs_free(A); taucs_free(p); taucs_free(ip);
+    return;
+  }
+  colamd_set_defaults (knobs) ;
+  for (i=0; i<=m->n; i++)  p[i] = (m->colptr)[i];
+  for (k=0; k<nnz; k++)     A[k] = (m->rowind)[k];
+  
+  taucs_printf("oocsp_ccs_colamd: calling colamd matrix is %dx%d, nnz=%d\n",
+	     m->m,m->n,nnz);
+  if (!colamd (m->m, m->n, Alen, A, p, knobs)) {
+    taucs_printf("oocsp_ccs_colamd: colamd failed\n");
+    taucs_free(A); taucs_free(p); taucs_free(ip); /* ip must go too */
+    return;
+  }
+  taucs_printf("oocsp_ccs_colamd: colamd returned\n");
+  taucs_free(A);
+  *perm    = p;
+  *invperm = ip;
+  for (i=0; i<m->n; i++) (*invperm)[(*perm)[i]] = i;
+#endif
+}
+
+/*********************************************************/
+/* Interface to AMD                                      */
+/*********************************************************/
+
+/* Ordering computed by the external amdexa_/amdtru_/amdbar_ routines.
+ *   m        symmetric or hermitian matrix with its lower part stored
+ *   which    "mmd", "md" or "amd": selects amdexa_, amdtru_ or amdbar_
+ *   perm     out: the `last` array of the routine, shifted down by one
+ *   invperm  out: inverse of *perm
+ * On every failure both outputs are NULL and all work arrays are freed. */
+static void
+taucs_ccs_amd(taucs_ccs_matrix* m, 
+	      int** perm, int** invperm,
+	      char* which)
+{
+#ifndef TAUCS_CONFIG_AMD
+  taucs_printf("taucs_ccs_amd: AMD routines not linked.\n");
+  *perm    = NULL;
+  *invperm = NULL;
+  return;
+#else
+  int  n, iwlen, pfree, ncmpa, iovflo;
+  int* iw;
+  int* pe;
+  int* degree;
+  int* nv;
+  int* next;
+  int* last;
+  int* head;
+  int* elen;
+  int* w;
+  int* len;
+
+  int  nnz,i,j,ip;
+  
+  taucs_printf("taucs_ccs_amd: starting (%s)\n",which);
+
+  if (!(m->flags & TAUCS_SYMMETRIC) && !(m->flags & TAUCS_HERMITIAN)) {
+    taucs_printf("taucs_ccs_amd: AMD ordering only works on symmetric matrices.\n");
+    *perm    = NULL;
+    *invperm = NULL;
+    return;
+  }
+  /* this routine may actually work on UPPER as well */
+  if (!(m->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_amd: the lower part of the matrix must be represented.\n");
+    *perm    = NULL;
+    *invperm = NULL;
+    return;
+  }
+    
+  *perm    = NULL;
+  *invperm = NULL;
+
+  n   = m->n;
+  nnz = (m->colptr)[n];
+  
+  pe     = (int*) taucs_malloc(n * sizeof(int));
+  degree = (int*) taucs_malloc(n * sizeof(int));
+  nv     = (int*) taucs_malloc(n * sizeof(int));
+  next   = (int*) taucs_malloc(n * sizeof(int));
+  last   = (int*) taucs_malloc(n * sizeof(int));
+  head   = (int*) taucs_malloc(n * sizeof(int));
+  elen   = (int*) taucs_malloc(n * sizeof(int));
+  w      = (int*) taucs_malloc(n * sizeof(int));
+  len    = (int*) taucs_malloc(n * sizeof(int));
+
+  /* AMD docs recommend iwlen >= 1.2 nnz, but this leads to compressions */
+  iwlen = n + (int) (2.0 * 2.0*(nnz - n));
+
+  taucs_printf("taucs_ccs_amd: allocating %d ints for iw\n",iwlen);
+
+  iw = (int*) taucs_malloc(iwlen * sizeof(int));
+
+  if (!pe || !degree || !nv || !next || !last || !head 
+      || !elen || !w || !len || !iw) {
+    taucs_printf("taucs_ccs_amd: out of memory\n");
+    taucs_free(pe    );
+    taucs_free(degree);
+    taucs_free(nv    );
+    taucs_free(next  );
+    taucs_free(last  );
+    taucs_free(head  );
+    taucs_free(elen  );
+    taucs_free(w     );
+    taucs_free(len   );
+    taucs_free(iw    );
+    return;
+  }
+
+  assert(sizeof(int) == 4);
+  iovflo = 2147483647; /* for 32-bit only! */
+
+  /* len[v]: every stored off-diagonal entry counts for both of its ends */
+  for (i=0; i<n; i++) len[i] = 0;
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      /*i = (m->rowind)[ip] - (m->indshift);*/
+      if (i != j) {
+	len[i] ++;
+	len[j] ++;
+      }
+    }
+  }
+
+  pe[0] = 1;
+  for (i=1; i<n; i++) pe[i] = pe[i-1] + len[i-1];
+  
+  pfree = pe[n-1] + len[n-1];
+
+  /* use degree as a temporary */
+
+  for (i=0; i<n; i++) degree[i] = pe[i] - 1;
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      /*i = (m->rowind)[ip] - (m->indshift);*/
+      i = (m->rowind)[ip];
+      if (i != j) {
+	iw[ degree[i] ] = j+1;
+	iw[ degree[j] ] = i+1;
+	degree[i] ++;
+	degree[j] ++;
+      }
+    }
+  }
+
+  taucs_printf("taucs_ccs_amd: calling amd matrix is %dx%d, nnz=%d\n",
+	     n,n,nnz);
+
+  if (!strcmp(which,"mmd")) 
+    amdexa_(&n, pe, iw, len, &iwlen, &pfree, nv, next,
+	    last, head, elen, degree, &ncmpa, w, &iovflo);
+  else if (!strcmp(which,"md")) 
+    amdtru_(&n, pe, iw, len, &iwlen, &pfree, nv, next,
+	  last, head, elen, degree, &ncmpa, w, &iovflo);
+  else if (!strcmp(which,"amd")) 
+    amdbar_(&n, pe, iw, len, &iwlen, &pfree, nv, next,
+	    last, head, elen, degree, &ncmpa, w, &iovflo);
+  else {
+    /* unknown name: release every work array before giving up */
+    taucs_printf("taucs_ccs_amd: WARNING - invalid ordering requested (%s)\n",which);
+    taucs_free(pe    ); taucs_free(degree);
+    taucs_free(nv    ); taucs_free(next  );
+    taucs_free(last  ); taucs_free(head  );
+    taucs_free(elen  ); taucs_free(w     );
+    taucs_free(len   ); taucs_free(iw    );
+    return;
+  }
+
+  taucs_printf("taucs_ccs_amd: amd returned. optimal iwlen=%d (in this run was %d), %d compressions\n",
+	     pfree,iwlen,ncmpa);
+
+  taucs_free(pe    );
+  taucs_free(degree);
+  taucs_free(nv    );
+  taucs_free(next  );
+  /* last is kept: it becomes *perm */
+  taucs_free(head  );
+  taucs_free(elen  );
+  taucs_free(w     );
+  /* len is kept: it becomes *invperm */
+  taucs_free(iw    );
+  
+  /* shift last down by one, then build its inverse in len */
+  for (i=0; i<n; i++) last[i] --;
+  for (i=0; i<n; i++) len[ last[i] ] = i;
+
+  *perm    = last;
+  *invperm = len;
+#endif
+}
+
+/*********************************************************/
+/* Interface to MMD                                      */
+/*********************************************************/
+
+/*
+ * Fill-reducing ordering obtained from the external genmmd_ routine.
+ *
+ * m       : matrix to order; it must be flagged TAUCS_SYMMETRIC or
+ *           TAUCS_HERMITIAN, and TAUCS_LOWER.
+ * perm    : on success receives a newly allocated array of n entries
+ *           holding the ordering returned by genmmd_, shifted to be
+ *           0-based; NULL on any failure.
+ * invperm : on success receives a newly allocated array (n+1 entries
+ *           are allocated) with invperm[perm[i]] == i; NULL on failure.
+ * which   : not used by this routine.
+ *
+ * When TAUCS_CONFIG_GENMMD is not defined the routine only logs a
+ * message and returns NULL in both outputs.
+ */
+static void
+taucs_ccs_genmmd(taucs_ccs_matrix* m,
+                 int** perm, int** invperm,
+                 char* which)
+{
+#ifndef TAUCS_CONFIG_GENMMD
+  taucs_printf("taucs_ccs_genmmd: GENMMD routines not linked.\n");
+  *perm    = NULL;
+  *invperm = NULL;
+  return;
+#else
+  int  n, nnz, adjsize;
+  int  delta, maxint, nofsub;
+  int  i, j, ip;
+  int* xadj;
+  int* adjncy;
+  int* invp;
+  int* prm;
+  int* dhead;
+  int* qsize;
+  int* llist;
+  int* marker;
+  int* len;   /* aliases dhead while the adjacency structure is built */
+  int* next;  /* aliases qsize while the adjacency structure is built */
+
+  /* every early exit below reports failure through NULL outputs */
+  *perm    = NULL;
+  *invperm = NULL;
+
+  if (!(m->flags & TAUCS_SYMMETRIC) && !(m->flags & TAUCS_HERMITIAN)) {
+    taucs_printf("taucs_ccs_genmmd: GENMMD ordering only works on symmetric matrices.\n");
+    return;
+  }
+  /* this routine may actually work on UPPER as well */
+  if (!(m->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_genmmd: the lower part of the matrix must be represented.\n");
+    return;
+  }
+
+  n   = m->n;
+  nnz = (m->colptr)[n];
+
+  /* The values of delta and maxint were copied from SuperLU (Sivan). */
+  delta = 1; /* DELTA is a parameter to allow the choice of nodes
+                whose degree <= min-degree + DELTA. */
+  assert(sizeof(int) == 4);
+  maxint = 2147483647; /* 2**31-1, for 32-bit only! */
+
+  /* Every off-diagonal entry is recorded twice (once per endpoint), so
+     2*nnz slots are always sufficient.  The previous size, 2*nnz-n, was
+     only large enough when every column stores its diagonal entry. */
+  adjsize = 2*nnz;
+
+  xadj   = (int*) taucs_malloc((n+1)   * sizeof(int));
+  adjncy = (int*) taucs_malloc(adjsize * sizeof(int));
+  invp   = (int*) taucs_malloc((n+1)   * sizeof(int));
+  prm    = (int*) taucs_malloc(n       * sizeof(int));
+  dhead  = (int*) taucs_malloc((n+1)   * sizeof(int));
+  qsize  = (int*) taucs_malloc((n+1)   * sizeof(int));
+  llist  = (int*) taucs_malloc(n       * sizeof(int));
+  marker = (int*) taucs_malloc(n       * sizeof(int));
+
+  if (!xadj || !adjncy || !invp || !prm
+      || !dhead || !qsize || !llist || !marker) {
+    taucs_free(xadj  );
+    taucs_free(adjncy);
+    taucs_free(invp  );
+    taucs_free(prm   );
+    taucs_free(dhead );
+    taucs_free(qsize );
+    taucs_free(llist );
+    taucs_free(marker);
+    return;
+  }
+
+  /* dhead and qsize are only needed by genmmd_ itself, so they double
+     as scratch space until the call */
+  len  = dhead;
+  next = qsize;
+
+  /* pass 1: count the off-diagonal neighbours of every vertex */
+  for (i=0; i<n; i++) len[i] = 0;
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      if (i != j) {
+        len[i] ++;
+        len[j] ++;
+      }
+    }
+  }
+
+  /* xadj holds 1-based start offsets into adjncy */
+  xadj[0] = 1;
+  for (i=1; i<=n; i++) xadj[i] = xadj[i-1] + len[i-1];
+
+  /* next[i] is the 0-based slot where the next neighbour of i goes */
+  for (i=0; i<n; i++) next[i] = xadj[i] - 1;
+
+  /* pass 2: store both directions of every off-diagonal entry, using
+     1-based vertex numbers */
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      if (i != j) {
+        assert( next[i] < adjsize );
+        assert( next[j] < adjsize );
+        adjncy[ next[i] ] = j+1;
+        adjncy[ next[j] ] = i+1;
+        next[i] ++;
+        next[j] ++;
+      }
+    }
+  }
+
+  genmmd_(&n,
+          xadj, adjncy,
+          invp,prm,
+          &delta,
+          dhead,qsize,llist,marker,
+          &maxint,&nofsub);
+
+  taucs_free(marker);
+  taucs_free(llist );
+  taucs_free(qsize );
+  taucs_free(dhead );
+  taucs_free(xadj  );
+  taucs_free(adjncy);
+
+  /* shift the returned ordering to 0-based and rebuild its inverse */
+  for (i=0; i<n; i++) prm[i] --;
+  for (i=0; i<n; i++) invp[ prm[i] ] = i;
+
+  *perm    = prm;
+  *invperm = invp;
+#endif
+}
+
+/*********************************************************/
+/* No-fill ordering for trees                            */
+/*********************************************************/
+
+/* Frees every buffer owned by taucs_ccs_treeorder() and signals failure
+   to its caller by setting both output permutations to NULL. */
+static void
+taucs_ccs_treeorder_abort(int** perm, int** invperm,
+                          int* adjptr, int* adj,
+                          int* len, int* leaves)
+{
+  taucs_free(adj);
+  taucs_free(adjptr);
+  taucs_free(len);
+  taucs_free(leaves);
+  taucs_free(*perm);
+  taucs_free(*invperm);
+  *perm = *invperm = NULL;
+}
+
+/*
+ * Orders the vertices of a matrix whose graph is a tree (or a forest)
+ * by repeatedly removing a vertex that has at most one remaining
+ * neighbour.
+ *
+ * m       : matrix to order; it must be flagged TAUCS_SYMMETRIC or
+ *           TAUCS_HERMITIAN, and TAUCS_LOWER.
+ * perm    : on success receives a newly allocated array of n entries;
+ *           perm[i] is the i-th vertex removed.
+ * invperm : on success receives the inverse, invperm[perm[i]] == i.
+ *
+ * Both outputs are set to NULL when the flags are wrong, when memory
+ * runs out, or when the graph turns out not to be a tree.
+ *
+ * Fixes relative to the previous version:
+ *  - every failure path now returns right after releasing the buffers;
+ *    previously execution continued and touched freed / NULL memory;
+ *  - the leaf stack is tested for emptiness before it is popped; the
+ *    old test (nleaves <= 0 after the decrement) also rejected the pop
+ *    of the last remaining leaf, i.e. it rejected every valid tree;
+ *  - adj is sized 2*nnz instead of 2*(nnz-n), which was zero for
+ *    diagonal matrices and too small when diagonal entries are missing.
+ */
+static void 
+taucs_ccs_treeorder(taucs_ccs_matrix* m,
+                    int** perm,
+                    int** invperm)
+{
+  int  n,nnz,i,j,ip,k,p,nleaves;
+  int* adjptr;  /* adjptr[v]: start of v's neighbour list inside adj   */
+  int* adj;     /* concatenated neighbour lists                        */
+  int* len;     /* len[v]: number of neighbours of v not yet removed   */
+  int* ptr;     /* fill cursors; borrows the storage of *perm          */
+  int* leaves;  /* stack of vertices that are ready to be removed      */
+
+  if (!(m->flags & TAUCS_SYMMETRIC) && !(m->flags & TAUCS_HERMITIAN)) {
+    taucs_printf("taucs_ccs_treeorder: tree ordering only works on symmetric matrices.\n");
+    *perm    = NULL;
+    *invperm = NULL;
+    return;
+  }
+  /* this routine may actually work on UPPER as well */
+  if (!(m->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_treeorder: the lower part of the matrix must be represented.\n");
+    *perm    = NULL;
+    *invperm = NULL;
+    return;
+  }
+
+  n   = m->n;
+  nnz = (m->colptr)[n];
+
+  taucs_printf("taucs_ccs_treeorder: starting, matrix is %dx%d, # edges=%d\n",
+               n,n,nnz-n);
+
+  *perm    = (int*) taucs_malloc(n * sizeof(int));
+  *invperm = (int*) taucs_malloc(n * sizeof(int));
+
+  len    = (int*) taucs_malloc(n * sizeof(int));
+  leaves = (int*) taucs_malloc(n * sizeof(int));
+  adjptr = (int*) taucs_malloc(n * sizeof(int));
+  /* two slots per stored entry is always enough, with or without
+     diagonal entries (same bound as taucs_ccs_metis) */
+  adj    = (int*) taucs_malloc(2 * nnz * sizeof(int));
+
+  if (!(*perm) || !(*invperm) || !adjptr || !adj || !len || !leaves) {
+    taucs_ccs_treeorder_abort(perm,invperm,adjptr,adj,len,leaves);
+    return;
+  }
+
+  /* pass 1: count the off-diagonal neighbours of every vertex */
+  for (i=0; i<n; i++) len[i] = 0;
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      if (i != j) {
+        len[i] ++;
+        len[j] ++;
+      }
+    }
+  }
+
+  /* vertices with at most one neighbour form the initial leaf stack */
+  nleaves = 0;
+  for (i=0; i<n; i++) {
+    if (len[i] <= 1) {
+      leaves[nleaves] = i;
+      nleaves++;
+    }
+  }
+
+  adjptr[0] = 0;
+  for (i=1; i<n; i++) adjptr[i] = adjptr[i-1] + len[i-1];
+
+  /* *perm is not needed until the elimination loop, so it serves as
+     the per-vertex fill cursor while adj is populated */
+  ptr = *perm;
+  for (i=0; i<n; i++) ptr[i] = adjptr[i];
+
+  /* pass 2: store both directions of every off-diagonal entry */
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      if (i != j) {
+        adj[ ptr[i] ] = j;
+        adj[ ptr[j] ] = i;
+        ptr[i] ++;
+        ptr[j] ++;
+      }
+    }
+  }
+
+  for (i=0; i<n; i++) {
+    if (nleaves <= 0) {
+      /* no leaf left although vertices remain: not a tree */
+      taucs_ccs_treeorder_abort(perm,invperm,adjptr,adj,len,leaves);
+      return;
+    }
+    nleaves--;
+    j = leaves[nleaves];
+
+    (*perm)   [ i ] = j;
+    (*invperm)[ j ] = i;
+
+    if (len[j] > 0) {
+      if (len[j] != 1) {
+        /* not a tree */
+        taucs_ccs_treeorder_abort(perm,invperm,adjptr,adj,len,leaves);
+        return;
+      }
+      /* the single remaining neighbour of j */
+      p = adj[ adjptr[j] ];
+
+      for (k = 0; k < len[p]; k++)
+        if (adj[ adjptr[p] + k ] == j) break;
+
+      if ( k >= len[p] ) {
+        /* j does not show up in p's adjacency list: not a tree */
+        taucs_ccs_treeorder_abort(perm,invperm,adjptr,adj,len,leaves);
+        return;
+      }
+
+      /* now delete j from p's adjacency list and compress */
+      len[p] --;
+      for (; k < len[p]; k++)
+        adj[ adjptr[p] + k ] = adj[ adjptr[p] + k+1 ];
+
+      if (len[p] == 1) {  /* degree was higher and now is 1 */
+        leaves[ nleaves ] = p;
+        nleaves++;
+      }
+    }
+  }
+
+  taucs_free(adj);
+  taucs_free(adjptr);
+  taucs_free(len);
+  taucs_free(leaves);
+
+  taucs_printf("taucs_ccs_treeorder: done\n");
+}
+
+/*********************************************************/
+/* Interface to METIS                                    */
+/*********************************************************/
+
+/* from struct.h in metis */
+typedef int idxtype; 
+/* from metis.h */
+void METIS_NodeND(int *, idxtype *, idxtype *, int *, int *, idxtype *, idxtype *);
+
+/*
+ * Ordering obtained from METIS_NodeND.
+ *
+ * m       : matrix to order; it must be flagged TAUCS_SYMMETRIC or
+ *           TAUCS_HERMITIAN, and TAUCS_LOWER.
+ * perm    : receives a newly allocated array of n entries that is
+ *           passed to METIS_NodeND as its first output array; NULL on
+ *           failure.
+ * invperm : receives a newly allocated array of n entries that is
+ *           passed to METIS_NodeND as its second output array; NULL on
+ *           failure.
+ * which   : not used by this routine.
+ *
+ * When TAUCS_CONFIG_METIS is not defined the routine only logs a
+ * message and returns NULL in both outputs.
+ */
+static void 
+taucs_ccs_metis(taucs_ccs_matrix* m, 
+                int** perm, int** invperm,
+                char* which)
+{
+#ifndef TAUCS_CONFIG_METIS
+  taucs_printf("taucs_ccs_metis: METIS routines not linked.\n");
+  *perm    = NULL;
+  *invperm = NULL;
+  return;
+#else
+  int  n,nnz,i,j,ip;
+  int* xadj;
+  int* adj;
+  int  num_flag     = 0;
+  int  options_flag = 0;
+  int* len;
+  int* ptr;
+
+  if (!(m->flags & TAUCS_SYMMETRIC) && !(m->flags & TAUCS_HERMITIAN)) {
+    /* the message used to carry the prefix of taucs_ccs_treeorder */
+    taucs_printf("taucs_ccs_metis: METIS ordering only works on symmetric matrices.\n");
+    *perm    = NULL;
+    *invperm = NULL;
+    return;
+  }
+  /* this routine may actually work on UPPER as well */
+  if (!(m->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_metis: the lower part of the matrix must be represented.\n");
+    *perm    = NULL;
+    *invperm = NULL;
+    return;
+  }
+
+  n   = m->n;
+  nnz = (m->colptr)[n];
+
+  *perm    = (int*) taucs_malloc(n * sizeof(int));
+  *invperm = (int*) taucs_malloc(n * sizeof(int));
+
+  xadj = (int*) taucs_malloc((n+1) * sizeof(int));
+  /* Change suggested by Yifan Hu for diagonal matrices */
+  /* and for matrices with no diagonal: 2*nnz instead of 2*(nnz-n) */
+  adj  = (int*) taucs_malloc(2* nnz * sizeof(int));
+
+  if (!(*perm) || !(*invperm) || !xadj || !adj) {
+    taucs_free(*perm);
+    taucs_free(*invperm);
+    taucs_free(xadj);
+    taucs_free(adj);
+    *perm = *invperm = NULL;
+    return;
+  }
+
+  /* *perm is only needed as an output of METIS_NodeND, so until the
+     call it serves first as the neighbour counter and then as the
+     per-vertex fill cursor */
+  ptr = len = *perm;
+
+  /* pass 1: count the off-diagonal neighbours of every vertex */
+  for (i=0; i<n; i++) len[i] = 0;
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      if (i != j) {
+        len[i] ++;
+        len[j] ++;
+      }
+    }
+  }
+
+  /* 0-based start offsets of every neighbour list */
+  xadj[0] = 0;
+  for (i=1; i<=n; i++) xadj[i] = xadj[i-1] + len[i-1];
+
+  for (i=0; i<n; i++) ptr[i] = xadj[i];
+
+  /* pass 2: store both directions of every off-diagonal entry */
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      i = (m->rowind)[ip];
+      if (i != j) {
+        adj[ ptr[i] ] = j;
+        adj[ ptr[j] ] = i;
+        ptr[i] ++;
+        ptr[j] ++;
+      }
+    }
+  }
+
+  METIS_NodeND(&n,
+               xadj,adj,
+               &num_flag, &options_flag,
+               *perm,*invperm);
+
+  taucs_free(xadj);
+  taucs_free(adj);
+#endif
+}
+
+/*********************************************************/
+/* RANDOM PERMUTATION                                    */
+/*********************************************************/
+
+/*
+ * Builds a pseudo-random permutation of 0..n-1 and its inverse.
+ *
+ * n       : number of entries.
+ * perm    : receives a newly allocated array of n entries, shuffled
+ *           with rand(); NULL when memory runs out.
+ * invperm : receives a newly allocated array with
+ *           invperm[perm[i]] == i; NULL when memory runs out.
+ */
+static void 
+taucs_ccs_randomperm(int n,int** perm, int** invperm)
+{
+  int* fwd;
+  int* bwd;
+  int  k, last, pick, saved;
+
+  fwd = (int*) taucs_malloc(n * sizeof(int));
+  bwd = (int*) taucs_malloc(n * sizeof(int));
+
+  if (fwd == NULL || bwd == NULL) {
+    taucs_free(fwd);
+    taucs_free(bwd);
+    *perm    = NULL;
+    *invperm = NULL;
+    taucs_printf("taucs_ccs_randomperm: out of memory for permutation\n");
+    return;
+  }
+
+  *perm    = fwd;
+  *invperm = bwd;
+
+  /* start from the identity */
+  for (k = 0; k < n; k++)
+    fwd[k] = k;
+
+  /* walk the tail position down from n-1 to 0, swapping it with a
+     randomly chosen position at or below it */
+  for (last = n - 1; last >= 0; last--) {
+    pick = rand() % (last + 1);
+
+    saved     = fwd[pick];
+    fwd[pick] = fwd[last];
+    fwd[last] = saved;
+  }
+
+  for (k = 0; k < n; k++)
+    bwd[ fwd[k] ] = k;
+}
+
+/*********************************************************/
+/* MAIN ORDERING ROUTINE                                 */
+/*********************************************************/
+
+/*
+ * Entry point for all orderings: dispatches on the name in `which`.
+ *
+ *   "mmd", "amd", "md" -> taucs_ccs_amd
+ *   "metis"            -> taucs_ccs_metis
+ *   "genmmd"           -> taucs_ccs_genmmd
+ *   "colamd"           -> taucs_ccs_colamd
+ *   "random"           -> taucs_ccs_randomperm
+ *   "tree"             -> taucs_ccs_treeorder, then taucs_ccs_metis if
+ *                         the tree ordering left *perm NULL
+ *   "identity"         -> perm[i] == invperm[i] == i
+ *
+ * Any other name is logged and yields NULL in *perm and *invperm.
+ */
+void 
+taucs_ccs_order(taucs_ccs_matrix* m, 
+                int** perm, int** invperm,
+                char* which)
+{
+  int k;
+
+  if (strcmp(which,"mmd") == 0
+      || strcmp(which,"amd") == 0
+      || strcmp(which,"md") == 0) {
+    taucs_ccs_amd(m,perm,invperm,which);
+    return;
+  }
+
+  if (strcmp(which,"metis") == 0) {
+    taucs_ccs_metis(m,perm,invperm,which);
+    return;
+  }
+
+  if (strcmp(which,"genmmd") == 0) {
+    taucs_ccs_genmmd(m,perm,invperm,which);
+    return;
+  }
+
+  if (strcmp(which,"colamd") == 0) {
+    taucs_ccs_colamd(m,perm,invperm,which);
+    return;
+  }
+
+  if (strcmp(which,"random") == 0) {
+    taucs_ccs_randomperm(m->n,perm,invperm);
+    return;
+  }
+
+  if (strcmp(which,"tree") == 0) {
+    taucs_ccs_treeorder(m,perm,invperm);
+    /* perhaps the graph of the matrix is not a tree */
+    if (*perm == NULL)
+      taucs_ccs_metis(m,perm,invperm,"metis");
+    return;
+  }
+
+  if (strcmp(which,"identity") == 0) {
+    *perm    = (int*) taucs_malloc((m->n) * sizeof(int));
+    *invperm = (int*) taucs_malloc((m->n) * sizeof(int));
+    if (*perm == NULL || *invperm == NULL) {
+      taucs_free(*perm);
+      taucs_free(*invperm);
+      *perm = *invperm = NULL;
+      taucs_printf("taucs_ccs_order: out of memory for identity permutation\n");
+      return;
+    }
+    for (k = 0; k < m->n; k++) {
+      (*invperm)[k] = k;
+      (*perm)[k]    = k;
+    }
+    return;
+  }
+
+  taucs_printf("taucs_ccs_order: invalid ordering requested (%s)\n",which);
+  *perm = *invperm = NULL;
+}
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#endif /* TAUCS_CORE_GENERAL */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
diff --git a/contrib/taucs/src/taucs_ccs_solve_llt.c b/contrib/taucs/src/taucs_ccs_solve_llt.c
new file mode 100644
index 0000000000000000000000000000000000000000..895f13dbdfeca55f62efab039dc8f4ad33659faf
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_solve_llt.c
@@ -0,0 +1,484 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+#include "taucs.h"
+
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+
+#ifndef TAUCS_CORE_GENERAL
+/*
+ * Solves with a lower-triangular factor L stored by columns: a forward
+ * sweep with L followed by a backward sweep that uses the conjugated
+ * entries of L.
+ *
+ * vL : the factor (a taucs_ccs_matrix*); it must be flagged
+ *      TAUCS_TRIANGULAR and TAUCS_LOWER, and the first stored entry of
+ *      every column is taken to be its diagonal entry.
+ * x  : receives the solution (n entries); also used as workspace.
+ * b  : right-hand side (n entries).
+ *
+ * Returns 0 on success, -1 when the flags are wrong or the temporary
+ * vector cannot be allocated.
+ */
+int 
+taucs_dtl(ccs_solve_llt)(void* vL, taucs_datatype* x, taucs_datatype* b)
+{
+  taucs_ccs_matrix* factor = (taucs_ccs_matrix*) vL;
+  taucs_datatype*   work;
+  taucs_datatype    pivot, entry;
+  int n;
+  int i,j;
+  int pos;
+
+  if (!(factor->flags & TAUCS_TRIANGULAR)) {
+    taucs_printf("taucs_ccs_solve_llt: factor matrix must be triangular\n");
+    return -1;
+  }
+  if (!(factor->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_solve_llt: lower part must be represented\n");
+    return -1;
+  }
+
+  n = factor->n;
+
+  work = (taucs_datatype*) taucs_malloc( n * sizeof(taucs_datatype) );
+  if (!work) return -1;
+
+  for (i=0; i<n; i++) x[i] = b[i];
+
+  /* forward sweep: work receives the solution of L work = b, while x
+     carries the running right-hand side */
+  for (j=0; j<n; j++) {
+    /* the diagonal entry is stored first in its column */
+    pos = (factor->colptr)[j];
+    i   = (factor->rowind)[pos];
+    assert (i==j);
+    pivot = (factor->taucs_values)[pos];
+
+    work[j] = taucs_div( x[j] , pivot );
+
+    for (pos = pos + 1; pos < (factor->colptr)[j+1]; pos++) {
+      i     = (factor->rowind)[pos];
+      entry = (factor->taucs_values)[pos];
+      x[i]  = taucs_sub( x[i], taucs_mul( work[j],entry ));
+    }
+  }
+
+  /* backward sweep: column i of L supplies row i of the transposed
+     system, with every entry conjugated */
+  for (i=n-1; i>=0; i--) {
+    for (pos = (factor->colptr)[i]+1; pos < (factor->colptr)[i+1]; pos++) {
+      j       = (factor->rowind)[pos];
+      entry   = taucs_conj( (factor->taucs_values)[pos] );
+      work[i] = taucs_sub( work[i], taucs_mul( x[j],entry ));
+    }
+
+    pivot = (factor->taucs_values)[ (factor->colptr)[i] ];
+    x[i]  = taucs_div( work[i] , pivot );
+  }
+
+  taucs_free(work);
+
+  return 0;
+}
+
+/***************** SOLVE LLT PARTIAL ********************/
+
+/*
+ * Partial triangular solve intended to be combined with an iterative
+ * solve on a Schur complement.
+ *
+ * With p = L->n - schur_comp->n, the routine
+ *   1. runs forward substitution over the first p columns of L,
+ *   2. copies the remaining entries x[p..n-1] into y[p..n-1],
+ *   3. is meant to solve the Schur-complement system for x[p..n-1] with
+ *      taucs_conjugate_gradients -- that call is compiled out (#if 0)
+ *      and guarded by assert(0),
+ *   4. runs back substitution over rows p-1..0.
+ *
+ * Returns -1 when L is not flagged TAUCS_TRIANGULAR and TAUCS_LOWER or
+ * when the temporary vector cannot be allocated, 0 otherwise.
+ *
+ * NOTE(review): because of the assert(0) this routine aborts in builds
+ * where assertions are enabled.  If assertions are compiled out it
+ * returns 0 without the Schur-complement solve having run, so
+ * x[p..n-1] still holds the updated right-hand side -- confirm that no
+ * caller relies on this path.
+ */
+int 
+taucs_dtl(ccs_solve_schur)(taucs_ccs_matrix* L,
+			   taucs_ccs_matrix* schur_comp,
+			   int    (*schur_precond_fn)(void*,void* x,void* b),
+			   void*  schur_precond_args,
+			   int    maxits,
+			   double convratio,
+			   taucs_datatype* x, taucs_datatype* b)
+{
+  int n;
+  int i,j;
+  int ip,jp;
+  taucs_datatype  Aij, Ajj, Aii;
+  taucs_datatype* y;
+
+  int p; /* number of leading columns handled by the direct solve */
+
+  /* the messages below carry the taucs_ccs_solve_llt prefix */
+  if (!(L->flags & TAUCS_TRIANGULAR)) {
+    taucs_printf("taucs_ccs_solve_llt: factor matrix must be triangular\n");
+    return -1;
+  }
+  if (!(L->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_solve_llt: lower part must be represented\n");
+    return -1;
+  }
+
+  n = L->n;
+  p = n - (schur_comp->n);
+
+  y = (taucs_datatype*) taucs_malloc( n * sizeof(taucs_datatype) );
+  if (!y) return -1;
+
+  for (i=0; i<n; i++) x[i] = b[i];
+
+  /* Solve L y = b = x  */
+
+  for (j=0; j<p; j++) {
+
+    /* we put diagonal elements first */
+    ip = (L->colptr)[j];
+    i = (L->rowind)[ip];
+    assert (i==j);
+    Ajj = (L->taucs_values)[ip];
+    
+    /*y[j] = x[j] / Ajj;*/
+    y[j] = taucs_div( x[j] , Ajj );
+
+    /* subtract column j's contribution from the rows below it */
+    for (ip = (L->colptr)[j] + 1; ip < (L->colptr)[j+1]; ip++) {
+      i = (L->rowind)[ip];
+      Aij = (L->taucs_values)[ip];
+      /*x[i] -= y[j]*Aij;*/
+      x[i] = taucs_sub( x[i], taucs_mul( y[j],Aij ));
+    }
+  }
+
+  /* 
+     now y_1 is computed, L_11 y_1 = b_1, 
+     x_2 holds (b_2 - L_21 y_1).
+     move y_2 <- x_2.
+  */
+
+  for (i=p; i<n; i++) y[i] = x[i];
+
+  /* Now solve x_2 <- (A_22 - L_21 L_21^T)^-1 y_2 */ 
+  
+  /*taucs_printf("symccs_solve_schur: calling CG on Schur complement\n");*/
+  /* sivan: removed for testing the complex codes */
+  /* the iterative solve below is disabled; see the NOTE(review) in the
+     header comment */
+  assert(0);
+#if 0
+  taucs_conjugate_gradients (schur_comp,
+			     schur_precond_fn,
+			     schur_precond_args,
+			     x+p,          /* this is x_2 */
+			     y+p,          /* this is y_2 */
+			     maxits,       /* itermax */
+			     convratio     /* conv tolerance */
+			     );
+#endif
+  /*taucs_printf("taucs_ccs_solve_llt_partial: CG on Schur complement returned\n");*/
+  
+  /* Now we have x_2, solve L_11^T x_1 = y_1 - L_21^T x_2 */
+
+  for (i=p-1; i>=0; i--) {
+
+    /* NOTE(review): unlike the backward sweep of ccs_solve_llt, the
+       entries are not passed through taucs_conj here -- confirm whether
+       that is intended for complex builds */
+    for (jp = (L->colptr)[i]+1; jp < (L->colptr)[i+1]; jp++) {
+      j = (L->rowind)[jp];
+      Aij = (L->taucs_values)[jp];
+      /*y[i] -= x[j]*Aij;*/
+      y[i] = taucs_sub( y[i], taucs_mul( x[j],Aij ));
+    }
+
+    /* the first stored entry of column i is used as the diagonal */
+    jp = (L->colptr)[i];
+    j = (L->rowind)[jp];
+    Aii = (L->taucs_values)[jp];
+
+    /*x[i] = y[i] / Aii;*/
+    x[i] = taucs_div( y[i] , Aii );
+
+  }
+
+  taucs_free(y);
+
+  return 0;
+}
+
+/*********************************************************/
+/* LDL^T solve                                           */
+/*********************************************************/
+
+/*
+ * Solves with an L D L^T style factor stored by columns in a single
+ * matrix: the first stored entry of every column is used as the entry
+ * of D, the remaining entries as the strictly lower part of L (whose
+ * diagonal is treated as 1).
+ *
+ * vL : the factor (a taucs_ccs_matrix*); it must be flagged
+ *      TAUCS_TRIANGULAR and TAUCS_LOWER.
+ * x  : receives the solution (n entries); also used as workspace.
+ * b  : right-hand side (n entries).
+ *
+ * Returns 0 on success, -1 when the flags are wrong or the temporary
+ * vector cannot be allocated.  inf/nan values are only reported through
+ * taucs_printf; they do not change the return value.
+ */
+int 
+taucs_dtl(ccs_solve_ldlt)(void* vL, taucs_datatype* x, taucs_datatype* b)
+{
+  taucs_ccs_matrix* factor = (taucs_ccs_matrix*) vL;
+  /* pivot is not assigned before the D sweep, so the diagnostic in the
+     forward sweep prints this initial value */
+  taucs_datatype    pivot = taucs_zero_const;
+  taucs_datatype    entry = taucs_zero_const; /* just to suppress the warning */
+  taucs_datatype*   work;
+  int n;
+  int i,j;
+  int pos;
+
+  if (!(factor->flags & TAUCS_TRIANGULAR)) {
+    taucs_printf("taucs_ccs_solve_ldlt: factor matrix must be triangular\n");
+    return -1;
+  }
+  if (!(factor->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_solve_ldlt: lower part must be represented\n");
+    return -1;
+  }
+
+  n = factor->n;
+
+  work = (taucs_datatype*) taucs_malloc( n * sizeof(taucs_datatype) );
+  if (!work) return -1;
+
+  for (i=0; i<n; i++) x[i] = b[i];
+
+  /* forward sweep with unit-diagonal L: work = L^-1 b */
+  for (j=0; j<n; j++) {
+    work[j] = x[j];
+
+    if (taucs_isnan(work[j]) || taucs_isinf(work[j])) {
+      taucs_printf("taucs_ccs_solve_ldlt: inf/nan in column %d (L); %e+%ei / %e+%ei\n",
+                   j,
+                   taucs_re(x[j]),taucs_im(x[j]),
+                   taucs_re(pivot),taucs_im(pivot));
+    }
+
+    for (pos = (factor->colptr)[j] + 1; pos < (factor->colptr)[j+1]; pos++) {
+      i     = (factor->rowind)[pos];
+      entry = (factor->taucs_values)[pos];
+      x[i]  = taucs_sub( x[i], taucs_mul( work[j],entry ));
+    }
+  }
+
+  /* diagonal sweep: divide by the first stored entry of every column */
+  for (j=0; j<n; j++) {
+    pos = (factor->colptr)[j];
+    i   = (factor->rowind)[pos];
+    assert (i==j);
+    pivot = (factor->taucs_values)[pos];
+
+    work[j] = taucs_div( work[j] , pivot );
+  }
+
+  /* backward sweep with the conjugated entries of L */
+  for (i=n-1; i>=0; i--) {
+    for (pos = (factor->colptr)[i]+1; pos < (factor->colptr)[i+1]; pos++) {
+      j       = (factor->rowind)[pos];
+      entry   = taucs_conj( (factor->taucs_values)[pos] );
+      work[i] = taucs_sub( work[i] , taucs_mul( x[j],entry ));
+    }
+
+    x[i] = work[i];
+
+    if (taucs_isnan(x[i]) || taucs_isinf(x[i])) {
+      taucs_printf("symccs_solve_ldlt: inf/nan in row %d (LT)\n",i);
+    }
+  }
+
+  taucs_free(work);
+
+  return 0;
+}
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+#ifdef TAUCS_CORE_GENERAL
+/*
+ * Type-generic front end of the Schur-complement solve: forwards all
+ * arguments to the routine that matches the data-type flag found in
+ * L->flags (double, single, double complex, single complex -- tested in
+ * that order, each only when the type is compiled into the build).
+ *
+ * Returns whatever the selected routine returns.  When no compiled-in
+ * type matches, the routine asserts and returns -1.
+ */
+int 
+taucs_ccs_solve_schur(taucs_ccs_matrix* L,
+                      taucs_ccs_matrix* schur_comp,
+                      int    (*schur_precond_fn)(void*,void* x,void* b),
+                      void*  schur_precond_args,
+                      int    maxits,
+                      double convratio,
+                      void* x, void* b)
+{
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (L->flags & TAUCS_DOUBLE) {
+    return taucs_dccs_solve_schur(L, schur_comp,
+                                  schur_precond_fn, schur_precond_args,
+                                  maxits, convratio,
+                                  (taucs_double*)x, (taucs_double*)b);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (L->flags & TAUCS_SINGLE) {
+    return taucs_sccs_solve_schur(L, schur_comp,
+                                  schur_precond_fn, schur_precond_args,
+                                  maxits, convratio,
+                                  (taucs_single*)x, (taucs_single*)b);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (L->flags & TAUCS_DCOMPLEX) {
+    return taucs_zccs_solve_schur(L, schur_comp,
+                                  schur_precond_fn, schur_precond_args,
+                                  maxits, convratio,
+                                  (taucs_dcomplex*)x, (taucs_dcomplex*)b);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (L->flags & TAUCS_SCOMPLEX) {
+    return taucs_cccs_solve_schur(L, schur_comp,
+                                  schur_precond_fn, schur_precond_args,
+                                  maxits, convratio,
+                                  (taucs_scomplex*)x, (taucs_scomplex*)b);
+  }
+#endif
+
+  /* no compiled-in data type matched the flags of L */
+  assert(0);
+  return -1;
+}
+
+/*
+ * Type-generic front end of the LL^T solve: forwards to the routine
+ * that matches the data-type flag found in the factor's flags (double,
+ * single, double complex, single complex -- tested in that order, each
+ * only when the type is compiled into the build).
+ *
+ * Returns whatever the selected routine returns.  When no compiled-in
+ * type matches, the routine asserts and returns -1.
+ */
+int
+taucs_ccs_solve_llt(void* vL, void* x, void* b)
+{
+  taucs_ccs_matrix* L = (taucs_ccs_matrix*) vL;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (L->flags & TAUCS_DOUBLE) {
+    return taucs_dccs_solve_llt(L,(taucs_double*) x, (taucs_double*) b);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (L->flags & TAUCS_SINGLE) {
+    return taucs_sccs_solve_llt(L,(taucs_single*) x, (taucs_single*) b);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (L->flags & TAUCS_DCOMPLEX) {
+    return taucs_zccs_solve_llt(L,(taucs_dcomplex*) x, (taucs_dcomplex*) b);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (L->flags & TAUCS_SCOMPLEX) {
+    return taucs_cccs_solve_llt(L,(taucs_scomplex*) x, (taucs_scomplex*) b);
+  }
+#endif
+
+  /* no compiled-in data type matched the flags of the factor */
+  assert(0);
+  return -1;
+}
+
+/*
+ * Type-generic front end of the LDL^T solve: forwards to the routine
+ * that matches the data-type flag found in the factor's flags (double,
+ * single, double complex, single complex -- tested in that order, each
+ * only when the type is compiled into the build).
+ *
+ * Returns whatever the selected routine returns.  When no compiled-in
+ * type matches, the routine asserts and returns -1.
+ */
+int
+taucs_ccs_solve_ldlt(void* vL, void* x, void* b)
+{
+  taucs_ccs_matrix* L = (taucs_ccs_matrix*) vL;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (L->flags & TAUCS_DOUBLE) {
+    return taucs_dccs_solve_ldlt(L,(taucs_double*) x, (taucs_double*) b);
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (L->flags & TAUCS_SINGLE) {
+    return taucs_sccs_solve_ldlt(L,(taucs_single*) x, (taucs_single*) b);
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (L->flags & TAUCS_DCOMPLEX) {
+    return taucs_zccs_solve_ldlt(L,(taucs_dcomplex*) x, (taucs_dcomplex*) b);
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (L->flags & TAUCS_SCOMPLEX) {
+    return taucs_cccs_solve_ldlt(L,(taucs_scomplex*) x, (taucs_scomplex*) b);
+  }
+#endif
+
+  /* no compiled-in data type matched the flags of the factor */
+  assert(0);
+  return -1;
+}
+#endif /*TAUCS_CORE_GENERAL*/
+
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
diff --git a/contrib/taucs/src/taucs_ccs_xxt.c b/contrib/taucs/src/taucs_ccs_xxt.c
new file mode 100644
index 0000000000000000000000000000000000000000..a3e8f63f0b2e84c520820508fb93f98181745df7
--- /dev/null
+++ b/contrib/taucs/src/taucs_ccs_xxt.c
@@ -0,0 +1,547 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/* File  : taucs_ccs_xxt.c                               */
+/* Description: computes the Cholesky factor of A^-1     */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+#ifdef TAUCS_CORE_DOUBLE
+
+/*
+ * Sparse accumulator holding one column while it is being assembled.
+ * values[] and bitmap[] are indexed by row; indices[] lists the rows that
+ * currently hold an entry. A row counts as present for column j when
+ * bitmap[row] == j, which lets the same accumulator be reused for the next
+ * column without clearing it.
+ */
+typedef struct {
+  int     length;  /* number of rows currently listed in indices[] */
+  int*    indices; /* rows holding an entry, in insertion order */
+  int*    bitmap;  /* per row: column tag of the stored value, -1 after creation */
+  double* values;  /* per row: the stored value, valid where bitmap[] matches */
+} spa;
+
+/*********************************************************/
+/* Returns the strictly upper part of A                  */
+/*********************************************************/
+
+/*
+ * Builds the transpose of the off-diagonal part of A: every stored entry
+ * (i,j) of A with i != j becomes entry (j,i) of the result. For an A that
+ * stores its lower triangle this is the strictly upper triangle.
+ *
+ * A : n-by-n matrix in compressed-column form. The result is sized for
+ *     nnz(A) - n entries, so A is expected to store all n diagonal entries
+ *     (verified by the assertion at the end).
+ *
+ * Returns a newly created matrix flagged TAUCS_SYMMETRIC | TAUCS_UPPER that
+ * the caller must free, or NULL if an allocation fails.
+ */
+static 
+taucs_ccs_matrix*
+ccs_syml_to_symu(taucs_ccs_matrix* A) {
+  taucs_ccs_matrix* U;
+  int n;
+  int* temp;   /* per-row counters, then per-column insertion cursors */
+  int i,j,ip;
+  double v;
+
+  n = A->n;
+
+  temp = (int*) taucs_malloc(n * sizeof(int));
+  if (!temp) return NULL;
+
+  U = taucs_dtl(ccs_create)(n, n, (A->colptr)[n] - n);
+  if (!U) {
+    taucs_free(temp);
+    return NULL;
+  }
+
+  U->flags = TAUCS_SYMMETRIC | TAUCS_UPPER;
+
+  for (j=0; j<=n; j++) (U->colptr)[j] = 0;
+  for (j=0; j<n; j++)  temp[j] = 0;
+
+  /* count the off-diagonal entries of each row of A (= column of U) */
+  for (j=0; j<n; j++) {
+    for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      if (i!=j) temp[i]++;
+    }
+  }
+
+  /* prefix sums give the column starts; temp then tracks the next free slot */
+  for (j=1; j<=n; j++) (U->colptr)[j] = (U->colptr)[j-1] + temp[j-1];
+  for (j=0; j< n; j++) temp[j] = (U->colptr)[j];
+
+  /* scatter each off-diagonal entry (i,j) of A into column i of U */
+  for (j=0; j<n; j++) {
+    for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      v = (A->taucs_values)[ip];
+      if (i!=j) {
+        (U->rowind)[ temp[i] ] = j;
+        (U->taucs_values)[ temp[i] ] = v;
+        temp[i]++;
+      }
+    }
+  }
+
+  assert((U->colptr)[n] == (A->colptr)[n] - n);
+
+  /* the work array was previously leaked on this (successful) path */
+  taucs_free(temp);
+
+  return U;
+}
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+/*
+ * Allocates a sparse accumulator able to address rows 0..n-1.
+ * The new accumulator is empty (length 0) and every bitmap entry is -1.
+ * Returns NULL, after printing a message and releasing whatever was
+ * obtained, if any of the arrays cannot be allocated.
+ */
+static spa* spa_create(int n)
+{
+  spa* acc = (spa*) taucs_malloc( sizeof(spa) );
+  int  row;
+
+  if ( acc == NULL ) return NULL;
+
+  acc->indices = (int*)    taucs_malloc( n * sizeof(int) );
+  acc->bitmap  = (int*)    taucs_malloc( n * sizeof(int) );
+  acc->values  = (double*) taucs_malloc( n * sizeof(double) );
+
+  if ( acc->indices != NULL && acc->values != NULL && acc->bitmap != NULL ) {
+    acc->length = 0;
+    for (row = 0; row < n; row++) acc->bitmap[row] = -1;
+    return acc;
+  }
+
+  /* at least one array is missing: undo the partial construction */
+  taucs_printf("chol: cannot create spa\n");
+  taucs_free( acc->indices );
+  taucs_free( acc->bitmap  );
+  taucs_free( acc->values  );
+  taucs_free( acc );
+  return NULL;
+}
+
+/*
+ * Releases a sparse accumulator obtained from spa_create(), including all
+ * three of its arrays. Passing NULL is allowed and does nothing.
+ */
+static void spa_free(spa* s)
+{
+  if ( !s ) return;
+
+  taucs_free( s->indices );
+  taucs_free( s->bitmap  ); /* was previously leaked */
+  taucs_free( s->values  );
+  taucs_free( s );
+}
+
+/*
+ * Loads column j into the accumulator from two matrices: the entries of
+ * column j of U (asserted to lie strictly above the diagonal) followed by
+ * the entries of column j of L (asserted to lie on or below the diagonal).
+ * Any previous content of the accumulator is superseded: the length is
+ * reset and every loaded row is tagged with j.
+ */
+static void spa_set_lu(spa* s, taucs_ccs_matrix* L, taucs_ccs_matrix* U, int j)
+{
+  int pos;
+  int row;
+  int count = 0;
+
+  assert(j < L->n);
+
+  /* part above the diagonal */
+  for (pos = U->colptr[j]; pos < U->colptr[j+1]; pos++) {
+    row = U->rowind[pos];
+
+    assert( row < j ); /* U must be strictly upper */
+
+    s->indices[count] = row;
+    s->values [row]   = U->taucs_values[pos];
+    s->bitmap [row]   = j;
+    count++;
+  }
+
+  /* diagonal and part below it */
+  for (pos = L->colptr[j]; pos < L->colptr[j+1]; pos++) {
+    row = L->rowind[pos];
+
+    assert( row >= j ); /* A must be lower */
+
+    s->indices[count] = row;
+    s->values [row]   = L->taucs_values[pos];
+    s->bitmap [row]   = j;
+    count++;
+  }
+
+  s->length = count;
+}
+
+/*
+ * Adds alpha times column k of A to the column held in the accumulator.
+ * j is the column the accumulator currently represents: a row whose bitmap
+ * tag is below j has no entry yet, so it is registered with value 0 before
+ * the update is applied.
+ */
+static void spa_scale_add(spa* s, int j, taucs_ccs_matrix* A, int k, double alpha)
+{
+  int pos;
+  int row;
+
+  assert(k < A->n);
+
+  for (pos = A->colptr[k]; pos < A->colptr[k+1]; pos++) {
+    row = A->rowind[pos];
+
+    if ( s->bitmap[row] < j ) {
+      /* fill-in: first contribution to this row for column j */
+      s->bitmap[row]         = j;
+      s->values[row]         = 0.0;
+      s->indices[s->length]  = row;
+      s->length             += 1;
+    }
+
+    s->values[row] += alpha * A->taucs_values[pos];
+  }
+}
+		    
+/*
+ * Inner product of the column held in the accumulator (tagged j) with
+ * column k of A. Rows of A's column that the accumulator does not hold for
+ * column j contribute nothing.
+ */
+static double spa_dot(spa* s, int j, taucs_ccs_matrix* A, int k)
+{
+  int    pos;
+  int    row;
+  double sum = 0.0;
+
+  assert(k < A->n);
+
+  for (pos = A->colptr[k]; pos < A->colptr[k+1]; pos++) {
+    row = A->rowind[pos];
+
+    if ( s->bitmap[row] != j ) continue; /* no entry in this row */
+
+    sum += A->taucs_values[pos] * s->values[row];
+  }
+
+  return sum;
+}
+		    
+/*
+ * Evaluates the quadratic form x' A x, where x is the column held in the
+ * accumulator (tagged j) and A is symmetric with only its lower triangle
+ * stored. Despite the name this is the squared A-norm; the caller takes
+ * the square root.
+ *
+ * Each stored off-diagonal entry stands for two symmetric entries and is
+ * therefore counted twice; diagonal entries are counted once.
+ */
+static double spa_A_norm(spa* s, int j, taucs_ccs_matrix* A)
+{
+  int i, ip, k, kp;
+  double Aik;
+  double x = 0.0;
+
+  /* these used '|', which made both assertions vacuously true */
+  assert(A->flags & TAUCS_SYMMETRIC);
+  assert(A->flags & TAUCS_LOWER);
+
+  for (kp=0; kp<s->length; kp++) {
+    k = (s->indices)[kp];
+
+    for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
+      i   = (A->rowind)[ip];
+      Aik = (A->taucs_values)[ip];
+
+      if ( (s->bitmap)[ i ] == j ) {
+        if (i == k)
+          x += (s->values)[k] * Aik * (s->values)[ i ];
+        else
+          x += 2.0 * (s->values)[k] * Aik * (s->values)[ i ];
+      }
+    }
+  }
+
+  return x;
+}
+		    
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+/*
+ * Row-wise linked lists: for every row, a chain of nodes each recording a
+ * column index and a value. Nodes live in parallel arrays and unused nodes
+ * are chained into a free list. The state is held in these file-level
+ * variables, so only one such structure can exist at a time.
+ */
+int*    rowlist;          /* rowlist[i]: first node of row i's chain, -1 if none */
+int*    rowlist_next;     /* per node: next node in its chain or in the free list, -1 at the end */
+int*    rowlist_colind;   /* per node: column index */
+double* rowlist_values;   /* per node: value */
+
+int     rowlist_freelist; /* first unused node, -1 when none is left */
+int     rowlist_size;     /* number of nodes currently allocated */
+
+/*
+ * Initializes the file-level row lists for n rows: every row starts with
+ * an empty chain and an initial pool of 1000 nodes is placed on the free
+ * list.
+ *
+ * Returns 0 on success. If an allocation fails, everything obtained so far
+ * is released, the pointers are reset to NULL and -1 is returned.
+ */
+static int rowlist_create(int n)
+{
+  int i;
+
+  rowlist_size   = 1000;
+  rowlist        = (int*)    taucs_malloc( n * sizeof(int) );
+  rowlist_next   = (int*)    taucs_malloc( rowlist_size * sizeof(int) );
+  rowlist_colind = (int*)    taucs_malloc( rowlist_size * sizeof(int) );
+  rowlist_values = (double*) taucs_malloc( rowlist_size * sizeof(double) );
+
+  /* a zero-byte request may legitimately yield NULL, hence the n > 0 test */
+  if ( (n > 0 && !rowlist) || !rowlist_next || !rowlist_colind || !rowlist_values ) {
+    taucs_free(rowlist);
+    taucs_free(rowlist_next);
+    taucs_free(rowlist_colind);
+    taucs_free(rowlist_values);
+    rowlist        = NULL;
+    rowlist_next   = NULL;
+    rowlist_colind = NULL;
+    rowlist_values = NULL;
+    rowlist_size   = 0;
+    return -1;
+  }
+
+  for (i=0; i<n; i++) rowlist[i] = -1; /* no list yet for row i */
+
+  /* free list: node i points to node i+1, the last one terminates it */
+  rowlist_freelist = 0;
+  for (i=0; i<rowlist_size-1; i++) rowlist_next[i] = i+1;
+  rowlist_next[rowlist_size-1] = -1;
+
+  return 0;
+}
+
+/* Releases the four arrays backing the file-level row lists. */
+static void rowlist_free()
+{
+  /* node storage first, then the per-row heads; the order is immaterial */
+  taucs_free(rowlist_values);
+  taucs_free(rowlist_colind);
+  taucs_free(rowlist_next);
+  taucs_free(rowlist);
+}
+
+/*static void rowlist_freerow(int i){}*/
+
+/*
+ * Prepends the entry (column j, value v) to the chain of row i.
+ * When the free list is exhausted the node pool grows by 1000 nodes first.
+ * NOTE(review): the results of taucs_realloc are not checked here.
+ */
+static void rowlist_add(int i,int j,double v)
+{
+  int node;
+
+  if (rowlist_freelist == -1) {
+    const int grow     = 1000;
+    const int old_size = rowlist_size;
+    const int new_size = old_size + grow;
+    int       idx;
+
+    rowlist_next   = (int*)    taucs_realloc( rowlist_next,   new_size * sizeof(int) );
+    rowlist_colind = (int*)    taucs_realloc( rowlist_colind, new_size * sizeof(int) );
+    rowlist_values = (double*) taucs_realloc( rowlist_values, new_size * sizeof(double) );
+
+    /* chain the freshly added nodes into a new free list */
+    for (idx = old_size; idx < new_size - 1; idx++)
+      rowlist_next[idx] = idx + 1;
+    rowlist_next[new_size - 1] = -1;
+
+    rowlist_freelist = old_size;
+    rowlist_size     = new_size;
+  }
+
+  /* take the first free node ... */
+  node             = rowlist_freelist;
+  rowlist_freelist = rowlist_next[node];
+
+  /* ... fill it in and make it the new head of row i */
+  rowlist_colind[node] = j;
+  rowlist_values[node] = v;
+  rowlist_next  [node] = rowlist[i];
+  rowlist[i]           = node;
+}
+
+/* First node of row i's chain, or -1 when the row has no entries. */
+static int rowlist_getfirst(int i)
+{
+  const int head = rowlist[ i ];
+
+  return head;
+}
+
+/* Node following node l in its chain, or -1 at the end of the chain. */
+static int rowlist_getnext(int l)
+{
+  const int successor = rowlist_next[ l ];
+
+  return successor;
+}
+
+/* Column index recorded in node l. */
+static int rowlist_getcolind(int l)
+{
+  const int column = rowlist_colind[ l ];
+
+  return column;
+}
+
+/*
+static double rowlist_getvalue(int l)
+{
+  return rowlist_values[ l ];
+}
+*/
+
+/*********************************************************/
+/* Inverse Cholesky factorization                        */
+/*********************************************************/
+
+/*
+ * Builds the factor one column at a time: column j starts as the unit
+ * vector e_j, the previously computed columns that share a row with A*e_j
+ * are subtracted from it (scaled by their inner product with A*e_j), and
+ * the result is divided by the square root of x' A x.
+ *
+ * A : symmetric double-precision matrix with its lower triangle stored;
+ *     TAUCS_SYMMETRIC, TAUCS_LOWER and TAUCS_DOUBLE must all be set.
+ *
+ * Returns the factor in compressed-column form, owned by the caller, or
+ * NULL if A has the wrong flags or a working structure cannot be
+ * allocated. Uses the file-level rowlist state, so calls must not overlap.
+ *
+ * NOTE(review): the result is returned with flags == 0, whereas
+ * taucs_ccs_solve_xxt() requires TRIANGULAR | LOWER | DOUBLE; confirm which
+ * side is intended.
+ * NOTE(review): the scaling step does not check that x' A x is positive.
+ */
+taucs_ccs_matrix* 
+taucs_ccs_factor_xxt(taucs_ccs_matrix* A)
+{
+  int               i,j,k,l,n,ip,next,Lnnz;
+  double            v;
+  double            x;
+  spa*              s      = NULL;
+  spa*              Aej    = NULL;
+  taucs_ccs_matrix* L      = NULL;
+  taucs_ccs_matrix* U      = NULL;
+  int*              bitmap = NULL;
+
+  if (!(A->flags & TAUCS_SYMMETRIC)) {
+    taucs_printf("taucs_ccs_factor_xxt: matrix must be symmetric\n");
+    return NULL;
+  }
+  if (!(A->flags & TAUCS_LOWER)) {
+    taucs_printf("taucs_ccs_factor_xxt: lower part must be represented\n");
+    return NULL;
+  }
+
+  if (!(A->flags & TAUCS_DOUBLE)) {
+    taucs_printf("taucs_ccs_factor_xxt: only works for double-precision real matrices\n");
+    return NULL;
+  }
+
+  n = A->n;
+
+  taucs_printf("taucs_ccs_factor_xxt: starting n=%d\n",n);
+
+  /* bitmap[k] == j marks column k as already applied to column j */
+  bitmap = (int*) taucs_malloc(n * sizeof(int));
+  if (!bitmap) return NULL;
+  for (i=0; i<n; i++) bitmap[i] = -1;
+
+  U   = ccs_syml_to_symu(A);
+  L   = taucs_dtl(ccs_create)(n,n,1000);
+  s   = spa_create(n);
+  Aej = spa_create(n);
+
+  /* rowlist_create() is only attempted once everything else exists */
+  if (!U || !L || !s || !Aej || rowlist_create(n) != 0) {
+    taucs_printf("taucs_ccs_factor_xxt: out of memory\n");
+    if (Aej) spa_free(Aej);
+    if (s)   spa_free(s);
+    if (L)   taucs_ccs_free(L);
+    if (U)   taucs_ccs_free(U);
+    taucs_free(bitmap);
+    return NULL;
+  }
+
+  L->flags = 0;
+
+  Lnnz = 1000; /* capacity of L->rowind / L->taucs_values */
+  next = 0;    /* next free position in L */
+
+  for (j=0; j<n; j++) {
+
+    /* set the spa to ej */
+
+    s->length = 1;
+    (s->values)[j] = 1.0;
+    (s->bitmap)[j] = j;
+    (s->indices)[0] = j;
+
+    /* compute A*ej, get both upper and lower parts! */
+
+    spa_set_lu(Aej,A,U,j);
+
+    /* visit every earlier column that has an entry in a row of A*ej */
+
+    for (ip=0; ip<Aej->length; ip++) {
+      i = (Aej->indices)[ip];
+
+      for (l = rowlist_getfirst(i);
+           l != -1;
+           l = rowlist_getnext(l)) {
+        k   = rowlist_getcolind(l);
+
+        if (bitmap[k] == j) continue; /* column k was handled already */
+        bitmap[k] = j;
+
+        /* inner product of column k of X with A*ej */
+
+        x = spa_dot(Aej,j,L,k);
+        if (x != 0.0) {
+          spa_scale_add(s,j,L,k,-x); /* L_*j -= x * L_*k  */
+        }
+      }
+    }
+
+    /* normalize the column to unit A-norm */
+
+    x = sqrt(spa_A_norm(s,j,A));
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      (s->values)[i] /= x;
+    }
+
+    /* we now add the j'th column of L to the taucs_ccs */
+
+    if ( next+(s->length) > Lnnz ) {
+      int*    rowind;
+      double* values;
+      int inc = max( 8192, s->length );
+
+      Lnnz += inc;
+
+      rowind = (int*)    taucs_realloc( L->rowind, Lnnz * sizeof(int) );
+      values = (double*) taucs_realloc( L->taucs_values, Lnnz * sizeof(double) );
+      /* check for errors */
+      assert( rowind && values );
+      L->rowind = rowind;
+      L->taucs_values = values;
+    }
+
+    (L->colptr)[j] = next;
+
+    for (ip = 0; ip < s->length; ip++) {
+      i = (s->indices)[ip];
+      v = (s->values)[i];
+
+      (L->rowind)[next] = i;
+      (L->taucs_values)[next] = v;
+      next++;
+      rowlist_add(i,j,v); /* remember that row i has an entry in column j */
+    }
+
+    (L->colptr)[j+1] = next;
+  }
+
+  (L->colptr)[n] = next;
+
+  taucs_free(bitmap);
+  rowlist_free();
+  spa_free(Aej);
+  spa_free(s);
+  taucs_ccs_free(U);
+
+  taucs_printf("taucs_ccs_factor_xxt: done; nnz(L) = %d\n",(L->colptr)[n]);
+
+  return L;
+}
+
+/*********************************************************/
+/* XXT Solve                                             */
+/*********************************************************/
+
+/*
+ * Applies the factor twice: x = X * (X' * b).
+ *
+ * vX : the factor, passed as an opaque pointer to a taucs_ccs_matrix; it
+ *      must carry the TAUCS_TRIANGULAR, TAUCS_LOWER and TAUCS_DOUBLE flags
+ * x  : output vector of length n (overwritten)
+ * b  : input vector of length n
+ *
+ * Returns 0 on success and -1 if the temporary vector cannot be allocated.
+ * NOTE(review): a factor with the wrong flags is reported with a message
+ * but the function still returns 0 and leaves x untouched.
+ */
+int
+taucs_ccs_solve_xxt(void* vX, double* x, double* b)
+{
+  taucs_ccs_matrix* X = (taucs_ccs_matrix*) vX;
+  int     order;
+  int     col, row, pos;
+  double  entry;
+  double* y;
+
+  if ( !( (X->flags & TAUCS_TRIANGULAR)
+          && (X->flags & TAUCS_LOWER)
+          && (X->flags & TAUCS_DOUBLE) ) ) {
+    taucs_printf("taucs_ccs_solve_xxt: matrix must be lower triangular double-precision real\n");
+    return 0;
+  }
+
+  order = X->n;
+
+  y = (double*) taucs_malloc(order * sizeof(double));
+  if (y == NULL) return -1;
+
+  /* y = X' * b : one dot product per column */
+  for (col = 0; col < order; col++) {
+    y[col] = 0.0;
+
+    for (pos = X->colptr[col]; pos < X->colptr[col+1]; pos++) {
+      row   = X->rowind[pos];
+      entry = X->taucs_values[pos];
+      y[col] += entry*b[row];
+    }
+  }
+
+  for (row = 0; row < order; row++) x[row] = 0.0;
+
+  /* x = X * y : scatter each column scaled by y[col] */
+  for (col = 0; col < order; col++) {
+    for (pos = X->colptr[col]; pos < X->colptr[col+1]; pos++) {
+      row   = X->rowind[pos];
+      entry = X->taucs_values[pos];
+      x[row] += entry*y[col];
+    }
+  }
+
+  taucs_free(y);
+
+  return 0;
+}
+
+#endif /* TAUCS_CORE_DOUBLE */
diff --git a/contrib/taucs/src/taucs_complex.c b/contrib/taucs/src/taucs_complex.c
new file mode 100644
index 0000000000000000000000000000000000000000..926b134ba175845b203420d6aea1f56589ad9b34
--- /dev/null
+++ b/contrib/taucs/src/taucs_complex.c
@@ -0,0 +1,212 @@
+
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*                                                       */
+/* Simple complex arithmetic routines.                   */
+/* They are called if the compiler does not support      */
+/* complex. GCC supports complex, and so do all C99      */
+/* compilers.                                            */
+/*                                                       */
+/*********************************************************/
+
+#include <math.h>
+#include "taucs.h"
+
+#ifdef TAUCS_CORE_DOUBLE
+/*
+ * Produces a NaN by forming 1/0 and subtracting the result from itself
+ * (under IEEE arithmetic: inf - inf).
+ */
+double taucs_get_nan()
+{
+  double denominator = 0.0;
+  double infinity    = 1.0 / denominator;
+
+  return infinity - infinity;
+}
+#endif
+
+/*
+ * The values 0, 1 and -1 as global variables, one set per real precision.
+ * Each set is only defined when the file is compiled for that precision.
+ */
+#ifdef TAUCS_CORE_DOUBLE
+taucs_double taucs_dzero_const     =  0.0;
+taucs_double taucs_done_const      =  1.0;
+taucs_double taucs_dminusone_const = -1.0;
+#endif
+
+#ifdef TAUCS_CORE_SINGLE
+taucs_single taucs_szero_const     =  0.0f;
+taucs_single taucs_sone_const      =  1.0f;
+taucs_single taucs_sminusone_const = -1.0f;
+#endif
+
+/*
+ * The values 0, 1 and -1 for the two complex precisions. With
+ * TAUCS_C99_COMPLEX they are written as C99 complex expressions; otherwise
+ * they are brace-initialized as a (real, imaginary) pair.
+ */
+/*#if defined(__GNUC__) && !defined(TAUCS_CONFIG_GENERIC_COMPLEX)*/
+#ifdef TAUCS_C99_COMPLEX
+
+#ifdef TAUCS_CORE_DCOMPLEX
+taucs_dcomplex taucs_zzero_const     =  0.0+0.0*_Complex_I;
+taucs_dcomplex taucs_zone_const      =  1.0+0.0*_Complex_I;
+taucs_dcomplex taucs_zminusone_const = -1.0+0.0*_Complex_I;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+taucs_scomplex  taucs_czero_const     =  0.0f+0.0f*_Complex_I;
+taucs_scomplex  taucs_cone_const      =  1.0f+0.0f*_Complex_I;
+taucs_scomplex  taucs_cminusone_const = -1.0f+0.0f*_Complex_I;
+#endif
+
+#else /* TAUCS_C99_COMPLEX */
+
+/* no native complex type: the constants are plain aggregates */
+#ifdef TAUCS_CORE_DCOMPLEX
+taucs_dcomplex taucs_zzero_const     = { 0.0 , 0.0 };
+taucs_dcomplex taucs_zone_const      = { 1.0 , 0.0 };
+taucs_dcomplex taucs_zminusone_const = {-1.0 , 0.0 };
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+taucs_scomplex  taucs_czero_const     = { 0.0f, 0.0f};
+taucs_scomplex  taucs_cone_const      = { 1.0f, 0.0f};
+taucs_scomplex  taucs_cminusone_const = {-1.0f, 0.0f};
+#endif
+
+#ifdef TAUCS_CORE_COMPLEX
+
+/* Builds the complex number r + i*sqrt(-1) from its two parts. */
+taucs_datatype
+taucs_dtl(complex_create_fn)(taucs_real_datatype r, taucs_real_datatype i)
+{
+  taucs_datatype z;
+
+  taucs_im(z) = i;
+  taucs_re(z) = r;
+
+  return z;
+}
+
+/* Complex sum a + b, formed component by component. */
+taucs_datatype
+taucs_dtl(add_fn)(taucs_datatype a, taucs_datatype b)
+{
+  taucs_datatype sum;
+
+  taucs_im(sum) = taucs_im(a) + taucs_im(b);
+  taucs_re(sum) = taucs_re(a) + taucs_re(b);
+
+  return sum;
+}
+
+/* Complex difference a - b, formed component by component. */
+taucs_datatype
+taucs_dtl(sub_fn)(taucs_datatype a, taucs_datatype b)
+{
+  taucs_datatype diff;
+
+  taucs_im(diff) = taucs_im(a) - taucs_im(b);
+  taucs_re(diff) = taucs_re(a) - taucs_re(b);
+
+  return diff;
+}
+
+/*
+ * Complex product a * b:
+ *   re = re(a) re(b) - im(a) im(b),  im = re(a) im(b) + im(a) re(b).
+ */
+taucs_datatype
+taucs_dtl(mul_fn)(taucs_datatype a, taucs_datatype b)
+{
+  taucs_datatype prod;
+
+  taucs_im(prod) = taucs_re(a) * taucs_im(b) + taucs_im(a) * taucs_re(b);
+  taucs_re(prod) = taucs_re(a) * taucs_re(b) - taucs_im(a) * taucs_im(b);
+
+  return prod;
+}
+
+/*
+ * Complex quotient a / b. The larger (in magnitude) component of b is
+ * used as the divisor of the scaling ratio, so the ratio never exceeds 1
+ * in magnitude. There is no special handling of b == 0.
+ */
+taucs_datatype
+taucs_dtl(div_fn)(taucs_datatype a, taucs_datatype b)
+{
+  taucs_datatype      quot;
+  taucs_real_datatype ratio;
+  taucs_real_datatype denom;
+
+  if (fabs(taucs_re(b)) < fabs(taucs_im(b))) {
+    /* imaginary part of b dominates */
+    ratio = taucs_re(b) / taucs_im(b);
+    denom = taucs_im(b) + ratio * taucs_re(b);
+    taucs_re(quot) = (ratio * taucs_re(a) + taucs_im(a))/denom;
+    taucs_im(quot) = (ratio * taucs_im(a) - taucs_re(a))/denom;
+    return quot;
+  }
+
+  /* real part of b dominates (or the two are equal in magnitude) */
+  ratio = taucs_im(b) / taucs_re(b);
+  denom = taucs_re(b) + ratio * taucs_im(b);
+  taucs_re(quot) = (taucs_re(a) + ratio * taucs_im(a))/denom;
+  taucs_im(quot) = (taucs_im(a) - ratio * taucs_re(a))/denom;
+  return quot;
+}
+
+/* Complex conjugate: same real part, negated imaginary part. */
+taucs_datatype
+taucs_dtl(conj_fn)(taucs_datatype a)
+{
+  taucs_datatype conj;
+
+  taucs_im(conj) = - taucs_im(a);
+  taucs_re(conj) =   taucs_re(a);
+
+  return conj;
+}
+
+/* Additive inverse: both components negated. */
+taucs_datatype
+taucs_dtl(neg_fn)(taucs_datatype a)
+{
+  taucs_datatype neg;
+
+  taucs_im(neg) = - taucs_im(a);
+  taucs_re(neg) = - taucs_re(a);
+
+  return neg;
+}
+
+/*
+ * Modulus of a complex number. The larger of the two component magnitudes
+ * is factored out before squaring, so the squared term is a ratio that
+ * does not exceed 1.
+ */
+double
+taucs_dtl(abs_fn)(taucs_datatype a)
+{
+  double re_mag = fabs(taucs_re(a));
+  double im_mag = fabs(taucs_im(a));
+  double ratio;
+
+  /* purely real or purely imaginary: the other magnitude is the answer */
+  if (re_mag==0.0) return im_mag;
+  if (im_mag==0.0) return re_mag;
+
+  if (re_mag > im_mag) {
+    ratio = im_mag/re_mag;
+    return ( re_mag*sqrt(1.0+ratio*ratio) );
+  }
+
+  ratio = re_mag/im_mag;
+  return ( im_mag*sqrt(1.0+ratio*ratio) );
+}
+
+/*
+ * Complex square root; returns the root whose real part is non-negative.
+ *
+ * With x = |re(a)| and y = |im(a)|, the code first forms
+ * w = sqrt((x + |a|) / 2) in a form that factors out the larger of x and
+ * y, then derives the other component as im(a) / (2 * w).
+ * The intermediate arithmetic is done in double and cast back to the real
+ * type of the current precision.
+ */
+taucs_datatype
+taucs_dtl(sqrt_fn)(taucs_datatype a)
+{
+  taucs_datatype c;
+  double x,y,t;/*,w; omer*/
+	taucs_real_datatype w; 
+
+  /* the square root of zero is zero; also avoids 0/0 below */
+  if (taucs_re(a) == 0.0 && taucs_im(a) == 0.0) {
+    taucs_re(c) = 0.0;
+    taucs_im(c) = 0.0;
+  } else {
+    x = fabs((double) taucs_re(a));
+    y = fabs((double) taucs_im(a));
+    /* t is always the smaller magnitude divided by the larger one */
+    if (x >= y) {
+      t = y/x;
+      w = (taucs_real_datatype )(sqrt(x) * sqrt(0.5 * (1.0 + sqrt(1.0 + t * t))));
+    } else {
+      t = x/y;
+      w = (taucs_real_datatype )(sqrt(y) * sqrt(0.5 * (t + sqrt(1.0 + t * t))));
+    }
+
+    if (taucs_re(a) > 0.0) {
+      /* w is the real part of the root */
+      taucs_re(c) = w;
+			/*taucs_im(c) = taucs_im(a) / (2.0 * w); omer*/
+      taucs_im(c) = (taucs_real_datatype)(taucs_im(a) / (2.0 * w));
+    } else {
+      /* w is the magnitude of the imaginary part; its sign follows im(a) */
+      x = (taucs_im(a) >= 0.0) ? w : -w;
+      taucs_im(c) = (taucs_real_datatype )x;
+      /*taucs_re(c) = taucs_im(a) / (2.0 * x); omer*/
+			taucs_re(c) = (taucs_real_datatype)(taucs_im(a) / (2.0 * x));
+    }
+  }
+  
+  return c;
+}
+
+#endif /* TAUCS_CORE_COMPLEX */
+
+#endif /* TAUCS_C99_COMPLEX (else branch) */
+
+
+
+
+
+
diff --git a/contrib/taucs/src/taucs_gremban.c b/contrib/taucs/src/taucs_gremban.c
new file mode 100644
index 0000000000000000000000000000000000000000..10cb3d994f0b9e1f0f8030aa242d5cb4eff546d2
--- /dev/null
+++ b/contrib/taucs/src/taucs_gremban.c
@@ -0,0 +1,1548 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Doron Chen                                    */
+/* File  : taucs_gremban.c                               */
+/* Description: constructs multilevel support            */
+/*        reconditioners (including Gremban-Miller)      */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include "taucs.h"
+
+/*#include <unistd.h>*/
+
+/*long int random() omer*/
+
+#ifdef TAUCS_CORE_DOUBLE
+
+/* #include "../metis-4.0/Lib/defs.h" */
+/* #include "../metis-4.0/Lib/struct.h" */
+/* #include "../metis-4.0/Lib/proto.h" */
+
+/* Index type of the arrays handed to the METIS routines declared below.
+   NOTE(review): presumably has to match the idxtype of the linked METIS
+   library -- confirm. */
+typedef int idxtype;
+
+/*
+ * State handed to taucs_sg_preconditioner_solve(): a factor over n+k
+ * unknowns together with two work vectors of that extended length.
+ */
+typedef struct {
+  taucs_ccs_matrix* L; /* factor passed to taucs_ccs_solve_llt */
+  int n,k;             /* n: length of the caller's vectors; k: number of extra unknowns */
+  double* Ztilde;      /* solution work vector; its first n entries are copied out */
+  double* Rtilde;      /* right-hand-side work vector: n copied entries followed by k zeros */
+} multilevel_args;
+
+/* One weighted edge (or, when i == j, one diagonal entry) of a graph. */
+typedef struct {
+  int    i; /* first endpoint */
+  int    j; /* second endpoint */
+  double v; /* weight */
+} wedge; /* weighted edge */
+
+/* A graph on n vertices stored as a flat array of weighted edges. */
+typedef struct {
+  int n;        /* number of vertices */
+  int nent;     /* number of entries of edges[] in use */
+  int max_size; /* number of entries edges[] was allocated for */
+  wedge* edges; /* the edge array */
+} graph;
+
+/* Loop header running i from 0 to n-1; n is re-evaluated on every pass. */
+#define Do(i,n) for ((i)=0;(i)<(n);(i)++)
+
+/*
+ * Allocates an n-by-n compressed-column matrix with room for nent entries,
+ * flagged symmetric / lower / double. The index and value arrays are left
+ * uninitialized. Returns NULL (releasing any partial allocation) if memory
+ * cannot be obtained.
+ */
+static
+taucs_ccs_matrix* construct_ccs_matrix(int nent,int n)
+{
+  taucs_ccs_matrix *mat = (taucs_ccs_matrix *)taucs_malloc(sizeof(taucs_ccs_matrix));
+
+  if (mat == NULL) return NULL;
+
+  mat->colptr       = (int *)taucs_malloc((n+1)*sizeof(int));
+  mat->rowind       = (int *)taucs_malloc(nent*sizeof(int));
+  mat->taucs_values = (double *)taucs_malloc(nent*sizeof(double));
+
+  if (mat->colptr && mat->rowind && mat->taucs_values) {
+    mat->n     = n;
+    mat->m     = n;
+    mat->flags = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+    return mat;
+  }
+
+  /* one of the arrays is missing: give everything back */
+  taucs_free(mat->colptr);
+  taucs_free(mat->rowind);
+  taucs_free(mat->taucs_values);
+  taucs_free(mat);
+  return NULL;
+}
+
+/*
+ * Converts an edge list into a symmetric lower-triangular compressed-column
+ * matrix: each edge is stored in the column of its smaller endpoint with
+ * the larger endpoint as row index. Entries keep the order in which they
+ * appear in the edge list; duplicates are not merged.
+ * Returns NULL if memory cannot be obtained.
+ */
+static
+taucs_ccs_matrix *graph_to_ccs_matrix(graph *A)
+{
+  taucs_ccs_matrix *mat;
+  const int order = A->n;
+  const int count = A->nent;
+  int *cursor;
+  int idx, col, row;
+
+  cursor = (int *)taucs_malloc(order*sizeof(int));
+  if (cursor == NULL) return NULL;
+
+  mat = construct_ccs_matrix(count,order);
+  if (mat == NULL) {
+    taucs_free(cursor);
+    return NULL;
+  }
+  mat->flags = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+
+  /* number of entries per column */
+  for (idx = 0; idx < order; idx++)
+    cursor[idx] = 0;
+  for (idx = 0; idx < count; idx++) {
+    const wedge *e = &A->edges[idx];
+    col = (e->i <= e->j) ? e->i : e->j;
+    cursor[col]++;
+  }
+
+  /* column starts by prefix sum, then reuse cursor[] as insertion points */
+  mat->colptr[0] = 0;
+  for (idx = 0; idx < order; idx++)
+    mat->colptr[idx+1] = mat->colptr[idx] + cursor[idx];
+  for (idx = 0; idx < order; idx++)
+    cursor[idx] = mat->colptr[idx];
+
+  for (idx = 0; idx < count; idx++) {
+    const wedge *e = &A->edges[idx];
+    if (e->i <= e->j) {
+      col = e->i;
+      row = e->j;
+    } else {
+      col = e->j;
+      row = e->i;
+    }
+    mat->rowind[cursor[col]]       = row;
+    mat->taucs_values[cursor[col]] = e->v;
+    cursor[col]++;
+  }
+
+  taucs_free(cursor);
+  return mat;
+}
+
+/* Disabled: allocates a graph with room for `size` edges. The body matches
+   graph_create() further down in this file, which is the version compiled. */
+#if 0
+static
+graph* construct_graph(int size)
+{
+  graph *out;
+  
+  out = (graph *)taucs_malloc(sizeof(graph));
+  if (!out) return NULL;
+
+  out->edges = (wedge*) taucs_malloc(size*sizeof(wedge));
+  if (!(out->edges)) {
+    taucs_free(out);
+    return NULL;
+  }
+  
+  out->max_size = size;
+
+  return out;
+}
+#endif /* 0, we don't need this function */
+
+/* Releases a graph and its edge array; a NULL graph is ignored. */
+static
+void free_graph(graph *a)
+{
+  if (a == NULL)
+    return;
+
+  taucs_free(a->edges);
+  taucs_free(a);
+}
+
+/* we use the version in taucs_vaidya.c */
+/* Only the extern declaration in the "#if 1" branch is compiled. The #else
+   branch keeps an older local implementation for reference; note that it
+   declares check_diag_dominant_matrix with two parameters but defines it
+   with one, so it would need fixing before being re-enabled. */
+#if 1
+extern int taucs_check_diag_dominant_matrix(graph *A, int force_diagonal_dominance);
+#else
+extern int check_diag_dominant_matrix(graph *A, int force_diagonal_dominance);
+#define EPSILON 0.00000001
+/* Disabled: returns 0 if every row's diagonal entry is at least the sum of
+   the magnitudes of its off-diagonal entries (up to EPSILON), -2 if a
+   diagonal entry is negative or a row fails the test, and -1 if the work
+   array cannot be allocated. */
+static
+int check_diag_dominant_matrix(graph *A)
+{
+  int i;
+  double *sum;
+  int n;
+
+  n = A->n;
+
+  sum = (double *)taucs_calloc(n,sizeof(double));
+  if (!sum) return -1;
+
+  /* sum[r] = diagonal of row r minus the magnitudes of its off-diagonals */
+  Do(i,A->nent)
+    {
+      if ((A->edges)[i].i != (A->edges)[i].j)
+	{
+	  sum[(A->edges)[i].i]-=fabs((A->edges)[i].v);
+	  sum[(A->edges)[i].j]-=fabs((A->edges)[i].v);
+	}
+      else
+	{
+	  sum[(A->edges)[i].i]+=fabs((A->edges)[i].v);
+	  if ((A->edges)[i].v < 0)
+	    {
+	      taucs_printf("ERROR! This matrix is not diagonally dominant. It has negative diagonals.\n");
+	      taucs_free(sum);
+	      return -2;
+	    }
+	}
+      
+    }
+  
+  Do(i,n)
+    {
+      if (sum[i] < -EPSILON)
+	{
+	  taucs_printf("ERROR! This matrix is not diagonally dominant. sum[%d] = %lf\n",i,sum[i]);
+	  taucs_free(sum);
+	  return -2;
+	}
+    }
+  
+  taucs_free(sum);
+  return 0;
+}
+#endif
+
+
+/*
+ * Applies the preconditioner described by a multilevel_args record.
+ * The right-hand side R (length n) is extended with k zeros, the extended
+ * system is solved with the stored factor, and the first n entries of the
+ * extended solution are returned in Z.
+ *
+ * vP : opaque pointer to a multilevel_args
+ * Z  : output vector of length n
+ * R  : input vector of length n
+ *
+ * Always returns 0; the result of the inner solve is not inspected.
+ */
+int
+taucs_sg_preconditioner_solve(void* vP, double* Z, double* R)
+{
+  multilevel_args* args   = (multilevel_args*) vP;
+  const int        n_orig = args->n;
+  const int        n_all  = args->n + args->k;
+  int              idx;
+
+  /* extended right-hand side: R followed by zeros */
+  for (idx = 0; idx < n_orig; idx++)
+    args->Rtilde[idx] = R[idx];
+  for (idx = n_orig; idx < n_all; idx++)
+    args->Rtilde[idx] = 0.0;
+
+  taucs_ccs_solve_llt(args->L, args->Ztilde, args->Rtilde);
+
+  /* keep only the part that belongs to the original unknowns */
+  for (idx = 0; idx < n_orig; idx++)
+    Z[idx] = args->Ztilde[idx];
+
+  return 0;
+}
+
+/*
+ * Adjacency structure in compressed form, built for the METIS calls in
+ * this file: the neighbours of vertex v are rowind[colptr[v]] up to
+ * rowind[colptr[v+1]-1], with one integer weight per listed neighbour.
+ */
+typedef struct {
+  int n;            /* number of vertices */
+  idxtype *colptr;  /* n+1 offsets into rowind / values */
+  idxtype *rowind;  /* neighbour indices */
+  idxtype *values;  /* integer edge weights, parallel to rowind */
+} Metis_struct;
+
+/*
+ * Allocates an adjacency structure for n vertices and nent adjacency
+ * entries. The arrays are left uninitialized.
+ *
+ * Returns the new structure, to be released with Metis_struct_free(), or
+ * NULL if any allocation fails; in that case nothing remains allocated.
+ */
+Metis_struct *Metis_struct_create(int n,int nent)
+{
+  Metis_struct *out;
+
+  out=(Metis_struct *)taucs_malloc(sizeof(Metis_struct));
+
+  if (!out)
+    return NULL;
+
+  out->n = n;
+  out->colptr = (idxtype *)taucs_malloc((n+1)*sizeof(idxtype));
+  out->rowind = (idxtype *)taucs_malloc(nent*sizeof(idxtype));
+  out->values = (idxtype *)taucs_malloc(nent*sizeof(idxtype));
+
+  if ((out->colptr==NULL)||(out->rowind==NULL)||(out->values==NULL))
+    {
+      taucs_free(out->colptr);
+      taucs_free(out->rowind);
+      taucs_free(out->values);
+      taucs_free(out); /* the record itself was previously leaked here */
+      return NULL;
+    }
+
+  return(out);
+}
+
+/* Releases an adjacency structure and its arrays; NULL is ignored. */
+void Metis_struct_free(Metis_struct *A)
+{
+  if (A == NULL)
+    return;
+
+  taucs_free(A->colptr);
+  taucs_free(A->rowind);
+  taucs_free(A->values);
+  taucs_free(A);
+}
+
+/*
+ * Builds the adjacency structure of a matrix: every stored off-diagonal
+ * entry (row, col) yields two adjacency entries, one in each endpoint's
+ * list, both with weight 1. Diagonal entries are skipped.
+ * Returns NULL if memory cannot be obtained.
+ */
+Metis_struct *taucs_ccs_matrix_to_Metis_struct(taucs_ccs_matrix *A)
+{
+  Metis_struct *adj;
+  const int order = A->n;
+  int total = 0;
+  int *cursor;
+  int col, row, pos;
+
+  cursor = (int *)taucs_malloc(order*sizeof(int));
+  if (cursor == NULL) return NULL;
+
+  for (col = 0; col < order; col++)
+    cursor[col] = 0;
+
+  /* pass 1: degree of every vertex */
+  for (col = 0; col < order; col++)
+    for (pos = A->colptr[col]; pos < A->colptr[col+1]; pos++) {
+      row = A->rowind[pos];
+      if (row == col) continue;
+      cursor[col]++;
+      cursor[row]++;
+      total += 2;
+    }
+
+  adj = Metis_struct_create(order,total);
+  if (adj == NULL) {
+    taucs_free(cursor);
+    return NULL;
+  }
+
+  /* list starts by prefix sum, then reuse cursor[] as insertion points */
+  adj->colptr[0] = 0;
+  for (col = 0; col < order; col++)
+    adj->colptr[col+1] = adj->colptr[col] + cursor[col];
+  for (col = 0; col < order; col++)
+    cursor[col] = adj->colptr[col];
+
+  /* pass 2: record each edge at both of its endpoints */
+  for (col = 0; col < order; col++)
+    for (pos = A->colptr[col]; pos < A->colptr[col+1]; pos++) {
+      row = A->rowind[pos];
+      if (row == col) continue;
+      adj->rowind[cursor[col]] = row;
+      adj->rowind[cursor[row]] = col;
+      adj->values[cursor[col]] = 1;
+      adj->values[cursor[row]] = 1;
+      cursor[col]++;
+      cursor[row]++;
+    }
+
+  taucs_free(cursor);
+  return adj;
+}
+
+/*
+ * Expands a matrix into a form without diagonal in which every stored
+ * off-diagonal entry appears twice, once in each endpoint's column. The
+ * diagonal entries found on the way are written to diag[] (entries of
+ * diag[] for columns without a stored diagonal are left untouched).
+ * Returns NULL if memory cannot be obtained.
+ *
+ * NOTE(review): the copied values go through an (idxtype) cast, i.e. they
+ * are truncated to integers before being stored as doubles -- confirm
+ * that this is intended.
+ */
+taucs_ccs_matrix *taucs_ccs_matrix_to_taucs_ccs_matrix(taucs_ccs_matrix *A,double *diag)
+{
+  taucs_ccs_matrix *full;
+  const int order = A->n;
+  int total = 0;
+  int *cursor;
+  int col, row, pos;
+
+  cursor = (int *)taucs_malloc(order*sizeof(int));
+  if (cursor == NULL) return NULL;
+
+  for (col = 0; col < order; col++)
+    cursor[col] = 0;
+
+  /* pass 1: count the entries per column and pick up the diagonal */
+  for (col = 0; col < order; col++)
+    for (pos = A->colptr[col]; pos < A->colptr[col+1]; pos++) {
+      row = A->rowind[pos];
+      if (row == col) {
+        diag[col] = A->taucs_values[pos];
+      } else {
+        cursor[col]++;
+        cursor[row]++;
+        total += 2;
+      }
+    }
+
+  full = taucs_dtl(ccs_create)(order,order,total);
+  if (full == NULL) {
+    taucs_free(cursor);
+    return NULL;
+  }
+
+  /* column starts by prefix sum, then reuse cursor[] as insertion points */
+  full->colptr[0] = 0;
+  for (col = 0; col < order; col++)
+    full->colptr[col+1] = full->colptr[col] + cursor[col];
+  for (col = 0; col < order; col++)
+    cursor[col] = full->colptr[col];
+
+  /* pass 2: store each off-diagonal entry in both columns */
+  for (col = 0; col < order; col++)
+    for (pos = A->colptr[col]; pos < A->colptr[col+1]; pos++) {
+      row = A->rowind[pos];
+      if (row == col) continue;
+      full->rowind[cursor[col]] = row;
+      full->rowind[cursor[row]] = col;
+      full->taucs_values[cursor[col]] = (idxtype)A->taucs_values[pos];
+      full->taucs_values[cursor[row]] = (idxtype)A->taucs_values[pos];
+      cursor[col]++;
+      cursor[row]++;
+    }
+
+  taucs_free(cursor);
+  return full;
+}
+
+/*
+ * Debugging aid: prints every adjacency entry as "vertex neighbour weight",
+ * one per line, on standard output and then terminates the process with
+ * exit status 345. It never returns to the caller.
+ */
+void Metis_struct_print(Metis_struct *A)
+{
+  const int order = A->n;
+  int vertex;
+  int pos;
+
+  for (vertex = 0; vertex < order; vertex++) {
+    for (pos = A->colptr[vertex]; pos < A->colptr[vertex+1]; pos++) {
+      printf("%d %d %d\n",vertex,A->rowind[pos],A->values[pos]);
+    }
+  }
+
+  exit(345);
+}
+
+/*
+ * Allocates a graph whose edge array has room for `size` edges and records
+ * that capacity in max_size. The fields n and nent are left for the caller
+ * to set. Returns NULL if memory cannot be obtained.
+ */
+graph *graph_create(int size)
+{
+  graph *g = (graph *)taucs_malloc(sizeof(graph));
+
+  if (!g)
+    return NULL;
+
+  g->edges = (wedge*) taucs_malloc(size*sizeof(wedge));
+  if (g->edges == NULL) {
+    taucs_free (g);
+    return NULL;
+  }
+
+  g->max_size = size;
+  return g;
+}
+
+/* Small unsigned integer used for the type code below. */
+typedef unsigned char byte;
+
+/* One level of the recipe describing how the preconditioner is built. */
+typedef struct {
+  byte type; /* 0 - gremban, 1 - toledo, 2 - vaidya */
+  int k;     /* number of parts requested at this level (capped by the subgraph size) */
+} instruction;
+
+/*
+ * Partition step of the quicksort below. The three arrays are parallel:
+ * element t is the triple (nodes_1[t], nodes_2[t], values[t]), ordered
+ * lexicographically by the two node numbers. The pivot is the pair found
+ * at position p.
+ *
+ * On return, with q the returned index, no element of [p, q] is greater
+ * than any element of [q+1, r].
+ */
+int partition(int *quicksort_array_nodes_1,int *quicksort_array_nodes_2,double *quicksort_array_values,int p,int r)
+{
+  int    *key1 = quicksort_array_nodes_1;
+  int    *key2 = quicksort_array_nodes_2;
+  double *val  = quicksort_array_values;
+  const int pivot1 = key1[p];
+  const int pivot2 = key2[p];
+  int lo = p - 1;
+  int hi = r + 1;
+  int    held1, held2;
+  double heldv;
+
+  for (;;)
+    {
+      /* walk down to an element that is not greater than the pivot */
+      do {
+        hi--;
+      } while (key1[hi] > pivot1 || (key1[hi] == pivot1 && key2[hi] > pivot2));
+
+      /* walk up to an element that is not smaller than the pivot */
+      do {
+        lo++;
+      } while (key1[lo] < pivot1 || (key1[lo] == pivot1 && key2[lo] < pivot2));
+
+      if (lo >= hi)
+        return hi;
+
+      /* both elements are on the wrong side: exchange the triples */
+      held1 = key1[lo];
+      held2 = key2[lo];
+      heldv = val[lo];
+      key1[lo] = key1[hi];
+      key2[lo] = key2[hi];
+      val[lo]  = val[hi];
+      key1[hi] = held1;
+      key2[hi] = held2;
+      val[hi]  = heldv;
+    }
+}
+
+/*
+ * Sorts positions p..r of three parallel arrays in place, ordering the
+ * triples lexicographically by (nodes_1, nodes_2); the values travel with
+ * their node pair. A range with fewer than two elements is left as is.
+ */
+void quicksort(int *quicksort_array_nodes_1,int *quicksort_array_nodes_2,double *quicksort_array_values,int p,int r)
+{
+  int split;
+
+  if (p >= r)
+    return;
+
+  split = partition(quicksort_array_nodes_1,quicksort_array_nodes_2,quicksort_array_values,p,r);
+
+  /* the split position belongs to the lower half */
+  quicksort(quicksort_array_nodes_1,quicksort_array_nodes_2,quicksort_array_values,p,split);
+  quicksort(quicksort_array_nodes_1,quicksort_array_nodes_2,quicksort_array_values,split+1,r);
+}
+
+/* from metis.h */
+/* Prototypes of the two METIS partitioning routines called in this file,
+   declared by hand instead of including the METIS header.
+   NOTE(review): presumably the METIS 4.0 signatures -- confirm against the
+   METIS version that is actually linked. */
+extern
+void METIS_PartGraphRecursive(int *, 
+			      idxtype *, 
+			      idxtype *, 
+			      idxtype *, 
+			      idxtype *, 
+			      int *, 
+			      int *, 
+			      int *, 
+			      int *, 
+			      int *, 
+			      idxtype *); 
+void METIS_PartGraphKway(int *, idxtype *, idxtype *, idxtype *, idxtype *, int *, int *, int *, int *, int *, idxtype *); 
+
+/*
+ * Sorts the first quicksort_index triples of three parallel arrays by
+ * their node pair and merges triples with identical pairs into one whose
+ * value is the sum of the merged values. The merged triples are written
+ * to the front of the arrays.
+ *
+ * The triples are shuffled with rand() before sorting.
+ *
+ * Returns the number of triples after merging.
+ * NOTE(review): the function reads position 0 unconditionally, so it
+ * appears to expect quicksort_index >= 1.
+ */
+int quicksort_and_shrink(int *quicksort_array_nodes_1,int *quicksort_array_nodes_2,double *quicksort_array_values,int quicksort_index)
+{
+  int *key1    = quicksort_array_nodes_1;
+  int *key2    = quicksort_array_nodes_2;
+  double *val  = quicksort_array_values;
+  const int len = quicksort_index;
+  int pos, pick, written = 0;
+  int group1, group2;
+  int held1, held2;
+  double heldv;
+  double total = 0;
+
+  /* random shuffle: exchange each position with a random later one */
+  for (pos = 0; pos < len; pos++)
+    {
+      pick  = pos + rand()%(len-pos);
+      held1 = key1[pick];
+      held2 = key2[pick];
+      heldv = val[pick];
+      key1[pick] = key1[pos];
+      key2[pick] = key2[pos];
+      val[pick]  = val[pos];
+      key1[pos] = held1;
+      key2[pos] = held2;
+      val[pos]  = heldv;
+    }
+
+  quicksort(key1,key2,val,0,len-1);
+
+  /* sweep over the sorted triples, emitting one triple per distinct pair */
+  group1 = key1[0];
+  group2 = key2[0];
+
+  for (pos = 0; pos < len; pos++)
+    {
+      if (key1[pos] == group1 && key2[pos] == group2)
+        {
+          total += val[pos];
+          continue;
+        }
+
+      /* a new pair starts here: flush the finished group first */
+      key1[written] = group1;
+      key2[written] = group2;
+      val[written]  = total;
+      written++;
+
+      total  = val[pos];
+      group1 = key1[pos];
+      group2 = key2[pos];
+    }
+
+  /* flush the last group */
+  key1[written] = group1;
+  key2[written] = group2;
+  val[written]  = total;
+  written++;
+
+  return(written);
+}
+
+/*
+ * create_recursive_preconditioner
+ *
+ * Builds one level of the multilevel preconditioner graph for the subgraph
+ * `father` and, while curr_inst < max_inst, recurses into every part that
+ * holds more than one vertex.
+ *
+ * Steps performed at each call:
+ *   1. split the n vertices of `father` into
+ *      nparts = min(inst[curr_inst].k, n) parts (n parts at the last level);
+ *      METIS is called unless the split is trivial, and empty parts are
+ *      squeezed out afterwards;
+ *   2. accumulate, per part, the weight of the edges of taucs_ccs_mtxA that
+ *      leave the part, and append the corresponding edges to out->edges
+ *      starting at position *curr_entry (type 0 = Gremban, 1 = Toledo,
+ *      anything else = Vaidya);
+ *   3. build one Metis_struct per part (the "sons"), release `father`, and
+ *      recurse or emit the leaf entries;
+ *   4. append the visited vertices to the ordering array p1.
+ *
+ * Parameters
+ *   out                 graph receiving the preconditioner edges; out->n is
+ *                       raised to at least *next_unused_vertex.
+ *   curr_vertex         preconditioner vertex that represents this subgraph.
+ *   next_unused_vertex  in/out: first vertex number not used yet; advanced by
+ *                       nparts when curr_inst < max_inst.
+ *   curr_entry          in/out: next free slot of out->edges.
+ *   father              subgraph to split; it is freed by this function once
+ *                       the sons have been built (so a successful call always
+ *                       consumes it; a failed call may or may not have).
+ *   vertex_perm         translates local vertex numbers to numbers in mtxA.
+ *   inv_perm            scratch array that must hold only -1 on entry; it is
+ *                       restored to that state before the sons are built
+ *                       (not on the early failure returns).
+ *   diag                diagonal value accumulated so far for curr_vertex.
+ *   diagonal            diagonal values of mtxA.
+ *   inst/max_inst/curr_inst  per-level instructions, index of the last level,
+ *                       and index of the current level.
+ *   taucs_ccs_mtxA      matrix without its diagonal, each entry stored twice.
+ *   ordering            ordering name handed to taucs_ccs_order.
+ *   p1                  output ordering of the preconditioner vertices.
+ *
+ * Returns 1 on success and 0 on failure (allocation or helper failure).
+ */
+int create_recursive_preconditioner(graph *out, /* output: graph of the preconditioner. */
+                                    int curr_vertex,
+                                    int *next_unused_vertex,
+                                    int *curr_entry,
+                                    Metis_struct *father,
+                                    int *vertex_perm, /* translates the vertex numbers in mtxA to the actual vertex numbers */
+                                    int *inv_perm,  /* inv_perm is an array of -1 when this function is entered */
+                                    double diag,
+                                    double *diagonal, /* diagonal values of mtxA */
+                                    instruction *inst, /* array of instructions : how the preconditioner is built at each level */
+                                    int max_inst, /* number of levels of instructions */
+                                    int curr_inst, /* current level of instructions */
+                                    taucs_ccs_matrix *taucs_ccs_mtxA,   /* NOTICE - taucs_ccs_mtxA should not contain the diagonal values,
+                                                                           and it should contains each entry twice !!! */
+                                    char *ordering,
+                                    int *p1
+                                    )
+{
+  int i,j,n,k,p,t1,p2;
+  int options[5]={0};
+  int wgtflag = 1;
+  int numflag = 0;
+  int nparts,edgecut;
+  idxtype *part;
+  Metis_struct **sons;
+  int *tmp,*tmp1,*vertices_in_subgraphs;
+  double *weights,*diags;
+  int *perm_tmp=NULL,*inv_perm_tmp=NULL;
+  int **vertex_perms;
+  int local_next_unused_vertex;
+  int success = 1;
+  graph *order_graph;
+  taucs_ccs_matrix *order_ccs=NULL;
+  int is_root=0;
+  /* NOTE(review): these statics are initialised by the first call only and
+     are never reset, so a second top-level invocation in the same process
+     would continue from the counters of the previous one - confirm that the
+     preconditioner is only ever built once per process. */
+  static int ordering_counter,first=1;
+  static int ordering_counter_leaves = 0;
+  int *quicksort_array_nodes_1=NULL,*quicksort_array_nodes_2=NULL,quicksort_index=0;
+  double *quicksort_array_values=NULL;
+  taucs_ccs_matrix *vaidya_ccs=NULL;
+
+  if (first)
+    {
+      first = 0;
+      ordering_counter = curr_vertex;
+      is_root = 1;
+    }
+
+  local_next_unused_vertex = *next_unused_vertex;
+
+  n = father->n;
+
+  part = (idxtype *)taucs_malloc(n*sizeof(idxtype));
+  if (!part)
+    return 0;
+
+  /* ---- 1. partition the subgraph ------------------------------------ */
+
+  nparts = min(inst[curr_inst].k,n);
+  if (curr_inst == max_inst)
+    nparts = n;
+
+  if (nparts == n)
+    for(i=0;i<n;i++)
+      part[i]=i;
+  else
+    if (nparts == 1)
+      for(i=0;i<n;i++)
+        part[i]=0;
+    else
+      {
+        int *visited; /* helps determine how many parts the graph was actually divided into */
+        int actual_nparts=0;
+#ifdef NOMETIS
+        /* omer - for warning*/
+        edgecut = 0;
+#else
+        if (nparts < 8)
+          METIS_PartGraphRecursive(&n,father->colptr,father->rowind,
+                                   NULL,father->values,&wgtflag,&numflag,
+                                   &nparts,options,&edgecut,part);
+        else
+          METIS_PartGraphKway(&n,father->colptr,father->rowind,
+                              NULL,father->values,&wgtflag,&numflag,
+                              &nparts,options,&edgecut,part);
+#endif
+        visited=(int *)taucs_calloc(nparts,sizeof(int));
+        if (!visited)
+          {
+            taucs_free(part);
+            return(0);
+          }
+        for(i=0;i<n;i++)
+          if(visited[part[i]]==0)
+            {
+              visited[part[i]]=1;
+              actual_nparts++;
+            }
+
+        /* some parts came back empty: renumber the used ones 0..actual-1 */
+        if (actual_nparts!=nparts)
+          {
+            actual_nparts=0;
+            for(i=0;i<nparts;i++)
+              if (visited[i])
+                visited[i]=actual_nparts++;
+
+            for(i=0;i<n;i++)
+              part[i] = visited[part[i]];
+            nparts = actual_nparts;
+          }
+        taucs_free(visited);
+      }
+
+  /* mark the vertices of the current subgraph in inv_perm */
+  for(i=0;i<n;i++)
+    inv_perm[vertex_perm[i]] = i;
+
+  weights = (double *)taucs_calloc(n,sizeof(double));
+  diags = (double *)taucs_calloc(nparts,sizeof(double));
+  if ((!weights)||(!diags))
+    {
+      taucs_free(part);
+      taucs_free(weights);
+      taucs_free(diags);
+      return(0);
+    }
+
+  /* ---- 2. emit the edges of this level ------------------------------ */
+
+  if (inst[curr_inst].type == 0) /* Gremban */
+    {
+      for(i=0;i<n;i++)
+        {
+          p = part[i];
+          t1 = vertex_perm[i];
+          for(j=taucs_ccs_mtxA->colptr[t1];j<taucs_ccs_mtxA->colptr[t1+1];j++)
+            if ((inv_perm[taucs_ccs_mtxA->rowind[j]] == -1) || (part[inv_perm[taucs_ccs_mtxA->rowind[j]]] != p))
+              weights[p] += taucs_ccs_mtxA->taucs_values[j]; /* weights[p] is the sum of all weights of all edges between part p and the rest the graph */
+        }
+      if (curr_inst < max_inst)
+        {
+          for(i=0;i<nparts;i++)
+            if (weights[i]!=0)
+              {
+                out->edges[(*curr_entry)].i=curr_vertex;
+                out->edges[(*curr_entry)].j=local_next_unused_vertex+i;
+                out->edges[(*curr_entry)].v=weights[i];
+                diags[i] -= weights[i];
+                diag     -= weights[i];
+                (*curr_entry)++;
+              }
+        }
+      else /* curr_inst == max_inst */
+        {
+          for(i=0;i<nparts;i++)
+            if (weights[i]!=0)
+              {
+                out->edges[(*curr_entry)].i=curr_vertex;
+                out->edges[(*curr_entry)].j=vertex_perm[i];
+                out->edges[(*curr_entry)].v=weights[i];
+                diag -= weights[i];
+                (*curr_entry)++;
+              }
+          for(i=0;i<nparts;i++)
+            {
+              out->edges[(*curr_entry)].i=vertex_perm[i];
+              out->edges[(*curr_entry)].j=vertex_perm[i];
+              if (diagonal[vertex_perm[i]])
+                out->edges[(*curr_entry)].v=diagonal[vertex_perm[i]];
+              else
+                out->edges[(*curr_entry)].v=1;
+              (*curr_entry)++;
+              p1[ordering_counter_leaves++] = vertex_perm[i];
+            }
+        }
+    }
+  else /* Toledo or Vaidya */
+    {
+      int order_count;
+
+      /* first pass: boundary weights, and count of the part-to-part entries */
+      for(i=0;i<n;i++)
+        {
+          p = part[i];
+          t1 = vertex_perm[i];
+
+          for(j=taucs_ccs_mtxA->colptr[t1];j<taucs_ccs_mtxA->colptr[t1+1];j++)
+            {
+              if (inv_perm[taucs_ccs_mtxA->rowind[j]] == -1)
+                weights[p] += taucs_ccs_mtxA->taucs_values[j]; /* weights[p] is the sum of all weights of all edges between part p and vertices outside current subgraph */
+              else
+                if (part[inv_perm[taucs_ccs_mtxA->rowind[j]]] > p)
+                  quicksort_index++;
+            }
+        }
+
+      if (quicksort_index)
+        {
+          quicksort_array_nodes_1 = (int *)taucs_malloc(quicksort_index*sizeof(int));
+          quicksort_array_nodes_2 = (int *)taucs_malloc(quicksort_index*sizeof(int));
+          quicksort_array_values = (double *)taucs_malloc(quicksort_index*sizeof(double));
+          if ((!quicksort_array_nodes_1)||(!quicksort_array_nodes_2)||(!quicksort_array_values))
+            {
+              taucs_free(quicksort_array_nodes_1);
+              taucs_free(quicksort_array_nodes_2);
+              taucs_free(quicksort_array_values);
+              taucs_free(part);
+              taucs_free(weights);
+              taucs_free(diags);
+              return(0);
+            }
+
+          quicksort_index = 0;
+
+          /* second pass: record every (part, part) pair with its value */
+          for(i=0;i<n;i++)
+            {
+              p = part[i];
+              t1 = vertex_perm[i];
+
+              for(j=taucs_ccs_mtxA->colptr[t1];j<taucs_ccs_mtxA->colptr[t1+1];j++)
+                {
+                  if (inv_perm[taucs_ccs_mtxA->rowind[j]] != -1)
+                    if (part[inv_perm[taucs_ccs_mtxA->rowind[j]]] > p)
+                      /* since each entry appears twice in taucs_ccs_mtxA,
+                         we need not update pairs when part[...]<p */
+                      {
+                        p2=part[inv_perm[taucs_ccs_mtxA->rowind[j]]];
+                        quicksort_array_nodes_1[quicksort_index] = p;
+                        quicksort_array_nodes_2[quicksort_index] = p2;
+                        quicksort_array_values[quicksort_index++] = taucs_ccs_mtxA->taucs_values[j];
+                      }
+                }
+            }
+
+          /* merge duplicate pairs; the return value is the new pair count */
+          quicksort_index = quicksort_and_shrink(quicksort_array_nodes_1,quicksort_array_nodes_2,quicksort_array_values,quicksort_index);
+        }
+
+      order_count = quicksort_index;
+
+      /* graph between the parts: nparts diagonal entries come first, so that
+         edges[i] is the diagonal entry of part i, then one entry per pair */
+      order_graph = graph_create(order_count+nparts);
+      if (!order_graph)
+        {
+          taucs_free(part);
+          taucs_free(weights);
+          taucs_free(diags);
+          taucs_free(quicksort_array_values);
+          taucs_free(quicksort_array_nodes_1);
+          taucs_free(quicksort_array_nodes_2);
+          return(0);
+        }
+      order_graph->nent = order_count+nparts;
+      order_count = 0;
+      order_graph->n = nparts;
+      for(i=0;i<nparts;i++)
+        {
+          order_graph->edges[order_count].i=i;
+          order_graph->edges[order_count].j=i;
+          order_graph->edges[order_count].v=0;
+          order_count++;
+        }
+      if (curr_inst < max_inst)
+        {
+          for(i=0;i<nparts;i++)
+            if (weights[i]!=0)
+              {
+                out->edges[(*curr_entry)].i=curr_vertex;
+                out->edges[(*curr_entry)].j=local_next_unused_vertex+i;
+                out->edges[(*curr_entry)].v=weights[i];
+                diags[i] -= weights[i];
+                diag     -= weights[i];
+                (*curr_entry)++;
+              }
+
+          for(k=0;k<quicksort_index;k++)
+            {
+              i = quicksort_array_nodes_1[k];
+              j = quicksort_array_nodes_2[k];
+              order_graph->edges[order_count].i=i;
+              order_graph->edges[order_count].j=j;
+              order_graph->edges[order_count].v=quicksort_array_values[k];
+              order_graph->edges[i].v-=quicksort_array_values[k];
+              order_graph->edges[j].v-=quicksort_array_values[k];
+              order_count++;
+            }
+
+          order_ccs = graph_to_ccs_matrix(order_graph);
+          free_graph(order_graph);
+          if (!order_ccs)
+            {
+              taucs_free(part);
+              taucs_free(weights);
+              taucs_free(diags);
+              taucs_free(quicksort_array_values);
+              taucs_free(quicksort_array_nodes_1);
+              taucs_free(quicksort_array_nodes_2);
+              return(0);
+            }
+
+          if (inst[curr_inst].type == 1) /* Toledo */
+            {
+              for(k=0;k<quicksort_index;k++)
+                {
+                  i = quicksort_array_nodes_1[k];
+                  j = quicksort_array_nodes_2[k];
+                  out->edges[(*curr_entry)].i=local_next_unused_vertex+i;
+                  out->edges[(*curr_entry)].j=local_next_unused_vertex+j;
+                  out->edges[(*curr_entry)].v=quicksort_array_values[k];
+                  diags[i] -= quicksort_array_values[k];
+                  diags[j] -= quicksort_array_values[k];
+                  weights[i] += quicksort_array_values[k];
+                  weights[j] += quicksort_array_values[k];
+                  (*curr_entry)++;
+                }
+            }
+          else /* Vaidya */
+            {
+              vaidya_ccs = taucs_amwb_preconditioner_create(order_ccs,1,(order_ccs->n)/8,0);
+              if (!vaidya_ccs)
+                {
+                  taucs_ccs_free(order_ccs);
+                  taucs_free(part);
+                  taucs_free(weights);
+                  taucs_free(diags);
+                  taucs_free(quicksort_array_values);
+                  taucs_free(quicksort_array_nodes_1);
+                  taucs_free(quicksort_array_nodes_2);
+                  return(0);
+                }
+              for(i=0;i<vaidya_ccs->n;i++)
+                for(j=vaidya_ccs->colptr[i];j<vaidya_ccs->colptr[i+1];j++)
+                  if (i != (vaidya_ccs->rowind[j]))
+                    {
+                      out->edges[(*curr_entry)].i=local_next_unused_vertex+i;
+                      out->edges[(*curr_entry)].j=local_next_unused_vertex+vaidya_ccs->rowind[j];
+                      out->edges[(*curr_entry)].v=vaidya_ccs->taucs_values[j];
+                      diags[i] -= vaidya_ccs->taucs_values[j];
+                      diags[vaidya_ccs->rowind[j]] -= vaidya_ccs->taucs_values[j];
+                      /* the weight added is the value of the edge just
+                         emitted (same rule as the last-level Vaidya branch);
+                         indexing the pair array with the stale loop counter
+                         k would read one element past its end */
+                      weights[i] += vaidya_ccs->taucs_values[j];
+                      weights[vaidya_ccs->rowind[j]] += vaidya_ccs->taucs_values[j];
+                      (*curr_entry)++;
+                    }
+
+              taucs_ccs_free(vaidya_ccs);
+            }
+
+          taucs_ccs_order(order_ccs,&perm_tmp,&inv_perm_tmp,ordering);
+          taucs_free(inv_perm_tmp);
+          taucs_ccs_free(order_ccs);
+          if (!perm_tmp) /* perm_tmp is dereferenced below */
+            {
+              taucs_free(part);
+              taucs_free(weights);
+              taucs_free(diags);
+              taucs_free(quicksort_array_values);
+              taucs_free(quicksort_array_nodes_1);
+              taucs_free(quicksort_array_nodes_2);
+              return(0);
+            }
+        }
+      else /* curr_inst == max_inst */
+        {
+          for(i=0;i<nparts;i++)
+            if (weights[i]!=0)
+              {
+                out->edges[(*curr_entry)].i=curr_vertex;
+                out->edges[(*curr_entry)].j=vertex_perm[i];
+                out->edges[(*curr_entry)].v=weights[i];
+                diag     -= weights[i];
+                (*curr_entry)++;
+              }
+          for(k=0;k<quicksort_index;k++)
+            {
+              i = quicksort_array_nodes_1[k];
+              j = quicksort_array_nodes_2[k];
+              order_graph->edges[order_count].i=i;
+              order_graph->edges[order_count].j=j;
+              order_graph->edges[order_count].v=quicksort_array_values[k];
+              order_graph->edges[i].v-=quicksort_array_values[k];
+              order_graph->edges[j].v-=quicksort_array_values[k];
+              order_count++;
+            }
+          order_ccs = graph_to_ccs_matrix(order_graph);
+          free_graph(order_graph);
+          if (!order_ccs)
+            {
+              taucs_free(part);
+              taucs_free(weights);
+              taucs_free(diags);
+              taucs_free(quicksort_array_values);
+              taucs_free(quicksort_array_nodes_1);
+              taucs_free(quicksort_array_nodes_2);
+              return(0);
+            }
+
+          if (inst[curr_inst].type == 1) /* Toledo */
+            {
+              for(k=0;k<quicksort_index;k++)
+                {
+                  i = quicksort_array_nodes_1[k];
+                  j = quicksort_array_nodes_2[k];
+                  out->edges[(*curr_entry)].i=vertex_perm[i];
+                  out->edges[(*curr_entry)].j=vertex_perm[j];
+                  out->edges[(*curr_entry)].v=quicksort_array_values[k];
+                  weights[i] += quicksort_array_values[k];
+                  weights[j] += quicksort_array_values[k];
+                  (*curr_entry)++;
+                }
+            }
+          else /* Vaidya */
+            {
+              vaidya_ccs = taucs_amwb_preconditioner_create(order_ccs,1,1,0);
+              if (!vaidya_ccs)
+                {
+                  taucs_ccs_free(order_ccs);
+                  taucs_free(part);
+                  taucs_free(weights);
+                  taucs_free(diags);
+                  taucs_free(quicksort_array_values);
+                  taucs_free(quicksort_array_nodes_1);
+                  taucs_free(quicksort_array_nodes_2);
+                  return(0);
+                }
+              for(i=0;i<vaidya_ccs->n;i++)
+                for(j=vaidya_ccs->colptr[i];j<vaidya_ccs->colptr[i+1];j++)
+                  if (i != (vaidya_ccs->rowind[j]))
+                    {
+                      out->edges[(*curr_entry)].i=vertex_perm[i];
+                      out->edges[(*curr_entry)].j=vertex_perm[vaidya_ccs->rowind[j]];
+                      out->edges[(*curr_entry)].v=vaidya_ccs->taucs_values[j];
+                      weights[i] += vaidya_ccs->taucs_values[j];
+                      weights[vaidya_ccs->rowind[j]] += vaidya_ccs->taucs_values[j];
+                      (*curr_entry)++;
+                    }
+              taucs_ccs_free(vaidya_ccs);
+            }
+
+          taucs_ccs_order(order_ccs,&perm_tmp,&inv_perm_tmp,ordering);
+          taucs_free(inv_perm_tmp);
+          taucs_ccs_free(order_ccs);
+          if (!perm_tmp) /* perm_tmp is dereferenced just below */
+            {
+              taucs_free(part);
+              taucs_free(weights);
+              taucs_free(diags);
+              taucs_free(quicksort_array_values);
+              taucs_free(quicksort_array_nodes_1);
+              taucs_free(quicksort_array_nodes_2);
+              return(0);
+            }
+          for(i=0;i<nparts;i++)
+            {
+              out->edges[(*curr_entry)].i=vertex_perm[i];
+              out->edges[(*curr_entry)].j=vertex_perm[i];
+              if (diagonal[vertex_perm[i]])
+                out->edges[(*curr_entry)].v=diagonal[vertex_perm[i]];
+              else
+                out->edges[(*curr_entry)].v=1;
+              (*curr_entry)++;
+              p1[ordering_counter_leaves++] = vertex_perm[perm_tmp[i]];
+            }
+        }
+    }
+
+  taucs_free(quicksort_array_values);
+  taucs_free(quicksort_array_nodes_1);
+  taucs_free(quicksort_array_nodes_2);
+
+  /* diagonal entry of curr_vertex (1 when nothing was accumulated) */
+  out->edges[(*curr_entry)].i=curr_vertex;
+  out->edges[(*curr_entry)].j=curr_vertex;
+  if (diag)
+    out->edges[(*curr_entry)].v=diag;
+  else
+    out->edges[(*curr_entry)].v=1;
+  (*curr_entry)++;
+
+  /* return inv_perm to its original state - contains only -1-s */
+  for(i=0;i<n;i++)
+    inv_perm[vertex_perm[i]] = -1;
+
+  /* ---- 3. build one Metis_struct per part --------------------------- */
+
+  vertices_in_subgraphs = (int *)taucs_malloc(n*sizeof(int)); /* for each vertex i, its position inside its own part */
+  tmp = (int *)taucs_calloc(nparts,sizeof(int));  /* tmp[i] is the next free vertex in part i */
+  tmp1 = (int *)taucs_calloc(nparts,sizeof(int)); /* helps compute number of vertices in each part i */
+
+  if ((!vertices_in_subgraphs)||(!tmp)||(!tmp1))
+    {
+      taucs_free(perm_tmp);
+      taucs_free(part);
+      taucs_free(weights);
+      taucs_free(diags);
+      taucs_free(vertices_in_subgraphs);
+      taucs_free(tmp);
+      taucs_free(tmp1);
+      return(0);
+    }
+
+  for(i=0;i<n;i++)
+    {
+      vertices_in_subgraphs[i]=tmp[part[i]]++;
+      tmp1[part[i]]++;
+    }
+
+  for(i=0;i<nparts;i++)
+    tmp[i] = 0;
+  /* now tmp[i] will help compute number of edges in each part */
+  for(i=0;i<n;i++)
+    {
+      p = part[i];
+      for(j=father->colptr[i];j<father->colptr[i+1];j++)
+        {
+          if (part[father->rowind[j]]==p)
+            tmp[p]++;
+        }
+    }
+
+  sons = (Metis_struct **)taucs_malloc(nparts*sizeof(Metis_struct *));
+  if (!sons)
+    {
+      taucs_free(perm_tmp);
+      taucs_free(part);
+      taucs_free(weights);
+      taucs_free(diags);
+      taucs_free(vertices_in_subgraphs);
+      taucs_free(tmp);
+      taucs_free(tmp1);
+      return(0);
+    }
+
+  for(i=0;i<nparts;i++)
+    {
+      sons[i] = Metis_struct_create(tmp1[i],tmp[i]);
+      if (sons[i] == 0)
+        {
+          taucs_free(perm_tmp);
+          taucs_free(part);
+          taucs_free(diags);
+          taucs_free(weights);
+          taucs_free(vertices_in_subgraphs);
+          taucs_free(tmp);
+          taucs_free(tmp1);
+          for(j=0;j<i;j++)
+            Metis_struct_free(sons[j]);
+          taucs_free(sons);
+          return(0);
+        }
+    }
+
+  for(i=0;i<nparts;i++)
+    {
+      tmp[i] = 0;
+      sons[i]->colptr[0]=0;
+    }
+
+  /* tmp[p] is the next free entry of sons[p]->rowind / sons[p]->values */
+  for(i=0;i<n;i++)
+    {
+      p = part[i];
+      for(j=father->colptr[i];j<father->colptr[i+1];j++)
+        if(part[father->rowind[j]]==p)
+          {
+            sons[p]->rowind[tmp[p]] = vertices_in_subgraphs[father->rowind[j]];
+            sons[p]->values[tmp[p]] = father->values[j];
+            tmp[p]++;
+          }
+      sons[p]->colptr[vertices_in_subgraphs[i]+1]=tmp[p];
+    }
+
+  /* the father is no longer needed once its sons exist */
+  Metis_struct_free(father);
+  taucs_free(tmp);
+
+  if (curr_inst < max_inst)
+    (*next_unused_vertex) += nparts;
+
+  out->n = max(out->n,*next_unused_vertex);
+
+  vertex_perms=(int **)taucs_malloc(nparts*sizeof(int *));
+  if (!vertex_perms)
+    {
+      taucs_free(perm_tmp);
+      taucs_free(part);
+      taucs_free(weights);
+      taucs_free(diags);
+      taucs_free(vertices_in_subgraphs);
+      taucs_free(tmp1);
+      for(j=0;j<nparts;j++)
+        Metis_struct_free(sons[j]);
+      taucs_free(sons);
+      return(0);
+    }
+
+  for(i=0;i<nparts;i++)
+    {
+      vertex_perms[i] = (int *)taucs_malloc(tmp1[i]*sizeof(int));
+      if (!vertex_perms[i])
+        {
+          taucs_free(perm_tmp);
+          taucs_free(part);
+          taucs_free(diags);
+          taucs_free(weights);
+          taucs_free(vertices_in_subgraphs);
+          taucs_free(tmp1);
+          for(j=0;j<nparts;j++)
+            Metis_struct_free(sons[j]);
+          taucs_free(sons);
+          /* release the arrays obtained so far (indices 0..i-1) */
+          for(j=0;j<i;j++)
+            taucs_free(vertex_perms[j]);
+          taucs_free(vertex_perms);
+          return(0);
+        }
+    }
+  for(i=0;i<n;i++)
+    vertex_perms[part[i]][vertices_in_subgraphs[i]] = vertex_perm[i];
+
+  taucs_free(vertices_in_subgraphs);
+
+  taucs_free(part);
+
+  /* ---- 4. recurse into the parts, or close the leaves --------------- */
+
+  if (curr_inst < max_inst)
+    {
+      for(i=0;i<nparts;i++)
+        {
+          if (tmp1[i] > 1) /* if the subgraph contains more than one vertex */
+            {
+              success = create_recursive_preconditioner(out,local_next_unused_vertex+i,next_unused_vertex,curr_entry,sons[i],vertex_perms[i],inv_perm,
+                                                        diags[i],diagonal,inst, max_inst,curr_inst+1,taucs_ccs_mtxA,ordering,p1);
+              if (success == 0)
+                {
+                  taucs_free(perm_tmp);
+                  taucs_free(diags);
+                  taucs_free(weights);
+                  taucs_free(tmp1);
+                  /* sons[0..i-1] were already released (by the recursive
+                     call or by the single-vertex branch below); sons[i] was
+                     handed to the call that failed, which may or may not
+                     have released it, so only the untouched sons are freed */
+                  for(j=i+1;j<nparts;j++)
+                    Metis_struct_free(sons[j]);
+                  taucs_free(sons);
+                  for(j=0;j<nparts;j++)
+                    taucs_free(vertex_perms[j]);
+                  taucs_free(vertex_perms);
+                  return(0);
+                }
+            }
+          else
+            {
+              /* I sure hope there is no bug in here */
+              out->edges[(*curr_entry)].i=local_next_unused_vertex+i;
+              out->edges[(*curr_entry)].j=vertex_perms[i][0];
+              out->edges[(*curr_entry)].v=weights[i];
+              (*curr_entry)++;
+
+              out->edges[(*curr_entry)].i=local_next_unused_vertex+i;
+              out->edges[(*curr_entry)].j=local_next_unused_vertex+i;
+              out->edges[(*curr_entry)].v=-2*weights[i];
+              (*curr_entry)++;
+
+              out->edges[(*curr_entry)].i=vertex_perms[i][0];
+              out->edges[(*curr_entry)].j=vertex_perms[i][0];
+              out->edges[(*curr_entry)].v=diagonal[vertex_perms[i][0]];
+              (*curr_entry)++;
+              Metis_struct_free(sons[i]);
+              p1[ordering_counter_leaves++] = vertex_perms[i][0];
+            }
+        }
+
+      if (inst[curr_inst].type == 0) /* Gremban */
+        for(j=0;j<nparts;j++)
+          p1[ordering_counter++] = local_next_unused_vertex+j;
+      else /* Toledo or Vaidya */
+        for(j=0;j<nparts;j++)
+          p1[ordering_counter++] = local_next_unused_vertex+perm_tmp[j];
+    }
+  else
+    {
+      /* last level: the sons are never handed to anybody, release them */
+      for(j=0;j<nparts;j++)
+        Metis_struct_free(sons[j]);
+    }
+
+  taucs_free(perm_tmp);
+
+  taucs_free(tmp1);
+  taucs_free(weights);
+  taucs_free(diags);
+
+  /* every sons[j] has been released above; only the array remains */
+  taucs_free(sons);
+
+  for (i=0;i<nparts;i++)
+    taucs_free(vertex_perms[i]);
+  taucs_free(vertex_perms);
+
+  if (is_root)
+    p1[ordering_counter++] = curr_vertex;
+
+  return(1);
+}
+
+#if 0
+/* Debugging aid, compiled out: prints "n n nnz" followed by one
+   "column row value" line per stored entry of the matrix. */
+void print_ccs_mat(taucs_ccs_matrix a)
+{
+  int col, ent;
+
+  taucs_printf("%d %d %d\n",a.n,a.n,a.colptr[a.n]);
+
+  for(col=0;col<a.n;col++)
+    {
+      ent = a.colptr[col];
+      while (ent < a.colptr[col+1])
+        {
+          taucs_printf("%lg %lg %lg\n",(double)col,(double)a.rowind[ent],a.values[ent]);
+          ent++;
+        }
+    }
+}
+#endif
+
+/*
+ * Debugging check that perm[0..n-1] contains no value twice.
+ *
+ * Every entry is asserted to lie in [0,n).  When a repeated value is found
+ * the function prints a message and terminates the process with exit code
+ * 345, so it never returns in that case.
+ *
+ * Returns 1 when no value is repeated (also for n <= 0, where there is
+ * nothing to check) and 0 when the scratch array cannot be allocated, i.e.
+ * when the property could not be verified.
+ */
+int is_perm(int *perm,int n)
+{
+  int *seen,i;
+
+  if (n <= 0)
+    return(1);
+
+  seen = (int *)taucs_calloc(n,sizeof(int));
+  if (!seen)
+    return(0);
+
+  for(i=0;i<n;i++)
+    {
+      /* a negative entry would index in front of the scratch array */
+      assert(perm[i] >= 0 && perm[i] < n);
+      if (seen[perm[i]])
+        {
+          taucs_free(seen);
+          printf("NO WAY!!!\n");exit(345);
+        }
+      seen[perm[i]] = 1;
+    }
+  taucs_free(seen);
+  return(1);
+}
+
+/*
+ * taucs_sg_preconditioner_create
+ *
+ * Parses gremban_command, builds the multilevel preconditioner graph with
+ * create_recursive_preconditioner, permutes and factors it, and returns a
+ * newly allocated multilevel_args (as void*) holding the factor and two work
+ * vectors.  Release the result with taucs_sg_preconditioner_free.
+ *
+ * Parameters
+ *   A                matrix to precondition.
+ *   perm, invperm    outputs: on success *perm and *invperm each point to a
+ *                    newly allocated array of A->n ints; on failure they are
+ *                    either untouched or set to NULL.
+ *   ordering         ordering name forwarded to the recursive construction.
+ *   gremban_command  either "regular:GM|CT|VA:k" or a colon separated list
+ *                    "TYPE:k:TYPE:k:..." with at least two levels.  The
+ *                    string is modified in place (separators are overwritten
+ *                    with NUL bytes while parsing).
+ *
+ * A malformed command string prints a message and terminates the process
+ * with exit code 345.  Any other failure returns NULL.  When NOMETIS is
+ * defined the function always returns NULL.
+ */
+void *taucs_sg_preconditioner_create(taucs_ccs_matrix *A,
+                                     int **perm,
+                                     int **invperm,
+                                     char* ordering,
+                                     char *gremban_command)
+{
+#ifdef NOMETIS
+  return NULL;
+#else
+  instruction *inst = NULL;
+  int preconditioner_n,n;
+  int *vertex_perm = NULL,*inv_perm = NULL;
+  int tmp=1,tmp2=0,tmp3=0,i,k;
+  int nnz;
+  double pairs;
+  Metis_struct *Metis_A = NULL;
+  taucs_ccs_matrix *symmetric_A = NULL;
+  int next_unused_vertex,curr_entry;
+  int success=1;
+  int perms_allocated = 0; /* set once *perm / *invperm belong to this call */
+  graph *out = NULL;
+  double *diagonal = NULL;
+  taucs_ccs_matrix *out1;
+  multilevel_args* P = NULL;
+  int depth;
+  taucs_ccs_matrix* PGPT;
+  void* snL;
+  int *p = NULL, *ip = NULL;
+
+  double wtime_recursive_create, wtime_supernodal_factor,wtime_factor_llt;
+
+  /* ---- parse the command string ------------------------------------- */
+
+  if (gremban_command[0] == 'r')
+    {
+      char *tok,*sep;
+      int parts,type;
+
+      /* exactly two ':' separators are required */
+      depth = 0;
+      tok = gremban_command;
+      while((tok=(strstr(tok,":")))!=NULL)
+        {
+          tok++;
+          depth++;
+        }
+      if (depth!=2)
+        {
+          printf("Command string should have three parts 'regular:GM/CT/VA:number_of_parts_in_each_level'\n");
+          exit(345);
+        }
+      tok = gremban_command;
+
+      sep = strstr(tok,":");
+      if (sep)
+        *sep=0;
+
+      if (strcmp(tok,"regular")!=0)
+        {
+          printf("Syntax error in Gremban string. Exiting");
+          exit(345);
+        }
+
+      tok = sep+1;
+      sep = strstr(tok,":");
+      if (sep)
+        *sep=0;
+      sep++;
+
+      if (strcmp(tok,"GM")==0)
+        type = 0;
+      else
+        if (strcmp(tok,"CT")==0)
+          type = 1;
+        else
+          if (strcmp(tok,"VA")==0)
+            type = 2;
+          else
+            {
+              printf("must choose CT or GM or VA. %s. Exiting\n",tok);
+              exit(345);
+            }
+
+      /* an unparsable count is treated like a count that is too small */
+      if (sscanf(sep,"%d",&parts) != 1)
+        parts = 0;
+
+      if (parts < 2)
+        {
+          printf("Must divide into at least 2 parts at each level. Exiting\n");
+          exit(345);
+        }
+
+      depth = (int)(log(A->n)/log(parts))+1;
+      inst = (instruction *)taucs_malloc(depth*sizeof(instruction));
+      if (!inst)
+        goto fail;
+      for(i=0;i<depth;i++)
+        {
+          inst[i].type = type;
+          inst[i].k = parts;
+        }
+    }
+  else
+    {
+      char *tok,*sep,*next;
+      int parts;
+
+      /* number of fields = number of ':' + 1; two fields per level */
+      depth = 1;
+      tok = gremban_command;
+      while((tok=(strstr(tok,":")))!=NULL)
+        {
+          tok++;
+          depth++;
+        }
+      if (depth%2)
+        {
+          printf("Command string should have 2 strings for each level\n");
+          exit(345);
+        }
+      tok = gremban_command;
+
+      depth = depth/2;
+      if (depth<2)
+        {
+          printf("Command string should describe a preconditioner of depth 2 at least\n");
+          exit(345);
+        }
+
+      inst = (instruction *)taucs_malloc(depth*sizeof(instruction));
+      if (!inst)
+        goto fail;
+      for(i=0;i<depth;i++)
+        {
+          sep = strstr(tok,":");
+          if (!sep)
+            {
+              printf("Command string should have 2 strings for each level\n");
+              exit(345);
+            }
+          *sep=0;
+          sep++;
+
+          if (strcmp(tok,"GM")==0)
+            inst[i].type = 0;
+          else
+            if (strcmp(tok,"CT")==0)
+              inst[i].type = 1;
+            else
+              if (strcmp(tok,"VA")==0)
+                inst[i].type = 2;
+              else
+                {
+                  printf("must choose CT or GM or VA. Exiting\n");
+                  exit(345);
+                }
+
+          next = strstr(sep,":");
+          if (next)
+            *next=0;
+
+          if (sscanf(sep,"%d",&parts) != 1)
+            {
+              printf("Could not read the number of parts in the command string. Exiting\n");
+              exit(345);
+            }
+          inst[i].k = parts;
+          /* the last level has no trailing separator */
+          tok = next ? next+1 : NULL;
+        }
+    }
+
+  /* ---- size and allocate the work structures ------------------------ */
+
+  n = A->n;
+  preconditioner_n = (A->n);
+
+  vertex_perm = (int *)taucs_malloc(n*sizeof(int));
+  inv_perm    = (int *)taucs_malloc(n*sizeof(int));
+  if ((!vertex_perm)||(!inv_perm))
+    goto fail;
+
+  for(i=0;i<n;i++)
+    {
+      vertex_perm[i] = i;
+      inv_perm[i] = -1;
+    }
+
+  nnz = (A->colptr)[n];
+  for(i=0;i<depth;i++)
+    {
+      tmp3 += tmp;
+      /* min(nnz, tmp*(tmp-1)/2); the product is formed in floating point
+         because tmp*(tmp-1) exceeds the int range for deep hierarchies */
+      pairs = 0.5*(double)tmp*(double)(tmp-1);
+      tmp2 += (pairs < (double)nnz) ? (int)pairs : nnz;
+      preconditioner_n += tmp;
+      tmp *= inst[i].k;
+    }
+
+  tmp3 += preconditioner_n;
+  tmp2 += nnz + preconditioner_n;
+  out = graph_create(tmp2);
+  if (out == NULL)
+    goto fail;
+  out->n = 0;
+
+  Metis_A = taucs_ccs_matrix_to_Metis_struct(A);
+  if (Metis_A == NULL)
+    goto fail;
+
+  diagonal = (double *)taucs_malloc(n*sizeof(double));
+  if (!diagonal)
+    goto fail;
+
+  symmetric_A = taucs_ccs_matrix_to_taucs_ccs_matrix(A,diagonal);
+  if (symmetric_A == NULL)
+    goto fail;
+
+  next_unused_vertex = n+1;
+
+  curr_entry = 0;
+
+  p = (int *)taucs_malloc(tmp3*sizeof(int));
+  if(!p)
+    goto fail;
+
+  /* ---- build the preconditioner graph ------------------------------- */
+
+  wtime_recursive_create = taucs_wtime();
+  success = create_recursive_preconditioner(out,n,&next_unused_vertex,&curr_entry,Metis_A,vertex_perm,inv_perm,
+                                            0.0,diagonal,inst,depth-1,0,symmetric_A,ordering,p);
+  /* the recursion releases the Metis structure it is given once the
+     sub-structures are built, so it must not be released again here */
+  Metis_A = NULL;
+  wtime_recursive_create = taucs_wtime()-wtime_recursive_create;
+  taucs_printf("\tRecursive Creation time = % 10.3f seconds\n",wtime_recursive_create);
+
+  taucs_free(diagonal);    diagonal = NULL;
+  taucs_free(vertex_perm); vertex_perm = NULL;
+  taucs_free(inv_perm);    inv_perm = NULL;
+  taucs_ccs_free(symmetric_A); symmetric_A = NULL;
+  taucs_free(inst);        inst = NULL;
+
+  if(success == 0)
+    goto fail;
+
+  /* ---- permutations -------------------------------------------------- */
+
+  ip = (int *)taucs_malloc(out->n*sizeof(int));
+  *perm = (int *)taucs_malloc(n*sizeof(int));
+  *invperm = (int *)taucs_malloc(n*sizeof(int));
+  perms_allocated = 1;
+  if ((!ip)||(!*perm)||(!*invperm))
+    goto fail;
+
+  for(i=0;i<out->n;i++)
+    ip[p[i]] = i;
+
+  for(i=0;i<n;i++)
+    (*perm)[i] = p[i];
+
+  for(i=0;i<n;i++)
+    (*invperm)[(*perm)[i]] = i;
+
+  out->nent=curr_entry;
+  taucs_check_diag_dominant_matrix(out,1);
+
+  k = (out->n)-n; /* number of vertices added on top of the n original ones */
+
+  out1 = graph_to_ccs_matrix(out);
+  free_graph(out); /* the graph is not needed once converted */
+  out = NULL;
+
+  if (out1 == NULL)
+    goto fail;
+
+  PGPT = taucs_ccs_permute_symmetrically(out1,p,ip);
+
+  taucs_ccs_free(out1);
+
+  if (PGPT == NULL)
+    goto fail;
+
+  /* ---- factor --------------------------------------------------------- */
+
+  P = (multilevel_args*) taucs_malloc(sizeof(multilevel_args));
+  if (!P)
+    goto fail;
+  P->L = NULL;
+  P->Ztilde = NULL;
+  P->Rtilde = NULL;
+
+  wtime_factor_llt = taucs_wtime();
+  taucs_printf("taucs_gremban: factoring, preconditioner has %d rows/cols\n",
+               PGPT->n);
+
+  /* NOTE(review): PGPT is never released, exactly as before this change;
+     confirm whether the supernodal factor keeps a reference to it before
+     adding a taucs_ccs_free(PGPT) here. */
+  snL = taucs_ccs_factor_llt_mf(PGPT);
+  wtime_factor_llt = taucs_wtime()-wtime_factor_llt;
+  taucs_printf("\tFactor LL^t time = % 10.3f seconds\n",wtime_factor_llt);
+  if (!snL)
+    goto fail;
+  wtime_supernodal_factor = taucs_wtime();
+  P->L = taucs_supernodal_factor_to_ccs(snL);
+  wtime_supernodal_factor = taucs_wtime()-wtime_supernodal_factor;
+  taucs_printf("\tSupernodal-factor-to-ccs factor time = % 10.3f seconds\n",wtime_supernodal_factor);
+  taucs_supernodal_factor_free(snL);
+  if (!(P->L))
+    goto fail;
+
+  taucs_free(p);  p = NULL;
+  taucs_free(ip); ip = NULL;
+
+  P->Ztilde = (double*) taucs_malloc((n+k) * sizeof(double));
+  P->Rtilde = (double*) taucs_malloc((n+k) * sizeof(double));
+  if ((!(P->Ztilde))||(!(P->Rtilde)))
+    goto fail;
+  P->n = n;
+  P->k = k;
+
+  return P;
+
+ fail:
+  /* single exit for every failure: each pointer is either NULL or still
+     owned by this function at this point */
+  taucs_free(inst);
+  taucs_free(vertex_perm);
+  taucs_free(inv_perm);
+  taucs_free(diagonal);
+  if (symmetric_A)
+    taucs_ccs_free(symmetric_A);
+  if (Metis_A)
+    Metis_struct_free(Metis_A);
+  if (out)
+    free_graph(out);
+  taucs_free(p);
+  taucs_free(ip);
+  if (perms_allocated)
+    {
+      taucs_free(*perm);
+      taucs_free(*invperm);
+      *perm = NULL;
+      *invperm = NULL;
+    }
+  if (P)
+    {
+      taucs_free(P->Ztilde);
+      taucs_free(P->Rtilde);
+      if (P->L)
+        taucs_ccs_free(P->L);
+      taucs_free(P);
+    }
+  return(NULL);
+#endif
+}
+
+/*
+ * Releases a preconditioner returned by taucs_sg_preconditioner_create:
+ * the two work vectors, the factor L and the structure itself.
+ * Passing NULL (what the create function returns on failure) is a no-op.
+ */
+void taucs_sg_preconditioner_free(void* vP) 
+{
+  multilevel_args* P = (multilevel_args*) vP;
+
+  if (!P)
+    return;
+
+  taucs_free(P->Rtilde);
+  taucs_free(P->Ztilde);
+  taucs_ccs_free(P->L);
+  taucs_free(P);
+}
+
+#endif /* TAUCS_CORE_DOUBLE */
+
diff --git a/contrib/taucs/src/taucs_iter.c b/contrib/taucs/src/taucs_iter.c
new file mode 100644
index 0000000000000000000000000000000000000000..0da5f692ec8c30a1f96cedd48f3cf4787959d89d
--- /dev/null
+++ b/contrib/taucs/src/taucs_iter.c
@@ -0,0 +1,431 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include "taucs.h"
+
+#ifdef TAUCS_CORE_DOUBLE
+
+/*********************************************************/
+/* utilities                                             */
+/*********************************************************/
+/*extern int _isnan(double);*/
+
+/* Inner product of the first n entries of v and u, accumulated in index
+   order; yields 0.0 when n <= 0. */
+static double dotprod(int n, double* v, double* u)
+{
+  double sum = 0.0;
+  int idx = 0;
+
+  while (idx < n) {
+    sum += v[idx]*u[idx];
+    idx++;
+  }
+
+  return sum;
+}
+
+/* Euclidean norm of the first n entries of v.
+
+   Rather than summing v[i]*v[i] directly, the loop keeps the largest
+   magnitude met so far (biggest) together with the sum of squares of the
+   entries divided by that magnitude (sumsq), and rescales sumsq whenever a
+   larger entry shows up.  The result is biggest * sqrt(sumsq).  A vector of
+   length one is answered immediately with fabs(v[0]). */
+static double twonorm(int n, double* v)
+{
+  double sumsq   = 1.0;
+  double biggest = 0.0;
+  double mag, r;
+  int idx;
+
+  if (n==1) return fabs(v[0]);
+
+  for (idx=0; idx<n; idx++) {
+    if ( v[idx] == 0 ) continue; /* zero entries contribute nothing */
+
+    mag = fabs(v[idx]);
+    if (biggest < mag) {
+      /* new largest entry: re-express the running sum relative to it */
+      r       = biggest/mag;
+      sumsq   = 1.0 + sumsq * r * r;
+      biggest = mag;
+    } else {
+      r     = mag/biggest;
+      sumsq = sumsq + r * r;
+    }
+  }
+  return biggest * sqrt( sumsq );
+}
+
+/*********************************************************/
+/* conjugate gradients                                   */
+/*********************************************************/
+
+/*
+ * Preconditioned conjugate gradients for A*X = B.
+ *
+ *   A             coefficient matrix (applied through taucs_ccs_times_vec).
+ *   precond_fn    optional preconditioner, called as
+ *                 precond_fn(precond_args, Z, R) to produce Z from the
+ *                 residual R; when NULL, Z = R is used.
+ *   precond_args  opaque argument passed to precond_fn.
+ *   vX            in/out: initial guess on entry (the initial residual is
+ *                 B - A*X), approximate solution on exit.
+ *   vB            right-hand side.
+ *   itermax       iteration limit; the loop condition is Iter <= itermax,
+ *                 so up to itermax+1 iterations are performed.
+ *   convergetol   the iteration stops once
+ *                 ||residual|| / ||initial residual|| <= convergetol.
+ *
+ * Returns 0 when the iteration has run (whether or not it converged) and
+ * -1 when the work vectors cannot be allocated.
+ */
+int 
+taucs_conjugate_gradients(taucs_ccs_matrix* A,
+                          int               (*precond_fn)(void*,void* x,void* b),
+                          void*             precond_args,
+                          void*             vX,
+                          void*             vB,
+                          int               itermax,
+                          double            convergetol
+                          )
+{
+  double* X = (double*) vX;
+  double* B = (double*) vB;
+  double *P, *R, *Q, *Z ;
+  double Alpha, Beta, Rho, Init_norm, ratio, Res_norm, Rtmp ;
+  double Rho0 = 0.0; /* warning */
+  /* added to the denominators of Alpha and Beta */
+  double Tiny = 0.1e-28;
+  int    Iter;
+  int    i,n;
+
+  /* define RESVEC (instead of RESVEC_NO) to append the residual history to
+     the file "resvec" */
+#define RESVEC_NO
+#ifdef RESVEC
+  FILE* f;
+  double* resvec = (double*) taucs_malloc((itermax+2) * sizeof(double));
+  assert(resvec);
+  for (i=0; i<=itermax; i++) {
+    double nan = taucs_get_nan();
+    assert(taucs_isnan(nan));
+    resvec[i] = nan;
+  }
+#endif
+
+  n = A->n;
+ 
+  P = (double*) taucs_malloc(n * sizeof(double));
+  R = (double*) taucs_malloc(n * sizeof(double));
+  Q = (double*) taucs_malloc(n * sizeof(double));
+  Z = (double*) taucs_malloc(n * sizeof(double));
+  if (!P || !R || !Q || !Z) {
+    taucs_free(P) ;
+    taucs_free(R) ;
+    taucs_free(Q) ;
+    taucs_free(Z) ;
+#ifdef RESVEC
+    taucs_free(resvec);
+#endif
+    return -1;
+  }
+
+  /* NOTE(review): in both TAUCS_REMOVE_CONST blocks the second loop resets
+     s to 0.0 before subtracting it, so nothing is removed; the blocks are
+     compiled out and left untouched - confirm the intent before enabling. */
+#define TAUCS_REMOVE_CONST_NO
+#ifdef TAUCS_REMOVE_CONST
+    {
+      double s;
+      for (i=0, s=0.0; i<n; i++) s += B[i];
+      for (i=0, s=0.0; i<n; i++) B[i] -= s;
+    }
+#endif
+
+  /* initial residual R = B - A*X */
+  taucs_ccs_times_vec(A,X,R);
+  for (i=0; i<n; i++) R[i] = B[i] - R[i];
+
+  Res_norm = Init_norm = twonorm(n,R);
+  printf("two norm of initial residual %.2e\n",Init_norm);
+  if ( Init_norm == 0.0 ) Init_norm = 1.0; /* avoid dividing by zero below */
+  ratio = 1.0;
+ 
+  Iter = 0;
+ 
+#ifdef RESVEC
+  resvec[Iter] = Res_norm;
+#endif
+
+  while ( ratio > convergetol && Iter <= itermax ) {
+    Iter++;
+    
+    /* Z = preconditioned residual */
+    if (precond_fn)
+      (*precond_fn)(precond_args,Z,R);
+    else
+      for (i=0; i<n; i++) Z[i] = R[i];
+
+    for (i=0,Rho=0.0; i<n; i++) Rho += R[i] * Z[i];
+
+    /* search direction */
+    if ( Iter == 1 ) {
+      for (i=0; i<n; i++) P[i] = Z[i];
+    } else {
+      Beta = Rho /(Rho0 + Tiny);
+      for (i=0; i<n; i++) P[i] = Z[i] + Beta * P[i];
+    }
+ 
+    taucs_ccs_times_vec(A,P,Q); /* Q = A*P */
+
+    for (i=0,Rtmp=0.0; i<n; i++) Rtmp += P[i] * Q[i];
+
+    Alpha = Rho/(Rtmp+Tiny);
+
+    for (i=0; i<n; i++) X[i] = X[i] + Alpha * P[i];
+
+    for (i=0; i<n; i++) R[i] = R[i] - Alpha * Q[i];
+
+#ifdef TAUCS_REMOVE_CONST
+    {
+      double s;
+      for (i=0, s=0.0; i<n; i++) s += R[i];
+      for (i=0, s=0.0; i<n; i++) R[i] -= s;
+    }
+#endif
+
+    Rho0  = Rho;
+
+    Res_norm = twonorm(n,R);
+
+#if 0
+    taucs_ccs_times_vec(A,X,R);
+    for (i=0; i<n; i++) R[i] -= B[i];
+    Res_norm = twonorm(n,R);
+#endif
+
+#ifdef RESVEC
+  resvec[Iter] = Res_norm;
+#endif
+
+    ratio = Res_norm/Init_norm;
+    if (Iter % 25 == 0) 
+      taucs_printf("cg: n=%d at iteration %d the convergence ratio is %.2e, Rnorm %.2e\n", 
+                   A->n,Iter, ratio,Res_norm) ;
+  }
+  if (Iter > 0) {
+    taucs_printf("cg: n=%d iterations = %d Reduction in residual norm %.2e, Rnorm %.2e\n", 
+                 A->n,Iter,ratio,Res_norm) ;
+    /* recompute the residual from X instead of trusting the recurrence */
+    taucs_ccs_times_vec(A,X,R);
+    for (i=0; i<n; i++) R[i] = B[i] - R[i];
+    taucs_printf("cg: true residual norm %.2e\n",twonorm(n,R));
+  }
+
+  taucs_free(P) ;
+  taucs_free(R) ;
+  taucs_free(Q) ;
+  taucs_free(Z) ;
+ 
+#ifdef RESVEC
+  f=fopen("resvec","a");
+  assert(f);
+  for (i=0; i<=itermax && !taucs_isnan(resvec[i]); i++) {
+    fprintf(f,"%.3e\n",resvec[i]);
+  }
+  fclose(f);
+  taucs_free(resvec);
+#endif
+
+  return 0; 
+}
+
+/*********************************************************/
+/* minres: solves A*x = b starting from x = 0; returns 0 */
+/* after iterating, -1 (work vectors freed) on failure.  */
+/*********************************************************/
+int 
+taucs_minres(taucs_ccs_matrix*  A,
+	     int                (*precond_fn)(void*,void* x,void* b),
+	     void*              precond_args,
+	     void*              vX,
+	     void*              vB,
+	     int                itermax,
+	     double             convergetol)
+{
+  double* X = (double*) vX;
+  double* B = (double*) vB;
+  int     retcode = -1; /* pessimistic: becomes 0 only after the iteration loop has run */
+  double *Xcg, *R, *V, *VV, *Vold, *Volder, *M, *Mold, *Molder;
+  double tolb, normr, alpha, beta, beta1, betaold;
+  double gamma, gammabar, delta, deltabar, epsilon;
+  double cs,sn,snprod, numer, denom;
+  int    Iter;
+  int    i,n;
+
+  n = A->n;
+  R      = (double*) taucs_malloc(n * sizeof(double));
+  Xcg    = (double*) taucs_malloc(n * sizeof(double));
+  VV     = (double*) taucs_malloc(n * sizeof(double));
+  V      = (double*) taucs_malloc(n * sizeof(double));
+  Vold   = (double*) taucs_malloc(n * sizeof(double));
+  Volder = (double*) taucs_malloc(n * sizeof(double));
+  M      = (double*) taucs_malloc(n * sizeof(double));
+  Mold   = (double*) taucs_malloc(n * sizeof(double));
+  Molder = (double*) taucs_malloc(n * sizeof(double));
+  if (!R || !Xcg || !VV || !V || !Vold || !Volder || !M || !Mold || !Molder) {
+    taucs_printf("minres: memory allocation\n");
+    goto cleanup; /* frees whichever vectors were obtained */
+  }
+  tolb = convergetol * twonorm(n,B); /* stop once ||r|| <= convergetol * ||b|| */
+  taucs_printf("minres: residual convergence tolerance %.1e\n",tolb);
+  for (i=0; i<n; i++) X[i] = 0;    /* x = 0 */
+  for (i=0; i<n; i++) R[i] = B[i]; /* r = b-A*x */
+  normr = twonorm(n,R);
+  if ( normr == 0.0 ) {
+    taucs_printf("minres: initial residual == 0\n");
+    goto cleanup;
+  }
+
+  for (i=0; i<n; i++) V[i]    = R[i];    /* v = r */
+  for (i=0; i<n; i++) Vold[i] = R[i];    /* vold = r */
+
+  if (precond_fn)
+    (*precond_fn)(precond_args,V,Vold);
+  else
+    for (i=0; i<n; i++) V[i] = Vold[i];
+
+  beta1 = dotprod(n,Vold,V);
+  if (beta1 < 0.0) {
+    taucs_printf("minres: error (1)\n");
+    goto cleanup;
+  }
+  beta1 = sqrt(beta1);
+
+  { int flag = 0; /* number of NaN warnings printed so far (capped at 10) */
+    for (i=0; i<n; i++)
+      if (taucs_isnan(V[i]) && flag < 10) {
+	taucs_printf("minres: V has nan's in position %d\n",i);
+	flag++;
+      }
+  }
+
+
+  snprod = beta1;
+  taucs_printf(">>> %e %e %e\n",beta1,snprod,normr);
+
+
+  for (i=0; i<n; i++) VV[i] = V[i] / beta1;
+
+  taucs_ccs_times_vec(A,VV,V); /* V = A*VV */
+
+  alpha = dotprod(n,VV,V);
+
+  for (i=0; i<n; i++) V[i] -= (alpha/beta1) * Vold[i];
+
+  /* local reorthogonalization */
+
+  numer = dotprod(n,VV,V);
+  denom = dotprod(n,VV,VV);
+
+  for (i=0; i<n; i++) V[i] -= (numer/denom) * VV[i];
+
+  for (i=0; i<n; i++) Volder[i] = Vold[i];
+  for (i=0; i<n; i++) Vold[i]   = V[i];
+
+  if (precond_fn)
+    (*precond_fn)(precond_args,V,Vold);
+  else
+    for (i=0; i<n; i++) V[i] = Vold[i];
+
+  betaold = beta1;
+  beta = dotprod(n,Vold,V);
+  if (beta < 0.0) {
+    taucs_printf("minres: error (2)\n");
+    goto cleanup;
+  }
+  beta = sqrt(beta);
+
+  gammabar = alpha;
+  epsilon = 0.0;
+  deltabar = beta;
+  gamma = sqrt(gammabar*gammabar + beta*beta);
+
+
+  for (i=0; i<n; i++) Mold[i] = 0.0;
+  for (i=0; i<n; i++) M[i]    = VV[i] / gamma;
+
+  cs = gammabar / gamma;
+  sn = beta / gamma;
+
+
+  for (i=0; i<n; i++) X[i] += snprod*cs*M[i];
+  snprod = snprod * sn;
+
+  /* generate CG iterates (Xcg is computed but not read anywhere in this routine) */
+  for (i=0; i<n; i++) Xcg[i] = X[i] + snprod*(sn/cs)*M[i];
+
+  /* compute residual again */
+
+  taucs_ccs_times_vec(A,X,R); 
+  for (i=0; i<n; i++) R[i] = B[i] - R[i];  /* r = b - A*x */
+  normr = twonorm(n,R);
+
+  taucs_printf("minres: starting iterations, residual norm is %.1e\n",normr);
+
+  for ( Iter=1; Iter <= itermax; Iter++ ) {
+
+    for (i=0; i<n; i++) VV[i] = V[i] / beta;
+    taucs_ccs_times_vec(A,VV,V); 
+    for (i=0; i<n; i++) V[i] -= (beta/betaold) * Volder[i];
+    alpha = dotprod(n,VV,V);
+    for (i=0; i<n; i++) V[i] -= (alpha/beta) * Vold[i];
+
+    for (i=0; i<n; i++) Volder[i] = Vold[i];
+    for (i=0; i<n; i++) Vold  [i] = V   [i];
+
+    if (precond_fn)
+      (*precond_fn)(precond_args,V,Vold);
+    else
+      for (i=0; i<n; i++) V[i] = Vold[i];
+
+    betaold = beta;
+    beta = dotprod(n,Vold,V);
+    if (beta < 0.0) {
+      taucs_printf("minres: error (3)\n");
+      goto cleanup;
+    }
+    beta = sqrt(beta);
+
+    delta = cs*deltabar + sn*alpha;
+    for (i=0; i<n; i++) Molder[i] = Mold[i];
+    for (i=0; i<n; i++) Mold  [i] = M   [i];
+    for (i=0; i<n; i++) M[i] = VV[i] - delta*Mold[i] - epsilon*Molder[i];
+    gammabar = sn*deltabar - cs*alpha;
+    epsilon = sn*beta;
+    deltabar = -cs*beta;
+    gamma = sqrt(gammabar*gammabar + beta*beta);
+    for (i=0; i<n; i++) M[i] = M[i]/ gamma;
+    cs = gammabar / gamma;
+    sn = beta / gamma;
+
+    /* stagnation test; skipped */
+
+    for (i=0; i<n; i++) X[i] += snprod*cs*M[i];
+    snprod = snprod*sn;
+    for (i=0; i<n; i++) Xcg[i] = X[i] + snprod*(sn/cs)*M[i];
+
+    if (precond_fn) {
+      taucs_ccs_times_vec(A,X,R); 
+      for (i=0; i<n; i++) R[i] = B[i] - R[i];  /* r = b - A*x */
+      normr = twonorm(n,R);
+    } else {
+      normr = fabs(snprod); /* recurrence estimate of the residual norm */
+      if (normr <= tolb) {
+	/* double check */
+	taucs_ccs_times_vec(A,X,R); 
+	for (i=0; i<n; i++) R[i] = B[i] - R[i];  /* r = b - A*x */
+	normr = twonorm(n,R);
+      }
+    }
+
+    /* progress report, printed on every iteration */
+    taucs_printf("minres: n=%d iterations = %d residual norm %12.4e\n", A->n,Iter,normr);
+
+    if (normr <= tolb) break;
+  }
+
+  taucs_printf("minres: done. n=%d iterations = %d residual norm %12.4e\n", A->n,Iter,normr);
+  retcode = 0; /* as before, 0 is returned whether or not the tolerance was reached */
+ cleanup:
+  taucs_free(Molder) ;
+  taucs_free(Mold) ;
+  taucs_free(M) ;
+  taucs_free(Volder) ;
+  taucs_free(Vold) ;
+  taucs_free(V) ;
+  taucs_free(VV) ;
+  taucs_free(Xcg) ;
+  taucs_free(R) ;
+  return retcode; 
+}
+
+#endif /* TAUCS_CORE_DOUBLE */
+
diff --git a/contrib/taucs/src/taucs_linsolve.c b/contrib/taucs/src/taucs_linsolve.c
new file mode 100644
index 0000000000000000000000000000000000000000..a6f2aa9bbeaeb55137f90a4dd12f27736303048e
--- /dev/null
+++ b/contrib/taucs/src/taucs_linsolve.c
@@ -0,0 +1,629 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/*#include <stdarg.h>*/
+#include <assert.h>
+#include <math.h>
+
+#define TAUCS_CORE_CILK
+
+#include "taucs.h"
+
+#define TRUE 1
+#define FALSE 0
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*********************************************************/
+/* utility routines                                      */
+/*********************************************************/
+
+static int element_size(int flags) /* bytes per element for the type bit set in flags */
+{
+  int bytes = (flags & TAUCS_SINGLE)   ? (int) sizeof(taucs_single)
+            : (flags & TAUCS_DOUBLE)   ? (int) sizeof(taucs_double)
+            : (flags & TAUCS_SCOMPLEX) ? (int) sizeof(taucs_scomplex)
+            : (flags & TAUCS_DCOMPLEX) ? (int) sizeof(taucs_dcomplex)
+            : (flags & TAUCS_INT)      ? (int) sizeof(int) : -1;
+  if (bytes < 0) assert(0); /* none of the known type bits was set */
+  return bytes;
+}
+
+/*********************************************************/
+/* argument parsing                                      */
+/*********************************************************/
+
+int taucs_getopt_boolean(char* cmd, void* args[], char* name, int* x) {
+  /* Accepts "name=true", "name=false" or "name=#k" (value read through args[k]); stores into *x and returns 1 on success, else 0. */
+  int lc = strlen(cmd), ln = strlen(name);
+  if (!strncmp(cmd,name,ln)) {
+    if (lc > ln && cmd[ln] == '.') return 0; /* cmd addresses a sub-option of name */
+    if (lc > ln && cmd[ln] == '=') {
+      if (cmd[ln+1] == '#') {
+	unsigned int p;
+	if (sscanf(cmd+ln+2,"%u",&p) == 1) {
+	  unsigned int i;
+	  /* walk the NULL-terminated array so that p is bounds-checked; args may be NULL */
+	  for (i=0; args && args[i]; i++)
+	    if (i==p) { *x = *( (int*) args[i] ); return 1; }
+	  taucs_printf("taucs: WARNING, pointer argument out of range in [%s]\n",cmd);
+	} else
+	  taucs_printf("taucs: WARNING, illegal pointer argument in [%s]\n",cmd);
+	return 0;
+      }
+      if (!strcmp(cmd+ln+1,"true")) {
+	*x = TRUE;
+	return 1;
+      }
+      if (!strcmp(cmd+ln+1,"false")) {
+	*x = FALSE;
+	return 1;
+      }
+    }
+    taucs_printf("taucs: WARNING, illegal argument in [%s]\n",cmd);
+  }
+
+  return 0;
+}
+
+int taucs_getopt_double(char* cmd, void* args[], char* name, double* x) {
+  /* Accepts "name=<number>" or "name=#k" (value read through args[k]); stores into *x and returns 1 on success, else 0. */
+  int lc = strlen(cmd), ln = strlen(name);
+  if (!strncmp(cmd,name,ln)) {
+    if (lc > ln && cmd[ln] == '.') return 0; /* cmd addresses a sub-option of name */
+    if (lc > ln && cmd[ln] == '=') {
+      if (cmd[ln+1] == '#') {
+	unsigned int p;
+	if (sscanf(cmd+ln+2,"%u",&p) == 1) {
+	  unsigned int i;
+	  /* walk the NULL-terminated array so that p is bounds-checked; args may be NULL */
+	  for (i=0; args && args[i]; i++)
+	    if (i==p) { *x = *( (double*) args[i] ); return 1; }
+	  taucs_printf("taucs: WARNING, pointer argument out of range in [%s]\n",cmd);
+	} else
+	  taucs_printf("taucs: WARNING, illegal pointer argument in [%s]\n",cmd);
+	return 0;
+      }
+      if (sscanf(cmd+ln+1,"%le",x) == 1) {
+	return 1;
+      }
+    }
+    taucs_printf("taucs: WARNING, illegal argument in [%s]\n",cmd);
+  }
+
+  return 0;
+}
+
+
+int taucs_getopt_pointer(char* cmd, void* args[], char* name, void** x) {
+  /* Accepts only "name=#k": copies the pointer stored at args[k] into *x and returns 1; returns 0 otherwise. */
+  int lc = strlen(cmd), ln = strlen(name);
+  if (!strncmp(cmd,name,ln)) {
+    if (lc > ln && cmd[ln] == '.') return 0; /* cmd addresses a sub-option of name */
+    if (lc > ln && cmd[ln] == '=') {
+      if (cmd[ln+1] == '#') {
+	unsigned int p;
+	if (sscanf(cmd+ln+2,"%u",&p) == 1) {
+	  unsigned int i;
+	  for (i=0; args && args[i]; i++) /* bounds-checks p; args may be NULL */
+	    if (i==p) { *x = *( (void**) args[i] ); return 1; }
+	  taucs_printf("taucs: WARNING, pointer argument out of range in [%s]\n",cmd);
+	} else
+	  taucs_printf("taucs: WARNING, illegal pointer argument in [%s]\n",cmd);
+	return 0;
+      }
+    }
+    taucs_printf("taucs: WARNING, illegal argument in [%s]\n",cmd);
+  }
+
+  return 0;
+}
+
+int taucs_getopt_string(char* cmd, void* args[], char* name, char** x) {
+  /* Accepts "name=<text>" (*x then points into cmd) or "name=#k" (string taken from args[k]); returns 1 on success, else 0. */
+  int lc = strlen(cmd), ln = strlen(name);
+  if (!strncmp(cmd,name,ln)) {
+    if (lc > ln && cmd[ln] == '.') return 0; /* cmd addresses a sub-option of name */
+    if (lc > ln && cmd[ln] == '=') {
+      if (cmd[ln+1] == '#') {
+	unsigned int p;
+	if (sscanf(cmd+ln+2,"%u",&p) == 1) {
+	  unsigned int i;
+	  for (i=0; args && args[i]; i++) /* bounds-checks p; args may be NULL */
+	    if (i==p) { *x = *( (char**) args[i] ); return 1; }
+	  taucs_printf("taucs: WARNING, pointer argument out of range in [%s]\n",cmd);
+	} else
+	  taucs_printf("taucs: WARNING, illegal pointer argument in [%s]\n",cmd);
+	return 0;
+      }
+      *x = cmd+ln+1;
+      return 1;
+    }
+    taucs_printf("taucs: WARNING, illegal argument in [%s]\n",cmd);
+  }
+
+  return 0;
+}
+
+/*********************************************************/
+/* Generic Factor routines                               */
+/* (Experimental, unstable interface)                    */
+/*********************************************************/
+
+#define TAUCS_FACTORTYPE_NONE           0
+#define TAUCS_FACTORTYPE_LLT_SUPERNODAL 1
+#define TAUCS_FACTORTYPE_LLT_CCS        2
+#define TAUCS_FACTORTYPE_LDLT_CCS       3
+#define TAUCS_FACTORTYPE_LLT_OOC        4
+#define TAUCS_FACTORTYPE_LU_OOC         5
+
+typedef struct {
+  int   n;        /* matrix dimension, copied from A->n when the record is created */
+  int   flags;    /* A->flags at creation time (remembers the data type) */
+  int   type;     /* one of the TAUCS_FACTORTYPE_* codes defined above */
+  int*  rowperm;  /* row permutation produced by taucs_ccs_order */
+  int*  colperm;  /* column permutation produced by taucs_ccs_order */
+  void* L;        /* the factor itself; how it is interpreted depends on type */
+} taucs_factorization;
+
+static void taucs_linsolve_free(void* vF)
+{
+  taucs_factorization* fact = (taucs_factorization*) vF; /* NULL handle: nothing to release */
+  if (fact != NULL) {
+    switch (fact->type) { /* for every other type L is not released here */
+    case TAUCS_FACTORTYPE_LLT_SUPERNODAL: taucs_supernodal_factor_free(fact->L); break;
+    case TAUCS_FACTORTYPE_LLT_CCS:        taucs_ccs_free(fact->L);               break;
+    default:                              break;
+    }
+    taucs_free(fact->rowperm);
+    taucs_free(fact->colperm);
+    taucs_free(fact);
+  }
+}
+
+int taucs_linsolve(taucs_ccs_matrix* A,         /* matrix; NULL together with nrhs==0 requests release of *F */
+		   void**            F,         /* in/out factorization handle; may be NULL */
+		   int               nrhs,      /* number of right-hand sides; 0 means no solve */
+		   void*             X,         /* out: nrhs solutions, A->n elements each */
+		   void*             B,         /* in: nrhs right-hand sides, A->n elements each */
+		   char*             options[], /* NULL-terminated option strings; may be NULL */
+		   void*             opt_arg[]) /* values referenced by "#k" in the option strings */
+{
+  int retcode = TAUCS_SUCCESS;
+  double tw,tc;
+  /* returns TAUCS_SUCCESS or one of the TAUCS_ERROR* codes stored in retcode below */
+  int i;
+  taucs_ccs_matrix*    PAPT    = NULL;
+  int*                 rowperm = NULL;
+  int*                 colperm = NULL;
+  taucs_factorization* f       = NULL;
+
+  void* PX = NULL;
+  void* PB = NULL;
+
+  void*  opt_context   = NULL;
+  double opt_cilk_nproc= -1.0;
+#ifdef TAUCS_CILK
+  int    local_context = FALSE;
+#endif
+
+  int    opt_factor    =  1;
+  int    opt_symbolic  =  1;
+  int    opt_numeric   =  1;
+
+  int    opt_llt       =  0;
+  int    opt_lu        =  0;
+
+  int    opt_mf        =  0;
+  int    opt_ll        =  0;
+
+  double opt_maxdepth  = 0.0; /* default meaning no limit */
+
+  int    opt_ooc       =  0;
+  char*            opt_ooc_name   = NULL;
+  void*            opt_ooc_handle = NULL;
+  int              local_handle_open   = FALSE;
+  int              local_handle_create = FALSE;
+  double           opt_ooc_memory = -1.0;
+
+  char*            opt_ordering   = NULL;
+
+  int    opt_cg          = 0;
+  int    opt_minres      = 0;
+  double opt_maxits      = 300.0;
+  double opt_convergetol = 1e-6;
+
+  int    opt_sg          = 0;
+  int    opt_amwb        = 0;
+  double opt_amwb_sg     = 1;
+  double opt_amwb_rnd    = 170566;
+  taucs_ccs_matrix* M    = NULL;
+  taucs_ccs_matrix* PMPT = NULL;
+  /* release request: no matrix and no right-hand sides; F itself may be NULL */
+  if (!A && nrhs==0) {
+    if (F) taucs_linsolve_free(*F);
+    if (F) *F = NULL;
+    return TAUCS_SUCCESS;
+  }
+
+  if (options) {
+    for (i=0; options[i]; i++) {
+      int understood = FALSE;
+      
+      understood |= taucs_getopt_pointer(options[i],opt_arg,"taucs.cilk.context",&opt_context);
+      understood |= taucs_getopt_double(options[i],opt_arg,"taucs.cilk.nproc",&opt_cilk_nproc);
+
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.approximate.sg",&opt_sg); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.approximate.amwb",&opt_amwb); 
+      understood |= taucs_getopt_double(options[i],opt_arg,"taucs.approximate.amwb.randomseed",&opt_amwb_rnd); 
+      understood |= taucs_getopt_double(options[i],opt_arg,"taucs.approximate.amwb.subgraphs",&opt_amwb_sg); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor",&opt_factor); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor.symbolic",&opt_symbolic); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor.numeric",&opt_numeric); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor.LLT",&opt_llt); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor.LU",&opt_lu); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor.mf",&opt_mf); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.factor.ll",&opt_ll); 
+      understood |= taucs_getopt_string(options[i],opt_arg,"taucs.factor.ordering",&opt_ordering); 
+      understood |= taucs_getopt_double(options[i],opt_arg,"taucs.maxdepth",&opt_maxdepth); 
+
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.ooc",&opt_ooc); 
+      understood |= taucs_getopt_string (options[i],opt_arg,"taucs.ooc.basename",&opt_ooc_name); 
+      understood |= taucs_getopt_pointer(options[i],opt_arg,"taucs.ooc.iohandle",&opt_ooc_handle); 
+      understood |= taucs_getopt_double (options[i],opt_arg,"taucs.ooc.memory",  &opt_ooc_memory); 
+
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.solve.cg",&opt_cg); 
+      understood |= taucs_getopt_boolean(options[i],opt_arg,"taucs.solve.minres",&opt_minres); 
+      understood |= taucs_getopt_double(options[i],opt_arg,"taucs.solve.maxits",&opt_maxits); 
+      understood |= taucs_getopt_double(options[i],opt_arg,"taucs.solve.convergetol",&opt_convergetol); 
+
+      if (!understood) taucs_printf("taucs_linsolve: illegal option [[%s]]\n",
+				    options[i]);
+    }
+  }
+
+  /* First, construct a preconditioner if one is needed */
+
+  if (opt_amwb) {
+    M = taucs_amwb_preconditioner_create(A,(int) opt_amwb_rnd,opt_amwb_sg,0 /* stretch flag */);
+    if (!M)
+      taucs_printf("taucs_linsolve: AMWB preconditioner construction failed, using A\n");
+  }
+
+  /* Then decide on the kind of factorization */
+
+  if (opt_factor) {
+    taucs_printf("taucs_linsolve: preparing to factor\n");
+    f = (taucs_factorization*) taucs_malloc(sizeof(taucs_factorization));
+    if (!f) {
+      taucs_printf("taucs_factor: memory allocation\n");
+      retcode = TAUCS_ERROR_NOMEM;
+      goto release_and_return;
+    }
+    f->n       = A->n;
+    f->type    = TAUCS_FACTORTYPE_NONE;
+    f->flags   = A->flags; /* remember data type */
+
+    if (!opt_numeric && (nrhs > 0)) {
+      taucs_printf("taucs_linsolve: WARNING, you can't solve without a numeric factorization\n");
+      opt_numeric = 1;
+    }
+
+    /* decide on ordering and order */  
+
+    if (!opt_ordering)
+      opt_ordering = opt_lu ? 
+	"colamd" : 
+#if defined(TAUCS_CONFIG_METIS)
+	"metis"
+#elif defined(TAUCS_CONFIG_GENMMD)
+	"genmmd"
+#elif defined(TAUCS_CONFIG_AMD)
+	"amd"
+#endif
+	;
+  
+    taucs_printf("taucs_linsolve: ordering (llt=%d, lu=%d, ordering=%s)\n",
+		 opt_llt,opt_lu,opt_ordering);
+    tw = taucs_wtime();
+    tc = taucs_ctime();
+    taucs_ccs_order(M ? M : A,&rowperm,&colperm,opt_ordering);
+    if (!rowperm) {
+      taucs_printf("taucs_factor: ordering failed\n");
+      retcode = TAUCS_ERROR_NOMEM;
+      goto release_and_return;
+    } else
+      taucs_printf("taucs_linsolve: ordering time %.02e seconds (%.02e seconds CPU time)\n",taucs_wtime()-tw,taucs_ctime()-tc);
+
+    f->rowperm = rowperm;
+    f->colperm = colperm;
+
+    if (opt_llt) {
+      taucs_printf("taucs_linsolve: starting LLT factorization\n");
+      if (M) {
+	taucs_printf("taucs_linsolve: pre-factorization permuting of M\n");
+	PMPT = taucs_ccs_permute_symmetrically(M,rowperm,colperm);
+	if (!PMPT) {
+	  taucs_printf("taucs_factor: permute rows and columns failed\n");
+	  retcode = TAUCS_ERROR_NOMEM;
+	  goto release_and_return;
+	}
+      } else {
+	taucs_printf("taucs_linsolve: pre-factorization permuting of A\n");
+	PAPT = taucs_ccs_permute_symmetrically(A,rowperm,colperm);
+	if (!PAPT) {
+	  taucs_printf("taucs_factor: permute rows and columns failed\n");
+	  retcode = TAUCS_ERROR_NOMEM;
+	  goto release_and_return;
+	}
+      }
+
+      if (opt_ooc) {
+	taucs_printf("taucs_linsolve: starting OOC LLT factorization\n");
+	if ((!opt_ooc_name && !opt_ooc_handle)
+	    || (opt_ooc_name && opt_ooc_handle)) {
+	  taucs_printf("taucs_linsolve: ERROR, you must specify either a basename or an iohandle for an out-of-core factorization\n");
+	  retcode = TAUCS_ERROR_BADARGS;
+	  goto release_and_return;
+	}
+
+	if (opt_ooc_name) {
+	  opt_ooc_handle = taucs_io_open_multifile(opt_ooc_name);
+	  if (opt_ooc_handle) {
+	    local_handle_open = TRUE;
+	  } else {
+	    opt_ooc_handle = taucs_io_create_multifile(opt_ooc_name);
+	    if (opt_ooc_handle) {
+	      local_handle_create = TRUE;
+	    } else {
+	      taucs_printf("taucs_linsolve: ERROR, could neither open nor create file [%s]\n",
+			   opt_ooc_name);
+	      retcode = TAUCS_ERROR;
+	      goto release_and_return;
+	    }
+	  }
+	}
+	taucs_printf("taucs_linsolve: ooc file created?=%d opened?=%d\n",
+		     local_handle_create,local_handle_open);
+	if (opt_ooc_memory < 0.0) opt_ooc_memory = taucs_available_memory_size();
+	if (taucs_ooc_factor_llt(PMPT ? PMPT : PAPT, 
+				 opt_ooc_handle, opt_ooc_memory) == TAUCS_SUCCESS)
+	  f->type = TAUCS_FACTORTYPE_LLT_OOC;
+	else {
+	  retcode = TAUCS_ERROR;
+	  goto release_and_return;
+	}
+      } else { /* in-core */
+	taucs_printf("taucs_linsolve: starting IC LLT factorization\n");
+	if (opt_mf) {
+	  taucs_printf("taucs_linsolve: starting IC LLT MF factorization\n");
+
+#ifdef TAUCS_CILK
+	  if (!opt_context) {
+	    char* argv[16]  = {"program_name" };
+	    char  bufs[16][16];
+	    int   p = 0;
+	    int   argc;
+
+	    for (argc=1; argc<16; argc++) argv[argc] = 0;
+	    argc = 1;
+	    
+	    argv[argc++] = "--pthread-stacksize";
+	    argv[argc++] = "2000000";
+	    argv[argc++] = "--stack";
+	    argv[argc++] = "2000000";
+
+	    if (opt_cilk_nproc > 0) {
+	      argv[argc++] = "--nproc";
+	      sprintf(bufs[p],"%d",(int) opt_cilk_nproc);
+	      argv[argc++] = bufs[p++];
+	    }
+
+	    taucs_printf("taucs_ccs_linsolve:_cilk_init\n");
+	    opt_context = Cilk_init(&argc,argv);
+	    local_context = TRUE;
+	  }
+#endif
+
+	  if (!opt_numeric && opt_symbolic)
+	    f->L = taucs_ccs_factor_llt_symbolic_maxdepth(PMPT ? PMPT : PAPT,(int) opt_maxdepth);
+
+	  if (opt_numeric && !opt_symbolic) {
+	    int rc; /* NOTE(review): assigned below but never checked - confirm the intended error handling */
+	    if (!F 
+		|| !(*F) 
+		|| ((taucs_factorization*)*F)->type != TAUCS_FACTORTYPE_LLT_SUPERNODAL) {
+	      taucs_printf("taucs_linsolve: ERROR, you need to provide a symbolic factorization for a numeric factorization\n");
+	      retcode = TAUCS_ERROR_BADARGS;
+	      goto release_and_return;
+	    }
+	    f->L = ((taucs_factorization*)*F)->L;
+	    taucs_supernodal_factor_free_numeric(f->L);
+
+#ifdef TAUCS_CILK	  
+	    rc = EXPORT(taucs_ccs_factor_llt_numeric)(opt_context, PMPT ? PMPT : PAPT, f->L);
+#else
+	    rc = taucs_ccs_factor_llt_numeric(PMPT ? PMPT : PAPT, f->L);
+#endif
+	  }
+
+	  if (opt_numeric && opt_symbolic) {
+#ifdef TAUCS_CILK	  
+	    f->L = EXPORT(taucs_ccs_factor_llt_mf_maxdepth)(opt_context,
+							    PMPT ? PMPT : PAPT,
+							    (int) opt_maxdepth);
+#else
+	    f->L = taucs_ccs_factor_llt_mf_maxdepth(PMPT ? PMPT : PAPT,(int) opt_maxdepth);
+#endif
+	  }
+
+	  if (! (f->L) ) {
+	    taucs_printf("taucs_factor: factorization failed\n");
+	    retcode = TAUCS_ERROR;
+	    goto release_and_return;
+	  } else {
+	    f->type = TAUCS_FACTORTYPE_LLT_SUPERNODAL;
+	  }
+
+#ifdef TAUCS_CILK
+	  if (local_context) Cilk_terminate((CilkContext*) opt_context);
+#endif
+	} else if (opt_ll || TRUE) { /* this will be the default LLT */
+	  taucs_printf("taucs_linsolve: starting IC LLT LL factorization\n");
+	  f->L = taucs_ccs_factor_llt_ll_maxdepth(PMPT ? PMPT : PAPT,(int) opt_maxdepth);
+	  if (! (f->L) ) {
+	    taucs_printf("taucs_factor: factorization failed\n");
+	    retcode = TAUCS_ERROR;
+	    goto release_and_return;
+	  } else {
+	    f->type = TAUCS_FACTORTYPE_LLT_SUPERNODAL;
+	  }
+	} /* left-looking */
+      } /* in-core */
+    } /* llt */
+  }
+
+  if (nrhs > 0) {
+
+    int             (*precond_fn)(void*,void* x,void* b) = NULL;
+    void*           precond_arg = NULL;
+    int    j;
+
+    if (!f) {
+      if (!F || !(*F)) {
+	taucs_printf("taucs_linsolve: can't solve, no factorization\n");
+	retcode = TAUCS_ERROR;
+	goto release_and_return;
+      } else {
+	if (F && *F)
+	  f = (taucs_factorization*) *F; /* borrowed from the caller, not owned here */
+	else {
+	  taucs_printf("taucs_linsolve: can't solve, no factorization\n");
+	  retcode = TAUCS_ERROR;
+	  goto release_and_return;
+	} 
+      }
+    }
+
+    taucs_printf("taucs_linsolve: preparing to solve\n");
+    PX = (void*) taucs_malloc(element_size(A->flags)*nrhs*(A->n));
+    PB = (void*) taucs_malloc(element_size(A->flags)*nrhs*(A->n));
+    if (!PB || !PX) {
+      taucs_printf("taucs_linsolve: memory allocation\n");
+      retcode = TAUCS_ERROR_NOMEM;
+      goto release_and_return;
+    }      
+
+    switch (f->type) {
+    case TAUCS_FACTORTYPE_NONE:
+      taucs_printf("taucs_linsolve: WARNING, no preconditioner\n");
+      precond_fn  = NULL;
+      precond_arg = NULL;
+      break;
+    case TAUCS_FACTORTYPE_LLT_SUPERNODAL:
+      precond_fn  = taucs_supernodal_solve_llt;
+      precond_arg = f->L;
+      break;
+    case TAUCS_FACTORTYPE_LLT_CCS:
+      precond_fn  = taucs_ccs_solve_llt;
+      precond_arg = f->L;
+      break;
+    case TAUCS_FACTORTYPE_LDLT_CCS:
+      precond_fn  = taucs_ccs_solve_ldlt;
+      precond_arg = f->L;
+      break;
+    case TAUCS_FACTORTYPE_LLT_OOC:
+      precond_fn  = taucs_ooc_solve_llt;
+      precond_arg = opt_ooc_handle;
+      break;
+    default:
+      assert(0);
+    }
+    
+    taucs_printf("taucs_linsolve: pre-solve permuting of A\n");
+    if (!PAPT) PAPT = taucs_ccs_permute_symmetrically(A,f->rowperm,f->colperm);
+    if (!PAPT) {
+      taucs_printf("taucs_factor: permute rows and columns failed\n");
+      retcode = TAUCS_ERROR_NOMEM;
+      goto release_and_return;
+    }
+
+    for (j=0; j<nrhs; j++) {
+      int ld = (A->n) * element_size(A->flags); /* bytes per right-hand side */
+      
+      taucs_vec_permute (A->n,A->flags,(char*)B+j*ld,(char*)PB+j*ld,f->rowperm);
+
+      if (opt_cg) {
+	taucs_conjugate_gradients (PAPT,
+				   precond_fn, precond_arg,
+				   (char*)PX+j*ld, (char*)PB+j*ld,
+				   (int) opt_maxits,
+				   opt_convergetol);
+	
+      } else if (opt_minres) {
+	taucs_minres              (PAPT,
+				   precond_fn, precond_arg,
+				   (char*)PX+j*ld, (char*)PB+j*ld,
+				   (int) opt_maxits,
+				   opt_convergetol);
+      } else if (precond_fn) {
+	(*precond_fn)(precond_arg,(char*)PX+j*ld,(char*)PB+j*ld);
+      } else {
+	taucs_printf("taucs_linsolve: I don't know how to solve!\n");
+	retcode = TAUCS_ERROR;
+	goto release_and_return;
+      }
+   
+      taucs_vec_ipermute(A->n,A->flags,(char*)PX+j*ld,(char*)X+j*ld,f->rowperm);
+    }
+
+    taucs_free(PB);
+    taucs_free(PX);
+  }
+  /* hand the factorization back through F, or release it when there is no handle */
+  if (F) {
+    if (local_handle_open)   taucs_io_close(opt_ooc_handle);
+    if (local_handle_create) taucs_io_close(opt_ooc_handle);
+    
+    *F = f;
+  } else {
+    if (f && f->type == TAUCS_FACTORTYPE_LLT_OOC) { /* f is NULL when nothing was factored */
+      if (local_handle_open)   taucs_io_close(opt_ooc_handle);
+      if (local_handle_create) taucs_io_delete(opt_ooc_handle);
+    }
+    taucs_linsolve_free(f);
+  }
+
+  taucs_ccs_free(PMPT);
+  taucs_ccs_free(PAPT);
+  taucs_ccs_free(M);
+
+  return retcode;
+
+release_and_return:
+  taucs_printf("taucs_linsolve: an error occured, releasing resources and bailing out\n");
+#ifdef TAUCS_CILK
+  if (local_context)  Cilk_terminate((CilkContext*) opt_context);
+#endif
+  taucs_free(rowperm);
+  taucs_free(colperm);
+  taucs_ccs_free(PMPT);
+  taucs_ccs_free(PAPT);
+  taucs_ccs_free(M);
+  taucs_free(PB);
+  taucs_free(PX);
+  if (!F || *F != f) taucs_free(f); /* never free a factorization borrowed from the caller's *F */
+  return retcode;
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
diff --git a/contrib/taucs/src/taucs_logging.c b/contrib/taucs/src/taucs_logging.c
new file mode 100644
index 0000000000000000000000000000000000000000..58176ed4deb74fbbaad3be7cccf8bb359fcbdbbc
--- /dev/null
+++ b/contrib/taucs/src/taucs_logging.c
@@ -0,0 +1,84 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <math.h>
+#include <stdarg.h>
+
+#include "taucs.h"
+
+#ifndef OSTYPE_win32
+#include <unistd.h>
+#endif
+
+/*********************************************************/
+/* logging                                               */
+/*********************************************************/
+
+#define LOG_NONE 0
+#define LOG_STDERR 1
+#define LOG_STDOUT 2
+#define LOG_FILE   3
+
+static char log_file_prefix[256];
+static int  log_file_type = LOG_NONE;
+static int  first_time = 0;
+
+void taucs_logfile(char* file_prefix) /* "stderr", "stdout", "none", or the name of a log file */
+{
+  if (!strcmp(file_prefix,"stderr")) {
+    log_file_type = LOG_STDERR;
+  } else if (!strcmp(file_prefix,"stdout")) {
+    log_file_type = LOG_STDOUT;
+  } else if (!strcmp(file_prefix,"none")) {
+    log_file_type = LOG_NONE;
+  } else {
+    strncpy(log_file_prefix,file_prefix,sizeof(log_file_prefix)-1); /* bounded: over-long names are truncated */
+    log_file_prefix[sizeof(log_file_prefix)-1] = '\0';              /* strncpy does not terminate on truncation */
+    log_file_type = LOG_FILE;
+    first_time = 1; /* makes the next taucs_printf call open the file */
+  }
+}
+
+int
+taucs_printf(char *fmt, ...) /* printf-style logging to the selected destination; always returns 0 */
+{
+  static FILE* logf;   /* stream the current message is written to */
+  static FILE* owned;  /* log file opened by this function, NULL if none */
+  va_list      ap;
+
+  if (log_file_type == LOG_NONE) return 0;
+
+  if (first_time && log_file_type == LOG_FILE) {
+    /* a file name was (re)selected: release the log file opened earlier, if any */
+    if (owned) fclose(owned);
+    logf = owned = fopen(log_file_prefix,"w");
+    if (logf == NULL) {
+      fprintf(stderr,"could not open log file %s, exiting\n",log_file_prefix);
+      exit(1);
+    }
+    first_time = 0;
+  }
+
+  if (log_file_type == LOG_STDERR) logf = stderr;
+  if (log_file_type == LOG_STDOUT) logf = stdout;
+
+  va_start(ap, fmt);
+
+  vfprintf(logf, fmt, ap);
+
+  fflush(logf);
+
+  va_end(ap);
+
+  return 0;
+}
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
diff --git a/contrib/taucs/src/taucs_malloc.c b/contrib/taucs/src/taucs_malloc.c
new file mode 100644
index 0000000000000000000000000000000000000000..03b5c400233400fa306730f27c89eb56305360b0
--- /dev/null
+++ b/contrib/taucs/src/taucs_malloc.c
@@ -0,0 +1,305 @@
+
+#include <stdlib.h>
+#include "taucs.h"
+
+#undef malloc
+#undef calloc
+#undef realloc
+#undef free
+
+void* taucs_malloc_stub (size_t size)               { return malloc(size); } /* forwards to the C library malloc */
+void* taucs_calloc_stub (size_t nmemb, size_t size) { return calloc(nmemb,size); } /* forwards to the C library calloc */
+void* taucs_realloc_stub(void* ptr, size_t size)    { return realloc(ptr,size); } /* forwards to the C library realloc */
+void  taucs_free_stub   (void* ptr)                 { free(ptr); } /* forwards to the C library free */
+
+#if !defined(TAUCS_MEMORY_TEST_yes)
+
+double taucs_allocation_amount()   { return 0.0; } /* memory testing compiled out: nothing is tracked */
+int    taucs_allocation_count()    { return 0; }   /* memory testing compiled out: always 0 */
+int    taucs_allocation_attempts() { return 0; }   /* memory testing compiled out: always 0 */
+void   taucs_allocation_assert_clean() {}           /* no-op in this configuration */
+void   taucs_allocation_mark_clean() {}             /* no-op in this configuration */
+void   taucs_allocation_induce_failure(int i) {}    /* no-op in this configuration; i is ignored */
+
+#else /* we do want memory testing */
+
+#define TABLE_SIZE 100000
+
+static int    allocation_initialized = 0;
+
+static void*  allocation_ptr  [TABLE_SIZE];
+static double allocation_size [TABLE_SIZE];
+static char*  allocation_file [TABLE_SIZE];
+static int    allocation_line [TABLE_SIZE];
+static int    allocation_clean[TABLE_SIZE];
+
+static int    allocation_attempts;
+static int    allocation_count;
+static double allocation_amount;
+static int    allocation_clean_count;
+static double allocation_clean_amount;
+
+static int    allocation_induced_failure;
+
+static void allocation_init()
+{
+  /* one-time setup: empty tracking table, zeroed counters */
+  int slot = TABLE_SIZE;
+
+  while (slot-- > 0)               /* mark every table slot as free */
+    allocation_ptr[slot] = NULL;
+
+  allocation_amount          = 0.0;
+  allocation_count           = 0;
+  allocation_attempts        = 0;
+  allocation_induced_failure = -1;
+  allocation_initialized     = 1;
+}
+
+static void allocation_insert(void* ptr, double size, char* file, int line)
+{
+  /*
+   * Records ptr (with its size and allocation site) in the first free slot
+   * found by linear probing from the pointer's hash position; terminates the
+   * process when the table has no free slot.
+   */
+  int i,j,slot;
+
+  slot = -1; /* none found yet */
+
+  /* low bits of the address taken as unsigned, so the start index is never negative */
+  j = (int) ((unsigned int) (size_t) ptr % TABLE_SIZE);
+
+  for (i=0; i<TABLE_SIZE; i++) {
+    if (allocation_ptr[j] == NULL) {
+      slot = j;
+      break;
+    }
+    j = (j+1) % TABLE_SIZE; /* wrap around instead of walking past the end of the table */
+  }
+
+  if (slot == -1) {
+    taucs_printf("TAUCS ALLOCATION ERROR: ALLOCATION TABLE IS FULL\n");
+    exit(1);
+  }
+
+  allocation_ptr  [slot] = ptr;
+  allocation_size [slot] = size;
+  allocation_file [slot] = file;
+  allocation_line [slot] = line;
+  allocation_clean[slot] = 0;
+}
+
+static double allocation_delete(void* ptr)
+{
+  /*
+   * Removes ptr from the tracking table and returns the size recorded for
+   * it, or -1.0 when ptr is not in the table.
+   */
+  int i,j,slot;
+  double size;
+
+  slot = -1; /* none found yet */
+
+  /* low bits of the address taken as unsigned, so the start index is never negative */
+  j = (int) ((unsigned int) (size_t) ptr % TABLE_SIZE);
+
+  for (i=0; i<TABLE_SIZE; i++) {
+    if (allocation_ptr[j] == ptr) {
+      slot = j;
+      break;
+    }
+    j = (j+1) % TABLE_SIZE; /* wrap around instead of walking past the end of the table */
+  }
+
+  if (slot == -1) return -1.0;
+
+  size = allocation_size[slot];
+
+  allocation_ptr[slot] = NULL;
+
+  return size;
+}
+
+double taucs_allocation_amount()   { return allocation_amount; }   /* bytes currently recorded as allocated */
+int    taucs_allocation_count()    { return allocation_count; }    /* number of blocks currently recorded */
+int    taucs_allocation_attempts() { return allocation_attempts; } /* successful allocations since init or mark_clean */
+
+void   taucs_allocation_induce_failure(int i)  { allocation_induced_failure = i; } /* the allocation made when attempts == i returns NULL */
+
+void taucs_allocation_assert_clean() 
+{
+  /*
+   * Reports every tracked block that is still allocated and was not marked
+   * clean, then terminates the process; returns silently if there is none.
+   */
+  int slot;
+  int leaks = 0;
+
+  for (slot=0; slot<TABLE_SIZE; slot++) {
+    if (allocation_ptr[slot] == NULL || allocation_clean[slot])
+      continue;
+
+    if (leaks == 0)  /* the banner goes out once, ahead of the first leak */
+      taucs_printf("TAUCS ALLOCATION ERROR: ASSERTED CLEAN, BUT FOUND\n");
+    leaks++;
+    taucs_printf("\tBLOCK ALLOCATED AT %s:%d STILL ALLOCATED (%.2e BYTES)\n",
+		 allocation_file[slot],allocation_line[slot],allocation_size[slot]);
+  }
+
+  if (leaks > 0) exit(1);
+}
+
+void taucs_allocation_mark_clean() 
+{
+  /* snapshots the current counters, restarts the attempt counter and
+     flags every block that is live right now as clean */
+  int slot = 0;
+
+  allocation_clean_amount = allocation_amount;
+  allocation_clean_count  = allocation_count;
+  allocation_attempts     = 0;
+
+  for (; slot<TABLE_SIZE; slot++)
+    if (allocation_ptr[slot]) allocation_clean[slot] = 1;
+}
+
+void* taucs_internal_calloc(size_t nmemb, size_t size,
+			    char* file, int line) /* tracked calloc; file/line identify the call site */
+{
+  void*  block;
+  double bytes;
+
+  if (nmemb*size == 0) {
+    taucs_printf("TAUCS ALLOCATION: ZERO SIZE (%s:%d)\n",
+		 file, line);
+    return NULL;
+  }
+
+  if (!allocation_initialized) allocation_init();
+  if (allocation_induced_failure == allocation_attempts) {
+    allocation_induced_failure = -1;
+    taucs_printf("TAUCS ALLOCATION: INDUCING FAILURE (%s:%d, count=%d)\n",
+		 file, line, allocation_attempts);
+    return NULL;
+  }
+
+  block = calloc(nmemb,size);
+  if (block == NULL) {
+    taucs_printf("TAUCS ALLOCATION WARNING: CALLOC AT %s:%d FAILED\n",
+		 file,line);
+    return NULL;
+  }
+  bytes = (double) nmemb * (double) size;
+  allocation_count++;
+  allocation_attempts++;
+  allocation_amount += bytes;
+  allocation_insert(block, bytes, file, line);
+  return block;
+}
+
+void* taucs_internal_malloc(size_t size,
+			    char* file, int line)
+{
+  void* ptr;
+  /* tracked malloc: zero-size requests and an induced failure both return NULL */
+  if (size == 0) {
+    taucs_printf("TAUCS ALLOCATION: ZERO SIZE (%s:%d)\n",
+		 file, line);
+    return NULL;
+  }
+
+  if (!allocation_initialized) allocation_init();
+
+  if (allocation_induced_failure == allocation_attempts) {
+    allocation_induced_failure = -1;
+    taucs_printf("TAUCS ALLOCATION: INDUCING FAILURE (%s:%d, count=%d)\n",
+		 file, line, allocation_attempts);
+    return NULL;
+  }
+
+  ptr = malloc(size);
+
+  if (ptr) {
+    allocation_count++;
+    allocation_attempts++;
+    allocation_amount += (double) size;
+    allocation_insert(ptr, (double) size, file, line);
+  } else 
+    taucs_printf("TAUCS ALLOCATION WARNING: MALLOC AT %s:%d FAILED\n",
+		 file,line);
+
+  return ptr;
+}
+
+void* taucs_internal_realloc(void *oldptr, size_t size,
+			    char* file, int line)
+/* tracked realloc: moves the table entry from oldptr to the returned block */
+{
+  void* ptr;
+
+  if (size == 0) {
+    taucs_printf("TAUCS ALLOCATION: ZERO SIZE (%s:%d)\n",
+		 file, line);
+    return NULL;
+  }
+
+  if (!allocation_initialized) allocation_init();
+
+
+  if (allocation_induced_failure == allocation_attempts) {
+    allocation_induced_failure = -1;
+    taucs_printf("TAUCS ALLOCATION: INDUCING FAILURE (%s:%d, count=%d)\n",
+		 file, line, allocation_attempts);
+    return NULL;
+  }
+
+  ptr= realloc(oldptr,size);
+
+  /* if realloc returns NULL, nothing happened (memory is not freed) */
+
+  if (ptr) {
+    /* realloc(NULL,size) acts as malloc: there is no old entry to remove */
+    if (oldptr) {
+      double oldsize = allocation_delete(oldptr);
+      if (oldsize == -1.0) {
+	taucs_printf("TAUCS ALLOCATION ERROR: REALLOC AT %s:%d NOT ALLOCATED\n",
+		     file,line);
+	exit(1);
+      }
+      allocation_count--;
+      allocation_amount -= oldsize;
+    }
+    allocation_count++;
+    allocation_attempts++;
+    allocation_amount += (double) size;
+    allocation_insert(ptr, (double) size, file, line);
+  } else 
+    taucs_printf("TAUCS ALLOCATION WARNING: REALLOC AT %s:%d FAILED\n",
+		 file,line);
+
+  return ptr;
+}
+
+void taucs_internal_free(void *oldptr,
+			 char* file, int line)
+{
+  /* tracked free: NULL is ignored, a pointer missing from the table is fatal */
+  double released;
+
+  if (!allocation_initialized) allocation_init();
+
+  if (oldptr != NULL) {
+    released = allocation_delete(oldptr);
+    if (released == -1.0) {
+      taucs_printf("TAUCS ALLOCATION ERROR: FREE AT %s:%d NOT ALLOCATED\n",
+		   file,line);
+      exit(1);
+    }
+    allocation_amount -= released;
+    allocation_count--;
+    free(oldptr);
+  }
+}
+
+#endif /* TAUCS_MEMORY_TEST_yes */
+
diff --git a/contrib/taucs/src/taucs_memory.c b/contrib/taucs/src/taucs_memory.c
new file mode 100644
index 0000000000000000000000000000000000000000..d355e06c0697f0d360f3ab6342bdfd935d58e7e9
--- /dev/null
+++ b/contrib/taucs/src/taucs_memory.c
@@ -0,0 +1,341 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <float.h>
+#include <math.h>
+#include <stdarg.h>
+
+#include "taucs.h"
+
+#ifndef OSTYPE_win32
+#include <unistd.h>
+#endif
+
+
+/********************************************************************/
+/* taucs_maximize_stacksize                                         */
+/*   tries to extend the stack as much as possible, to prevent      */
+/*   stack overflows in recursive functions                         */
+/********************************************************************/
+
+#ifndef OSTYPE_win32
+#include <unistd.h>
+
+/* without _XPG4_2, sys/time.h does not define timeval */
+#ifdef OSTYPE_solaris
+#define _XPG4_2
+#endif
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+int taucs_maximize_stacksize()
+{
+  struct rlimit l;
+  char rlim_cur[64];
+  char rlim_max[64];
+  /* returns 0 when the soft limit equals the hard limit on exit, -1 if getrlimit/setrlimit fails */
+  if (getrlimit(RLIMIT_STACK, &l) != 0) {
+    taucs_printf("taucs_maximize_stacksize: getrlimit() failed\n");
+    return -1;
+  }
+  /* divide before narrowing: "(int) l.rlim_cur / 1024" cast first and truncated large limits */
+  if (l.rlim_cur == RLIM_INFINITY) sprintf(rlim_cur,"unlimited");
+  else                             sprintf(rlim_cur,"%ldk",(long) (l.rlim_cur / 1024));
+  if (l.rlim_max == RLIM_INFINITY) sprintf(rlim_max,"unlimited");
+  else                             sprintf(rlim_max,"%ldk",(long) (l.rlim_max / 1024));
+  taucs_printf("taucs_maximize_stacksize: current stack size %s, max is %s\n",
+	       rlim_cur, rlim_max);
+
+  if (l.rlim_cur != l.rlim_max) {
+    /* raise the soft limit to the hard limit, then re-read and report what is now in effect */
+    l.rlim_cur = l.rlim_max;
+    
+    if (setrlimit(RLIMIT_STACK, &l) != 0) {
+      taucs_printf("taucs_maximize_stacksize: setrlimit() failed\n");
+      return -1;
+    }
+    
+    if (getrlimit(RLIMIT_STACK, &l) != 0) {
+      taucs_printf("taucs_maximize_stacksize: getrlimit() failed\n");
+      return -1;
+    }
+    
+    if (l.rlim_cur == RLIM_INFINITY) sprintf(rlim_cur,"unlimited");
+    else                             sprintf(rlim_cur,"%ldk",(long) (l.rlim_cur / 1024));
+    if (l.rlim_max == RLIM_INFINITY) sprintf(rlim_max,"unlimited");
+    else                             sprintf(rlim_max,"%ldk",(long) (l.rlim_max / 1024));
+    taucs_printf("taucs_maximize_stacksize: current stack size %s, max is %s\n",
+		 rlim_cur, rlim_max);
+  }
+
+  return 0;
+}
+
+#else /* win32 */
+int taucs_maximize_stacksize() /* Win32 variant: only prints how the stack can be enlarged by other means */
+{
+  taucs_printf("taucs_maximize_stacksize: not supported on Win32,\n");
+  taucs_printf("taucs_maximize_stacksize: compile with /F[stacksize] or run\n");
+  taucs_printf("taucs_maximize_stacksize: EDITBIN /STACK:[stacksize] *.EXE\n");
+  return -1; /* always -1 here: nothing is changed by this variant */
+}
+  
+#endif
+
+
+/********************************************************************/
+/* taucs_system_memory_size                                         */
+/*   returns size of memory reported by the operating system        */
+/*   should not normally be called by the user (call _avail_)       */
+/********************************************************************/
+
+#ifdef OSTYPE_linux
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+
+double taucs_system_memory_size()
+{
+  FILE* f;
+  double m, m_sc;
+  char unit[3] = ""; /* unit suffix that follows the number, if there is one */
+
+  m_sc  = (double) sysconf(_SC_PAGESIZE);
+  m_sc *= (double) sysconf(_SC_PHYS_PAGES);
+
+  /* total memory is the first number in /proc/meminfo; current kernels print it as "MemTotal: N kB" */
+  f = fopen("/proc/meminfo","r");
+  if (f==NULL) return m_sc;
+  if (fscanf(f,"%*[a-zA-Z :\n\r]%lf %2s",&m,unit) < 1) m = m_sc; /* unparsable: use the sysconf figure */
+  else if (unit[0]=='k' || unit[0]=='K') m *= 1024.0;               /* convert kB to bytes */
+  fclose(f); /* the stream used to be leaked on every path */
+  if (m != m_sc) {
+    taucs_printf("Warning: /proc/meminfo reports %lfMB of memory while\n",
+	       m/1048576.0);
+    taucs_printf("         sysconf       reports %lfMB of memory\n",
+	       m_sc/1048576.0);
+  }
+
+  return m;
+}
+#endif
+
+#ifdef OSTYPE_darwin
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+
+/* This is a BSD4.4 interface, so it should work on other BSD systems */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+double taucs_system_memory_size()
+{
+  int mib[2] = { CTL_HW, HW_PAGESIZE };
+  int int_retval;
+  unsigned long long physmem = 0; /* receives the 64-bit byte count from HW_MEMSIZE */
+  size_t len = sizeof(int);
+  
+  taucs_printf("taucs_system_memory_size: calling sysctl\n");
+  if ( sysctl(mib,2,
+	      &int_retval,&len,
+	      NULL, 0)) {
+    taucs_printf("taucs_system_memory_size: ERROR, sysctl failed (on darwin)\n");
+    return -1.0;
+  }
+  taucs_printf("  sysctl pagesize %d bytes\n",int_retval);
+
+  /* HW_PHYSMEM is returned as a 32-bit int and cannot represent 2 GB or more; HW_MEMSIZE is 64-bit */
+  mib[1] = HW_MEMSIZE;
+  len = sizeof(physmem);
+  if ( sysctl(mib,2,&physmem,&len,NULL, 0)) {
+    taucs_printf("taucs_system_memory_size: ERROR, sysctl failed (on darwin)\n");
+    return -1.0;
+  }
+  taucs_printf("  sysctl physmem %.0f bytes\n",(double) physmem);
+
+  return (double) physmem;
+}
+#endif
+
+#ifdef OSTYPE_aix
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+double taucs_system_memory_size()
+{
+  double m;
+  int child_stdout[2];
+  /* runs "lsattr -E -F value -l sys0 -a realmem" in a child and returns its numeric output times 1024 */
+
+  if (pipe(child_stdout) != 0) return -1.0;
+
+  /* a failed fork() (-1) takes the parent branch, where read() sees end-of-file and -1.0 is returned */
+  if (fork() == 0) {
+    char* argv[10];
+    int   i = 0; /* was -1, which stored the program name at argv[-1] and shifted every argument */
+    argv[i++] = "lsattr";
+    argv[i++] = "-E";
+    argv[i++] = "-F";
+    argv[i++] = "value";
+    argv[i++] = "-l";
+    argv[i++] = "sys0";
+    argv[i++] = "-a";
+    argv[i++] = "realmem";
+    argv[i++] = 0;
+    close(child_stdout[0]);
+    dup2(child_stdout[1],1); 
+    execv("/usr/sbin/lsattr",argv);
+    perror("System error (execv)");
+    exit(0);
+  } else {
+    char buffer[256];
+    int  nbytes;
+    /* parent continues */
+    close(child_stdout[1]);
+    nbytes = read(child_stdout[0],buffer,255); /* keep one byte for the terminator */
+    close(child_stdout[0]);
+    buffer[nbytes > 0 ? nbytes : 0] = 0; /* sscanf needs a NUL-terminated string */
+    if (sscanf(buffer,"%lf",&m) != 1) return -1.0;
+    return 1024.0 * m;
+  }
+}
+#endif
+
+#ifdef OSTYPE_solaris
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+/*#include <sys/unistd.h>*/
+
+double taucs_system_memory_size()
+{
+  /* physical memory in bytes: page size multiplied by the number of physical pages */
+  double page_bytes = (double) sysconf(_SC_PAGESIZE);
+  double page_count = (double) sysconf(_SC_PHYS_PAGES);
+
+  /* sysconf results are not checked; whatever it returns goes into the product */
+  return page_bytes * page_count;
+}
+#endif
+
+#ifdef OSTYPE_win32
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+#include <windows.h>
+
+double taucs_system_memory_size()
+{
+  MEMORYSTATUSEX ms;
+
+  /* GlobalMemoryStatusEx reports 64-bit sizes, so the result stays correct above 4 GB */
+  ms.dwLength = sizeof(ms); /* the API requires the caller to fill in the structure size */
+  if (!GlobalMemoryStatusEx(&ms)) {
+    taucs_printf("taucs_system_memory_size: ERROR, GlobalMemoryStatusEx failed\n");
+    return -1.0;
+  }
+  taucs_printf("taucs_system_memory_size: returning information from GlobalMemoryStatusEx\n");
+  taucs_printf("  Memory load                    = %03d%%\n",(int) ms.dwMemoryLoad);
+  taucs_printf("  Total Physical Memory          = %.0f MB\n",(double) ms.ullTotalPhys /1048576.0 );
+  taucs_printf("  Available Physical Memory      = %.0f MB\n",(double) ms.ullAvailPhys /1048576.0 );
+  taucs_printf("  Total Page File                = %.0f MB\n",(double) ms.ullTotalPageFile /1048576.0 );
+  taucs_printf("  Available Memory in Page File  = %.0f MB\n",(double) ms.ullAvailPageFile /1048576.0 );
+  taucs_printf("  Address-Space Size             = %.0f MB\n",(double) ms.ullTotalVirtual /1048576.0 );
+  taucs_printf("  Address-Space Available Memory = %.0f MB\n",(double) ms.ullAvailVirtual /1048576.0 );
+
+  return (double) ms.ullTotalPhys;
+}
+#endif
+
+#ifdef OSTYPE_irix
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+#include <sys/unistd.h>
+#include <sys/types.h>
+#include <sys/sysmp.h>
+#include <sys/sysinfo.h>
+
+double taucs_system_memory_size()
+{
+  double m;
+  struct rminfo p;
+  /* IRIX variant: returns the physmem field that sysmp(MP_SAGET, MPSA_RMINFO) fills in */
+  m  = (double) sysconf(_SC_PAGESIZE);
+  /*m  = (double) getpagesize();*/
+
+  taucs_printf("***************** %.0lf ************\n",m);
+  /* the page size above is only printed; m is overwritten below; a sysmp failure ends the process */
+  if (sysmp(MP_SAGET, MPSA_RMINFO, &p) == -1) {
+    perror("sysmp");
+    exit(1);
+  }
+  m = (double) (p.physmem);
+  taucs_printf("**$$$* %.0lf\n",m);  
+  /* NOTE(review): physmem may be a page count rather than bytes - confirm against the IRIX sysmp documentation */
+  return m;
+}
+#endif
+
+#ifndef TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+#define TAUCS_SYSTEM_MEMORY_SIZE_DEFINED
+double taucs_system_memory_size()
+{ /* fallback, compiled only when none of the platform-specific variants above defined the guard macro */
+  taucs_printf("Warning: cannot automatically determine main memory size\n");
+  taucs_printf("         for this platform\n");
+  return -1.0; /* taucs_available_memory_size only uses the system figure when it is positive */
+}
+#endif
+
+
+/********************************************************************/
+/* taucs_available_memory_size                                      */
+/*   returns size of memory available for allocation                */
+/********************************************************************/
+
+double taucs_available_memory_size() 
+{
+  /* Estimates, in bytes, how much memory can be allocated right now. */
+  const double MB = 1048576.0;
+  double sys_bytes;          /* figure reported by taucs_system_memory_size */
+  double lo, hi, tol, probe; /* bisection bracket, tolerance and current request size */
+  char*  block;
+
+  sys_bytes = taucs_system_memory_size();
+
+  /* Phase 1: starting at 1 MB, keep doubling for as long as an */
+  /* allocation of twice the current size succeeds.             */
+
+  for (probe = MB; ; probe = probe*2.0) {
+    block = (char*) taucs_malloc( (size_t) (probe*2.0) );
+    if (block == NULL) break;
+    taucs_free(block);
+  }
+
+  /* Phase 2: bisect between the last size and its double until */
+  /* the bracket is narrower than 1/128 of that size.           */
+
+  lo  = probe;
+  hi  = 2.0*probe;
+  tol = probe / 128.0;
+
+  while ( hi - lo > tol ) {
+    probe = lo + ( (hi-lo)/2.0 );
+    taucs_printf("taucs_avail_memory_size: [%.0lf %.0lf %.0lf]\n",
+	       lo / MB, probe / MB, hi / MB);
+    block = (char*) taucs_malloc( (size_t) probe );
+    if (block != NULL) lo = probe;
+    else               hi = probe;
+    taucs_free(block); /* reached with NULL too, after a failed attempt */
+  }
+
+  probe = lo;
+
+  taucs_printf("taucs_avail_memory_size: malloc test=%.0lf MB sys test=%.0lf MB\n",
+	     probe / MB, sys_bytes / MB);
+
+  /* A positive system figure caps the answer at 75% of it. */
+
+  if (sys_bytes > 0) {
+    sys_bytes = floor(0.75 * sys_bytes);
+    if (sys_bytes < probe) probe = sys_bytes;
+  }
+
+  return probe;
+}
+
diff --git a/contrib/taucs/src/taucs_ooc_io.c b/contrib/taucs/src/taucs_ooc_io.c
new file mode 100644
index 0000000000000000000000000000000000000000..94b7700443a41b291e072d69ec6d877db5eec0c4
--- /dev/null
+++ b/contrib/taucs/src/taucs_ooc_io.c
@@ -0,0 +1,1318 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Vladimir Rotking and Sivan Toledo             */
+/*                                                       */
+/* Out-of-Core Sparse Matrix I/O Subroutines             */
+/*********************************************************/
+
+/*************************************************************/
+/*                                                           */
+/*************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "taucs.h"
+
+#include <assert.h>
+#include <math.h>
+
+#ifdef OSTYPE_win32
+#include <io.h>
+#else
+#include <unistd.h>
+#include <sys/uio.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+/*************************************************************/
+/* io routines                                               */
+/*************************************************************/
+
+/*
+#define TAUCS_PACKED  256
+#define TAUCS_BYROW   512
+*/
+/* #define IO_TYPE_COMPLEX       2 */
+
+/* out-of-core matrix file types */
+
+#define IO_TYPE_SINGLEFILE    1
+#define IO_TYPE_MULTIFILE     0
+
+/* maximum file size in GB */
+
+#define IO_FILE_RESTRICTION   1024
+
+/* in taucs.h:
+typedef struct {
+  int   type;
+  int   nmatrices;
+  void* type_specific;
+} taucs_io_handle;
+*/
+
+typedef struct {
+  int   m;      /* set from the m argument of taucs_io_append; -1 in a slot not yet appended */
+  int   n;      /* set from the n argument of taucs_io_append; -1 in a slot not yet appended */
+  int   flags;  /* flags argument of taucs_io_append (element_size() derives the element width from it) */
+  off_t offset; /* position of the data in the file, used as the lseek target */
+} taucs_io_matrix_singlefile;
+
+typedef struct {
+  int   f;           /* descriptor returned by open() */
+  off_t last_offset; /* position at which the next appended matrix is written */
+  taucs_io_matrix_singlefile* matrices; /* table indexed by matrix index; NULL right after creation */
+} taucs_io_handle_singlefile;
+
+typedef struct {
+  int    m;      /* as in the single-file variant */
+  int    n;      /* as in the single-file variant */
+  int    flags;  /* as in the single-file variant */
+  double offset; /* position in the logical stream spanning all files, kept as a double */
+} taucs_io_matrix_multifile;
+
+typedef struct {
+  int    f[1024];           /* one descriptor per file; f[0] belongs to "<basename>.0" */
+  double last_offset;       /* logical position at which the next appended matrix starts */
+  int    last_created_file; /* index in f[] of the most recently created file */
+  char   basename[256];     /* prefix from which "<basename>.<index>" file names are built */
+  taucs_io_matrix_multifile* matrices; /* table indexed by matrix index; NULL right after creation */
+} taucs_io_handle_multifile;
+
+
+#define TAUCS_FILE_SIGNATURE "taucs"
+
+/*
+double disc_read = 0.0;
+double bytes_read =0.0;
+double time_read = 0.0;
+double disc_write = 0.0;
+double bytes_write = 0.0;
+double time_write = 0.0;
+*/
+
+/*************************************************************/
+/*                                                           */
+/*************************************************************/
+
+static int element_size(int flags)
+{
+  /* the first matching type bit wins, tested in the order below; no match trips the assert and yields -1 */
+  return (flags & TAUCS_SINGLE)   ? (int) sizeof(taucs_single)
+       : (flags & TAUCS_DOUBLE)   ? (int) sizeof(taucs_double)
+       : (flags & TAUCS_SCOMPLEX) ? (int) sizeof(taucs_scomplex)
+       : (flags & TAUCS_DCOMPLEX) ? (int) sizeof(taucs_dcomplex)
+       : (flags & TAUCS_INT)      ? (int) sizeof(int)
+       : (assert(0), -1);
+}
+
+/*************************************************************/
+/*                                                           */
+/*************************************************************/
+
+taucs_io_handle* taucs_io_create_multifile(char* basename)
+{
+  int f;
+  ssize_t nbytes;
+  int     nmatrices;
+  double  offset;
+  taucs_io_handle* h;
+  taucs_io_handle_multifile* hm;
+  mode_t mode;
+  mode_t perm;
+  char filename[256];
+
+  /* "<basename>.<index>" (index < 1024) must fit filename[], and basename must fit the handle's buffer */
+  if (strlen(basename) + 6 > sizeof(filename)) {
+    taucs_printf("taucs_create: file name %s is too long\n",basename);
+    return NULL;
+  }
+  sprintf(filename,"%s.%d",basename,0);
+#ifdef OSTYPE_win32
+  mode = _O_RDWR | _O_CREAT | _O_BINARY;
+  perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+  mode = O_RDWR | O_CREAT;
+  perm = 0644;
+#endif
+
+  f = open(filename,mode,perm);
+  if (f == -1) {
+    taucs_printf("taucs_create: Could not create metadata file %s\n",filename);
+    return NULL;
+  }
+  nbytes = write(f,TAUCS_FILE_SIGNATURE,strlen(TAUCS_FILE_SIGNATURE));
+  if (nbytes != strlen(TAUCS_FILE_SIGNATURE)) {
+    taucs_printf("taucs_create: Error writing metadata.\n");
+    goto fail;
+  }
+
+  nmatrices = 0;
+  offset = (double)(strlen(TAUCS_FILE_SIGNATURE)+sizeof(int)+sizeof(double));
+  nbytes = write(f,&nmatrices,sizeof(int));
+  if (nbytes != sizeof(int)) {
+    taucs_printf("taucs_create: Error writing metadata (2).\n");
+    goto fail;
+  }
+  nbytes = write(f,&offset   ,sizeof(double));
+  if (nbytes != sizeof(double)) {
+    taucs_printf("taucs_create: Error writing metadata (3).\n");
+    goto fail;
+  }
+  h = (taucs_io_handle*) taucs_malloc(sizeof(taucs_io_handle));
+  if (!h) {
+    taucs_printf("taucs_create: out of memory (4)\n");
+    goto fail;
+  }
+  hm = (taucs_io_handle_multifile*) taucs_malloc(sizeof(taucs_io_handle_multifile));
+  if (!hm) {
+    taucs_printf("taucs_create: out of memory (5)\n");
+    taucs_free(h);
+    goto fail;
+  }
+  h->type      = IO_TYPE_MULTIFILE;
+  h->nmatrices = 0;
+  h->type_specific = hm;
+  hm->f[0] = f;
+  hm->matrices = NULL;
+  hm->last_offset = offset;
+  hm->last_created_file = 0;
+  strcpy(hm->basename,basename);
+  h->nreads = h->nwrites = h->bytes_read =
+    h->bytes_written = h->read_time =h->write_time = 0.0;
+  return h;
+ fail:
+  close(f); /* every failure after open() used to leak this descriptor */
+  return NULL;
+}
+
+taucs_io_handle* taucs_io_create_singlefile(char* filename)
+{
+  int f;
+  ssize_t nbytes;
+  int     nmatrices;
+  off_t   offset;
+  taucs_io_handle* h;
+  taucs_io_handle_singlefile* hs;
+  mode_t mode;
+  mode_t perm;
+
+#ifdef OSTYPE_win32
+  mode = _O_RDWR | _O_CREAT | _O_BINARY;
+  perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+  mode = O_RDWR | O_CREAT;
+  perm = 0644;
+#endif
+
+  f = open(filename,mode,perm);
+  if (f == -1) {
+    taucs_printf("taucs_create: Could not create metadata file %s\n",filename);
+    return NULL;
+  }
+
+  nbytes = write(f,TAUCS_FILE_SIGNATURE,strlen(TAUCS_FILE_SIGNATURE));
+  if (nbytes != strlen(TAUCS_FILE_SIGNATURE)) {
+    taucs_printf("taucs_create: Error writing metadata.\n");
+    goto fail;
+  }
+
+  nmatrices = 0;
+  offset = strlen(TAUCS_FILE_SIGNATURE) + sizeof(int) + sizeof(off_t);
+  nbytes = write(f,&nmatrices,sizeof(int  ));
+  if (nbytes != sizeof(int)) {
+    taucs_printf("taucs_create: Error writing metadata (2).\n");
+    goto fail;
+  }
+  nbytes = write(f,&offset   ,sizeof(off_t));
+  if (nbytes != sizeof(off_t)) {
+    taucs_printf("taucs_create: Error writing metadata (3).\n");
+    goto fail;
+  }
+
+  h = (taucs_io_handle*) taucs_malloc(sizeof(taucs_io_handle));
+  if (!h) {
+    taucs_printf("taucs_create: out of memory (4)\n");
+    goto fail;
+  }
+  hs = (taucs_io_handle_singlefile*) taucs_malloc(sizeof(taucs_io_handle_singlefile));
+  if (!hs) {
+    taucs_printf("taucs_create: out of memory (5)\n");
+    taucs_free(h);
+    goto fail;
+  }
+  h->type      = IO_TYPE_SINGLEFILE;
+  h->nmatrices = 0;
+  h->type_specific = hs;
+  hs->f = f;
+  hs->matrices = NULL;
+  hs->last_offset = offset;
+
+  h->nreads = h->nwrites = h->bytes_read =
+    h->bytes_written = h->read_time =h->write_time = 0.0;
+
+  return h;
+
+ fail:
+  close(f); /* every failure after open() used to leak this descriptor */
+  return NULL;
+}
+
+int taucs_io_append(taucs_io_handle* f,
+		    int   index,
+		    int   m,int n,
+		    int   flags,
+		    void* data
+		    )
+{
+  int this_size = 0; /* warning */
+  int next_size;
+  int first_size = 0;
+  int added_files = 0;
+  int written_bytes = 0;
+  off_t this_offset;
+  double this_multi_offset,new_last_offset;
+  ssize_t nbytes;
+  int i;
+  mode_t mode;
+  mode_t perm;
+  char filename[256];
+  int file_id;
+  double wtime;
+ 
+  wtime = taucs_wtime();
+  if (index < 0) return -1; /* a negative index would address memory in front of the matrix table */
+  if (f->type == IO_TYPE_SINGLEFILE) {
+    taucs_io_handle_singlefile* h = ((taucs_io_handle_singlefile*) f->type_specific);
+    taucs_io_matrix_singlefile* matrices;
+   
+    if (index >= f->nmatrices){
+      /* grow through a temporary: on failure the old table stays valid and NULL is never dereferenced */
+      matrices = (taucs_io_matrix_singlefile*) taucs_realloc(h->matrices,
+					      (index + 1) * sizeof(taucs_io_matrix_singlefile));
+      if (!matrices) {
+	taucs_printf("taucs_append: out of memory \n");
+	return -1;
+      }
+      h->matrices = matrices;
+      for(i=f->nmatrices;i<index;i++){
+	h->matrices[i].m = -1;
+	h->matrices[i].n = -1;
+	h->matrices[i].flags = -1;
+	h->matrices[i].offset = -1;
+      }
+      f->nmatrices = index+1;
+    }
+    else
+      if(h->matrices[index].m!=-1||h->matrices[index].n!=-1){
+	taucs_printf("taucs_append: try append more than once for index=%d \n",index);
+	return -1;
+      }
+    
+
+    matrices = h->matrices;
+    this_offset = h->last_offset;
+    matrices[index].m = m;
+    matrices[index].n = n;
+    matrices[index].flags = flags;
+    matrices[index].offset = this_offset;
+    /* payload size in bytes; NOTE(review): computed in int, so very large m*n can overflow */
+    this_size = m * n * element_size(flags);
+    h->last_offset +=this_size; 
+    
+
+    /* the data is written at the end-of-data position recorded before this append */
+    if (lseek(h->f, this_offset, SEEK_SET) == -1) {
+      taucs_printf("taucs_append: lseek failed\n");
+      return -1;
+    }
+    
+    nbytes = write(h->f, data, this_size);
+    /* a short write is treated as an error; */
+    /* the ssize_t result is compared as an int */
+		if ((int)nbytes != this_size) { 
+      taucs_printf("taucs_append: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+      return -1;
+    }
+  }
+
+  if (f->type == IO_TYPE_MULTIFILE) {
+    taucs_io_handle_multifile* h = ((taucs_io_handle_multifile*) f->type_specific);
+    taucs_io_matrix_multifile* matrices;
+   
+    if (index >= f->nmatrices){
+      /* grow through a temporary: on failure the old table stays valid and NULL is never dereferenced */
+      matrices = (taucs_io_matrix_multifile*) taucs_realloc(h->matrices,
+					      (index + 1) * sizeof(taucs_io_matrix_multifile));
+      if (!matrices) {
+	taucs_printf("taucs_append: out of memory \n");
+	return -1;
+      }
+      h->matrices = matrices;
+      for(i=f->nmatrices;i<index;i++){
+	h->matrices[i].m = -1;
+	h->matrices[i].n = -1;
+	h->matrices[i].flags = -1;
+	h->matrices[i].offset = -1.0;
+      }
+      f->nmatrices = index+1;
+    }
+    else
+      if(h->matrices[index].m!=-1||h->matrices[index].n!=-1){
+	taucs_printf("taucs_append: try append more than once for index=%d \n",index);
+	return -1;
+      }
+    
+
+    matrices = h->matrices;
+    matrices[index].m = m;
+    matrices[index].n = n;
+    matrices[index].flags = flags;
+    matrices[index].offset =  h->last_offset;
+    /* payload size in bytes; NOTE(review): computed in int, so very large m*n can overflow */
+    this_size = m * n * element_size(flags);
+    new_last_offset = h->last_offset + ((double)this_size);
+    /* each file holds IO_FILE_RESTRICTION MB; case 1: the payload fits in the current file */
+    
+    if(new_last_offset < ((h->last_created_file+1)*IO_FILE_RESTRICTION*1024.0*1024.0)){    
+      this_multi_offset = h->last_offset - ((h->last_created_file)*IO_FILE_RESTRICTION*1024.0*1024.0);   
+      if (lseek(h->f[h->last_created_file],(off_t)this_multi_offset, SEEK_SET) == -1) {
+	taucs_printf("taucs_append: lseek failed\n");
+	return -1;
+      }
+					    
+      nbytes = write(h->f[h->last_created_file], data, this_size);
+      /* %p and the int cast below replace %08x/%d, which were handed a pointer and an ssize_t */
+			if ((int)nbytes != this_size) { 
+	taucs_printf("taucs_append: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	taucs_printf("taucs_append: index %d n %d m %d\n",index,n,m);
+	taucs_printf("taucs_append: trying to write %d bytes from %p, wrote %d\n",
+		     this_size,data,(int)nbytes);
+	if (nbytes==-1) perror("taucs_append");
+	return -1;
+      }
+    }
+    else{
+      if(h->last_offset < ((h->last_created_file+1)*IO_FILE_RESTRICTION*1024.0*1024.0)){
+	this_multi_offset = h->last_offset - 
+	  ((h->last_created_file)*IO_FILE_RESTRICTION*1024.0*1024.0);
+	if (lseek(h->f[h->last_created_file],(off_t)this_multi_offset, SEEK_SET) == -1) {
+	  taucs_printf("taucs_append: lseek failed\n");
+	  return -1;
+	}
+	first_size = (int)((IO_FILE_RESTRICTION*1024.0*1024.0) - this_multi_offset);
+	nbytes = write(h->f[h->last_created_file], data, first_size);
+	/* case 2: fill the remainder of the current file first */
+	if ((int)nbytes != first_size) { 
+	  taucs_printf("taucs_append: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	  return -1;
+	}
+      }
+
+      this_multi_offset = 0.0;
+      next_size = this_size - first_size;
+      written_bytes = first_size;
+      while(next_size>0){
+	if(next_size>IO_FILE_RESTRICTION*1024*1024)
+	  next_size = IO_FILE_RESTRICTION*1024*1024;
+	sprintf(filename,"%s.%d",h->basename,(h->last_created_file+1));
+	if (h->last_created_file+1 >= 1024) { taucs_printf("taucs_append: too many files\n"); return -1; } /* h->f[] has 1024 slots */
+#ifdef OSTYPE_win32
+	mode = _O_RDWR | _O_CREAT | _O_BINARY;
+	perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+	mode = O_RDWR | O_CREAT;
+	perm = 0644;
+#endif
+
+	file_id = open(filename,mode,perm);
+      
+	if (file_id == -1) {
+	  taucs_printf("taucs_append: Could not create metadata file %s\n",filename);
+	  return -1;
+	}
+	added_files++;
+	h->last_created_file++;
+	h->f[h->last_created_file] = file_id;
+	nbytes = write(h->f[h->last_created_file],((char*)data)+written_bytes,next_size);
+	/* the rest of the payload continues at the start of the new file */
+	if ((int)nbytes != next_size) { 
+	  taucs_printf("taucs_append: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	  return -1;
+	}
+	written_bytes += next_size;
+	next_size = this_size - written_bytes;
+      }
+    }
+    h->last_offset = new_last_offset; 
+ }
+  
+  wtime = taucs_wtime()-wtime;
+
+  f->nwrites       += 1.0;
+  f->bytes_written += (double) this_size;
+  f->write_time    += wtime;
+
+  /*disc_write += 1.0;*/
+  /*bytes_write += (double)this_size;*/
+  /*time_write += wtime;*/
+
+  return 0;
+}
+
+int   taucs_io_write(taucs_io_handle* f,
+		     int   index,
+		     int   m,int n,
+		     int   flags,
+		     void* data
+		     )
+{
+  int this_size;
+  off_t this_offset;
+  ssize_t nbytes;
+  double curr_file_offset;
+  int first_size;
+  int next_size,start_file_index;
+  int write_bytes;
+
+  if (f->type == IO_TYPE_SINGLEFILE) {
+    taucs_io_handle_singlefile* h = ((taucs_io_handle_singlefile*) f->type_specific);
+    taucs_io_matrix_singlefile* matrices;
+    
+    if (index<0 || index>=f->nmatrices) return -1;
+    matrices = h->matrices;
+    /* overwrites the data of an already appended matrix at its recorded offset */
+    this_size = m * n * element_size(flags);
+    this_offset = matrices[index].offset;
+        
+    if (lseek(h->f, this_offset, SEEK_SET) == -1) {
+      taucs_printf("taucs_write: lseek failed\n");
+      return -1;
+    }
+    nbytes = write(h->f, data, this_size);
+    /* a short write is treated as an error */
+		if ((int)nbytes != this_size) { 
+      taucs_printf("taucs_write: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+      return -1;
+    }
+  }
+
+  if (f->type == IO_TYPE_MULTIFILE) {
+    taucs_io_handle_multifile* h = ((taucs_io_handle_multifile*) f->type_specific);
+    taucs_io_matrix_multifile* matrices;
+    
+    if (index<0 || index>=f->nmatrices) return -1;
+    matrices = h->matrices;
+    /* split the logical offset into a file number and a position inside that file */
+    this_size = m * n * element_size(flags);
+    start_file_index = (int)floor((matrices[index].offset/(IO_FILE_RESTRICTION*1024*1024)));
+    curr_file_offset = matrices[index].offset - start_file_index*(IO_FILE_RESTRICTION*1024.0*1024.0);
+    if (start_file_index < 0 || start_file_index >= 1024) return -1; /* e.g. offset -1 of a slot never appended */
+    if (lseek(h->f[start_file_index],(off_t) curr_file_offset, SEEK_SET) == -1) {
+      taucs_printf("taucs_write: lseek failed\n");
+      return -1;
+    }
+    first_size = (int)((IO_FILE_RESTRICTION*1024.0*1024.0) - curr_file_offset);
+    if(this_size<first_size) first_size = this_size;
+
+    nbytes = write(h->f[start_file_index], data, first_size);
+    /* first_size is the part that fits in the starting file */
+		if ((int)nbytes != first_size) { 
+      taucs_printf("taucs_write: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+      return -1;
+    }
+    next_size = this_size - first_size;
+    write_bytes = first_size;
+
+    while(next_size>0){
+      if(next_size>IO_FILE_RESTRICTION*1024*1024)
+	next_size = IO_FILE_RESTRICTION*1024*1024;
+      start_file_index++;
+      if (lseek(h->f[start_file_index],0, SEEK_SET) == -1) {
+	taucs_printf("taucs_write: lseek failed\n");
+	return -1;
+      }
+      nbytes = write(h->f[start_file_index],((char*)data)+write_bytes,next_size);
+      /* the remainder continues at the start of each following file */
+      if ((int)nbytes != next_size) { 
+	taucs_printf("taucs_write: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	return -1;
+      }
+      write_bytes += next_size;
+      next_size = this_size - write_bytes;
+    }
+  }
+
+  return 0;
+}
+
+int   taucs_io_read(taucs_io_handle* f,
+		    int    index,
+		    int    m,int n,
+		    int    flags,
+		    void* data
+		    )
+{
+  int this_size = 0; /* warning */
+  off_t this_offset;
+  ssize_t nbytes;
+  double curr_file_offset;
+  int first_size;
+  int next_size,start_file_index;
+  int read_bytes;
+  double wtime;
+
+  wtime = taucs_wtime();
+
+  if (f->type == IO_TYPE_SINGLEFILE) {
+    taucs_io_handle_singlefile* h = ((taucs_io_handle_singlefile*) f->type_specific);
+    taucs_io_matrix_singlefile* matrices;
+    
+    if (index<0 || index>=f->nmatrices) return -1;
+    matrices = h->matrices;
+    /* reads m*n elements of the type given by flags from the matrix's recorded offset */
+    this_size = m * n * element_size(flags);
+    this_offset = matrices[index].offset;
+        
+    if (lseek(h->f, this_offset, SEEK_SET) == -1) {
+      taucs_printf("taucs_read: lseek failed\n");
+      return -1;
+    }
+    nbytes = read(h->f, data, this_size);
+    /* a short read is treated as an error */
+		if ((int)nbytes != this_size) { 
+      taucs_printf("taucs_read: Error reading data .\n");
+      return -1;
+    }
+  }
+
+  if (f->type == IO_TYPE_MULTIFILE) {
+    taucs_io_handle_multifile* h = ((taucs_io_handle_multifile*) f->type_specific);
+    taucs_io_matrix_multifile* matrices;
+    
+    if (index<0 || index>=f->nmatrices) return -1;
+    matrices = h->matrices;
+    /* split the logical offset into a file number and a position inside that file */
+    this_size = m * n * element_size(flags);
+    start_file_index = (int)floor((matrices[index].offset/(IO_FILE_RESTRICTION*1024*1024)));
+    curr_file_offset = matrices[index].offset - start_file_index*(IO_FILE_RESTRICTION*1024.0*1024.0);
+    if (start_file_index < 0 || start_file_index >= 1024) return -1; /* e.g. offset -1 of a slot never appended */
+    /* for find overflow */
+    assert(curr_file_offset < IO_FILE_RESTRICTION*1024.0*1024.0);
+
+    if (lseek(h->f[start_file_index],(off_t) curr_file_offset, SEEK_SET) == -1) {
+      taucs_printf("taucs_read: lseek failed\n");
+      return -1;
+    }
+    first_size = (int)(IO_FILE_RESTRICTION*1024.0*1024.0 - curr_file_offset);
+    if(this_size<first_size) first_size = this_size;
+
+    nbytes = read(h->f[start_file_index], data, first_size);
+    /* first_size is the part stored in the starting file */
+		if ((int)nbytes != first_size) { 
+      taucs_printf("taucs_read: Error reading data .\n");
+      return -1;
+    }
+    next_size = this_size - first_size;
+    read_bytes = first_size;
+
+    while(next_size>0){
+      if(next_size>IO_FILE_RESTRICTION*1024*1024)
+	next_size = IO_FILE_RESTRICTION*1024*1024;
+      start_file_index++;
+      if (lseek(h->f[start_file_index],0, SEEK_SET) == -1) {
+	taucs_printf("taucs_read: lseek failed\n");
+	return -1;
+      }
+      nbytes = read(h->f[start_file_index],((char*)data)+read_bytes,next_size);
+      /* the remainder continues at the start of each following file */
+      if ((int)nbytes != next_size) { 
+	taucs_printf("taucs_read: Error reading data .\n");
+	return -1;
+      }
+      read_bytes += next_size;
+      next_size = this_size - read_bytes;
+    }
+  }
+  
+  wtime = taucs_wtime()-wtime;
+
+  f->nreads     += 1.0;
+  f->read_time  += wtime;
+  f->bytes_read += (double) this_size;
+
+  /*disc_read += 1.0;*/
+  /*bytes_read += (double)this_size;*/
+  /*time_read += wtime;*/
+  
+  return 0;
+}
+
+int   taucs_io_close(taucs_io_handle* f)
+{
+  int i;
+  /*int this_size,last_size; omer*/
+  /*off_t offset; omer*/
+  ssize_t nbytes;
+  double curr_file_offset;
+  mode_t mode;
+  mode_t perm;
+  char filename[256];
+  int file_id;
+  int first_size;
+
+  if (f->type == IO_TYPE_SINGLEFILE) {
+    taucs_io_handle_singlefile* h = ((taucs_io_handle_singlefile*) f->type_specific);
+    taucs_io_matrix_singlefile* matrices;
+    matrices = h->matrices;
+
+    /* this no longer works! must deal with flags correctly for all data types */
+    /*last_size = matrices[f->nmatrices-1].m * matrices[f->nmatrices-1].n * ((matrices[f->nmatrices-1].flags & TAUCS_INT) ? sizeof(int) : sizeof(double));
+      offset = matrices[f->nmatrices-1].offset + last_size;*/
+
+    if (lseek(h->f, strlen(TAUCS_FILE_SIGNATURE), SEEK_SET) == -1) {
+      taucs_printf("taucs_close: lseek failed\n");
+      return -1;
+    }
+    /* writing number of matrices */
+    nbytes = write(h->f,&f->nmatrices, sizeof(int));
+    if (nbytes != sizeof(int)) { 
+      taucs_printf("taucs_close: Error writing metadata.\n");
+      return -1;
+    }
+    /* writing start offset of metadata */
+    nbytes = write(h->f,&(h->last_offset), sizeof(int));
+    if (nbytes != sizeof(int)) { 
+      taucs_printf("taucs_close: Error writing metadata.\n");
+      return -1;
+    }
+    if (lseek(h->f, h->last_offset, SEEK_SET) == -1) {
+	taucs_printf("taucs_close: lseek failed\n");
+	return -1;
+      }
+    /* writing metadata  for every matrix */
+    for(i=0; i<f->nmatrices; i++){
+      nbytes = write(h->f,&matrices[i].m, sizeof(int));
+      if (nbytes != sizeof(int)) { 
+	taucs_printf("taucs_close: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	return -1;
+	}
+      nbytes = write(h->f,&matrices[i].n, sizeof(int));
+      if (nbytes != sizeof(int)) { 
+	taucs_printf("taucs_close: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	return -1;
+	}
+      nbytes = write(h->f,&matrices[i].flags, sizeof(int));
+      if (nbytes != sizeof(int)) { 
+	taucs_printf("taucs_close: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	return -1;
+	}
+      nbytes = write(h->f,&matrices[i].offset, sizeof(off_t));
+      if (nbytes != sizeof(off_t)) { 
+	taucs_printf("taucs_close: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	return -1;
+      }
+    }
+    taucs_free(matrices);
+  }
+
+  if (f->type == IO_TYPE_MULTIFILE) {
+    taucs_io_handle_multifile* h = ((taucs_io_handle_multifile*) f->type_specific);
+    taucs_io_matrix_multifile* matrices;
+    
+    matrices = h->matrices;
+
+    if (lseek(h->f[0], strlen(TAUCS_FILE_SIGNATURE), SEEK_SET) == -1) {
+      taucs_printf("taucs_close: lseek failed\n");
+      return -1;
+    }
+    /* writing number of matrices */
+    nbytes = write(h->f[0],&(f->nmatrices), sizeof(int));
+    if (nbytes != sizeof(int)) { 
+      taucs_printf("taucs_close: Error writing metadata.\n");
+      return -1;
+    }
+    /* writing start offset of metadata */
+    nbytes = write(h->f[0],&(h->last_offset), sizeof(double));
+    if (nbytes != sizeof(double)) { 
+	taucs_printf("taucs_close: Error writing metadata.\n");
+	return -1;
+    }
+
+    curr_file_offset = h->last_offset - (h->last_created_file)*(IO_FILE_RESTRICTION*1024.0*1024.0);
+    if(!((int)curr_file_offset)){
+      sprintf(filename,"%s.%d",h->basename,(h->last_created_file+1));
+#ifdef OSTYPE_win32
+      mode = _O_RDWR | _O_CREAT | _O_BINARY;
+      perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+      mode = O_RDWR | O_CREAT;
+      perm = 0644;
+#endif
+
+      file_id = open(filename,mode,perm);
+      
+      if (file_id == -1) {
+	taucs_printf("taucs_close: Could not create metadata file %s\n",filename);
+	return -1;
+      }
+      h->last_created_file++;
+	h->f[h->last_created_file] = file_id;
+    }
+    else
+      if (lseek(h->f[h->last_created_file],(off_t) curr_file_offset, SEEK_SET) == -1) {
+	taucs_printf("taucs_close: lseek failed\n");
+	return -1;
+      }
+    
+    /* writing metadata  for every matrix */
+    for(i=0; i<f->nmatrices; i++){
+      /* write m */
+      if(curr_file_offset+(double)sizeof(int)<IO_FILE_RESTRICTION*1024.0*1024.0){
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].m),sizeof(int));
+	if (nbytes != sizeof(int)){ 
+	  taucs_printf("taucs_close: Error writing data (%s:%d).\n",__FILE__,__LINE__);
+	  return -1;
+	}
+	curr_file_offset += (double)sizeof(int);
+      }
+      else{
+	first_size = IO_FILE_RESTRICTION*1024*1024 - (int)curr_file_offset;
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].m),first_size);
+	/*if (nbytes != first_size) { omer*/
+	if ((int)nbytes != first_size) { 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	sprintf(filename,"%s.%d",h->basename,(h->last_created_file+1));
+
+#ifdef OSTYPE_win32
+	mode = _O_RDWR | _O_CREAT | _O_BINARY;
+	perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+	mode = O_RDWR | O_CREAT;
+	perm = 0644;
+#endif
+
+	file_id = open(filename,mode,perm);
+	
+	if (file_id == -1) {
+	  taucs_printf("taucs_close: Could not create metadata file %s\n",filename);
+	  return -1;
+	}
+	h->last_created_file++;
+	h->f[h->last_created_file] = file_id;
+	nbytes = write(h->f[h->last_created_file],(char*)&(matrices[i].m)+first_size,sizeof(int)-first_size);
+	if (nbytes != sizeof(int)-first_size){ 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	curr_file_offset = (double)(sizeof(int)-first_size);
+      }
+      /* write n */
+      if(curr_file_offset+(double)sizeof(int)<IO_FILE_RESTRICTION*1024.0*1024.0){
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].n),sizeof(int));
+	if (nbytes != sizeof(int)){ 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	curr_file_offset += (double)sizeof(int);
+      }
+      else{
+	first_size = IO_FILE_RESTRICTION*1024*1024 - (int)curr_file_offset;
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].n),first_size);
+	/*if (nbytes != first_size) { omer*/
+	if ((int)nbytes != first_size) { 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	sprintf(filename,"%s.%d",h->basename,(h->last_created_file+1));
+#ifdef OSTYPE_win32
+	mode = _O_RDWR | _O_CREAT | _O_BINARY;
+	perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+	mode = O_RDWR | O_CREAT;
+	perm = 0644;
+#endif
+	file_id = open(filename,mode,perm);
+	
+	if (file_id == -1) {
+	  taucs_printf("taucs_close: Could not create metadata file %s\n",filename);
+	  return -1;
+	}
+	h->last_created_file++;
+	h->f[h->last_created_file] = file_id;
+	nbytes = write(h->f[h->last_created_file],(char*)&(matrices[i].n)+first_size,sizeof(int)-first_size);
+	if (nbytes != sizeof(int)-first_size){ 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	curr_file_offset = (double)(sizeof(int)-first_size);
+      }
+      
+      /* write flags */
+      if(curr_file_offset+(double)sizeof(int)<IO_FILE_RESTRICTION*1024.0*1024.0){
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].flags),sizeof(int));
+	if (nbytes != sizeof(int)){ 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	curr_file_offset += (double)sizeof(int);
+      }
+      else{
+	first_size = IO_FILE_RESTRICTION*1024*1024 - (int)curr_file_offset;
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].flags),first_size);
+	/*if (nbytes != first_size) { omer*/
+	if ((int)nbytes != first_size) { 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	sprintf(filename,"%s.%d",h->basename,(h->last_created_file+1));
+#ifdef OSTYPE_win32
+	mode = _O_RDWR | _O_CREAT | _O_BINARY;
+	perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+	mode = O_RDWR | O_CREAT;
+	perm = 0644;
+#endif
+	file_id = open(filename,mode,perm);
+	
+	if (file_id == -1) {
+	  taucs_printf("taucs_close: Could not create metadata file %s\n",filename);
+	  return -1;
+	}
+	h->last_created_file++;
+	h->f[h->last_created_file] = file_id;
+	nbytes = write(h->f[h->last_created_file],(char*)&(matrices[i].flags)+first_size,sizeof(int)-first_size);
+	if (nbytes != sizeof(int)-first_size){ 
+	    taucs_printf("taucs_close: Error writing data .\n");
+	    return -1;
+	}
+	curr_file_offset = (double)(sizeof(int)-first_size);
+      }
+      /* write offset */
+      if(curr_file_offset+(double)sizeof(double)<IO_FILE_RESTRICTION*1024.0*1024.0){
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].offset),sizeof(double));
+	if (nbytes != sizeof(double)){ 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	curr_file_offset += (double)sizeof(double);
+      }
+      else{
+	first_size = IO_FILE_RESTRICTION*1024*1024 - (int)curr_file_offset;
+	nbytes = write(h->f[h->last_created_file],&(matrices[i].offset),first_size);
+	/*if (nbytes != first_size) { omer*/
+	if ((int)nbytes != first_size) { 
+	  taucs_printf("taucs_close: Error writing data .\n");
+	  return -1;
+	}
+	sprintf(filename,"%s.%d",h->basename,(h->last_created_file+1));
+#ifdef OSTYPE_win32
+	mode = _O_RDWR | _O_CREAT | _O_BINARY;
+	perm = _S_IREAD | _S_IWRITE | _S_IEXEC;
+#else
+	mode = O_RDWR | O_CREAT;
+	perm = 0644;
+#endif
+
+	file_id = open(filename,mode,perm);
+	if (file_id == -1) {
+	  taucs_printf("taucs_close: Could not create metadata file %s\n",filename);
+	    return -1;
+	}
+	h->last_created_file++;
+	h->f[h->last_created_file] = file_id;
+	nbytes = write(h->f[h->last_created_file],(char*)&(matrices[i].offset)+first_size,sizeof(double)-first_size);
+	  if (nbytes != sizeof(double)-first_size){ 
+	    taucs_printf("taucs_close: Error writing data .\n");
+	    return -1;
+	  }
+	  curr_file_offset = (double)(sizeof(double)-first_size);
+      }
+    }
+    for(i=0;i<=h->last_created_file;i++){
+      file_id=close(h->f[i]);
+      if (file_id == -1) {
+	sprintf(filename,"%s.%d",h->basename,i);
+	taucs_printf("taucs_close: Could not close data file %s\n",filename);
+	return -1;
+      }
+    }
+    taucs_free(matrices);
+  }
+  
+  taucs_free(f->type_specific);
+  taucs_free(f);
+  
+  return 0;
+}
+
+/*
+ * Open an existing single-file store for reading and writing and load its
+ * metadata table.
+ *
+ * Layout read here: after the file signature come the matrix count (an int)
+ * and the offset of the metadata table (stored as sizeof(int) bytes); the
+ * table itself holds, per matrix, m, n and flags (ints) and an off_t offset.
+ *
+ * filename: path of the data file.
+ *
+ * Returns a newly allocated handle, or NULL on any failure.  On failure the
+ * file descriptor and every allocation made here are released.
+ */
+taucs_io_handle* taucs_io_open_singlefile(char* filename)
+{
+  int f;
+  ssize_t nbytes;
+  taucs_io_handle* h;
+  mode_t mode;
+  taucs_io_handle_singlefile* hs;
+  int i;
+
+#ifdef OSTYPE_win32
+  mode = _O_RDWR | _O_BINARY;
+#else
+  mode = O_RDWR;
+#endif
+  f  = open(filename,mode);
+  if (f == -1) {
+    taucs_printf("taucs_open: Could not open existed data file %s\n",filename);
+    return NULL;
+  }
+
+  h = (taucs_io_handle*) taucs_malloc(sizeof(taucs_io_handle));
+  if (!h) {
+    taucs_printf("taucs_open: out of memory (4)\n");
+    close(f);
+    return NULL;
+  }
+  h->type      = IO_TYPE_SINGLEFILE;
+  h->type_specific = (taucs_io_handle_singlefile*) taucs_malloc(sizeof(taucs_io_handle_singlefile));
+  if (!h->type_specific) {
+    taucs_printf("taucs_open: out of memory \n");
+    taucs_free(h);
+    close(f);
+    return NULL;
+  }
+  hs = h->type_specific;
+  hs->f = f;
+  hs->matrices = NULL; /* lets the failure path free unconditionally */
+  /* Only sizeof(int) bytes of last_offset are stored in the file.  Clear the
+     field first so that, if it is wider than an int (presumably off_t --
+     confirm against the struct declaration), the remaining bytes are not
+     left holding uninitialised heap contents. */
+  hs->last_offset = 0;
+ 
+  if (lseek(hs->f, strlen(TAUCS_FILE_SIGNATURE), SEEK_SET) == -1) {
+    taucs_printf("taucs_open: lseek failed\n");
+    goto fail;
+  }
+  nbytes = read(hs->f, &h->nmatrices, sizeof(int));
+  if (nbytes != sizeof(int)) { 
+    taucs_printf("taucs_open: Error read data .\n");
+    goto fail;
+  }
+  nbytes = read(hs->f, &hs->last_offset, sizeof(int));
+  if (nbytes != sizeof(int)) { 
+    taucs_printf("taucs_open: Error read data .\n");
+    goto fail;
+  }
+
+  /* a negative count can only come from a damaged file */
+  if (h->nmatrices < 0) {
+    taucs_printf("taucs_open: corrupt metadata (negative matrix count)\n");
+    goto fail;
+  }
+
+  hs->matrices = 
+      (taucs_io_matrix_singlefile*) taucs_malloc((h->nmatrices)* sizeof(taucs_io_matrix_singlefile));
+  /* a zero-byte request may legitimately yield NULL, so only a non-empty
+     table makes a NULL result an error */
+  if (!hs->matrices && h->nmatrices > 0) {
+    taucs_printf("taucs_open: out of memory \n");
+    goto fail;
+  }
+
+  /* seek to the start of the metadata table */
+  if (lseek(hs->f, hs->last_offset, SEEK_SET) == -1) {
+    taucs_printf("taucs_open: lseek failed\n");
+    goto fail;
+  }
+  /* reading metadata for every matrix */
+  for(i=0; i<h->nmatrices; i++){
+    nbytes = read(hs->f,&hs->matrices[i].m, sizeof(int));
+    if (nbytes != sizeof(int)) { 
+      taucs_printf("taucs_open: Error reading data .\n");
+      goto fail;
+    }
+    nbytes = read(hs->f,&hs->matrices[i].n, sizeof(int));
+    if (nbytes != sizeof(int)) { 
+      taucs_printf("taucs_open: Error reading data .\n");
+      goto fail;
+    }
+    nbytes = read(hs->f,&hs->matrices[i].flags, sizeof(int));
+    if (nbytes != sizeof(int)) { 
+      taucs_printf("taucs_open: Error reading data .\n");
+      goto fail;
+    }
+    nbytes = read(hs->f,&hs->matrices[i].offset, sizeof(off_t));
+    if (nbytes != sizeof(off_t)) { 
+      taucs_printf("taucs_open: Error reading data .\n");
+      goto fail;
+    }
+  }
+  return h;
+
+ fail:
+  /* undo everything acquired above so a failed open leaks nothing */
+  if (hs->matrices) taucs_free(hs->matrices);
+  taucs_free(hs);
+  taucs_free(h);
+  close(f);
+  return NULL;
+}
+
+/*
+ * Helper for taucs_io_open_multifile: read one `size`-byte metadata item
+ * into `dst`.
+ *
+ * If the item fits before the per-file size limit it is read from the
+ * current file.  Otherwise the leading part is read from the current file,
+ * the next file "<basename>.<index+1>" is opened and recorded in hs->f[],
+ * and the remainder is read from it.
+ *
+ * *file_index  : index in hs->f[] of the file being read; advanced only
+ *                after the next file has been opened successfully, so
+ *                hs->f[0..*file_index] are always valid descriptors.
+ * *file_offset : read position inside that file; updated on success.
+ *
+ * Returns 0 on success, -1 on failure (a message has been printed).
+ */
+static int
+ooc_io_open_read_item(taucs_io_handle_multifile* hs,
+                      mode_t  mode,
+                      int*    file_index,
+                      double* file_offset,
+                      void*   dst,
+                      int     size)
+{
+  ssize_t nbytes;
+  int     first_size;
+  int     file_id;
+  int     max_files = (int)(sizeof(hs->f)/sizeof(hs->f[0]));
+  char    filename[256];
+
+  /* common case: the item lies entirely inside the current file */
+  if ((int)(*file_offset + size) < IO_FILE_RESTRICTION*1024*1024) {
+    nbytes = read(hs->f[*file_index],dst,size);
+    if (nbytes != size) {
+      taucs_printf("taucs_open: Error in open data .\n");
+      return -1;
+    }
+    *file_offset += (double)size;
+    return 0;
+  }
+
+  /* the item reaches the size limit: read what is left in this file */
+  first_size = IO_FILE_RESTRICTION*1024*1024 - (int)(*file_offset);
+  nbytes = read(hs->f[*file_index],dst,first_size);
+  if ((int)nbytes != first_size) {
+    taucs_printf("taucs_open: Error in open data .\n");
+    return -1;
+  }
+
+  /* ... then continue in the next file */
+  if (*file_index + 1 >= max_files) {
+    taucs_printf("taucs_open: too many data files\n");
+    return -1;
+  }
+  /* the caller has checked that basename plus suffix fits in filename[] */
+  sprintf(filename,"%s.%d",hs->basename,*file_index + 1);
+  file_id = open(filename,mode);
+  if (file_id == -1) {
+    taucs_printf("taucs_open: Could not open data file %s\n",filename);
+    return -1;
+  }
+  (*file_index)++;
+  hs->f[*file_index] = file_id;
+
+  nbytes = read(file_id,(char*)dst + first_size,size - first_size);
+  if (nbytes != size - first_size) {
+    taucs_printf("taucs_open: Error in open data .\n");
+    return -1;
+  }
+  *file_offset = (double)(size - first_size);
+  return 0;
+}
+
+/*
+ * Open an existing multi-file store "<basename>.0", "<basename>.1", ... for
+ * reading and writing and load its metadata table.
+ *
+ * File 0 holds, after the signature, the matrix count (int) and the global
+ * offset of the metadata table (double).  Every file up to the one that
+ * contains the start of the table is opened; the table (m, n, flags as ints
+ * and offset as a double, per matrix) is then read, continuing into further
+ * files where an item crosses the per-file size limit.
+ *
+ * basename: common prefix of the data files.  It must be short enough for
+ *           "<basename>.<index>" to fit in a 256-byte buffer and for the
+ *           name itself to fit in the handle's basename field.
+ *
+ * Returns a newly allocated handle, or NULL on any failure.  On failure all
+ * descriptors opened here are closed and all allocations are released.
+ */
+taucs_io_handle* taucs_io_open_multifile(char* basename)
+{
+  int file_id;
+  ssize_t nbytes;
+  taucs_io_handle* h;
+  mode_t mode;
+  taucs_io_handle_multifile* hs;
+  int i;
+  char filename[256];
+  int start_file_index;
+  int last_open;        /* hs->f[0..last_open] are open descriptors */
+  int max_files;
+  double curr_file_offset;
+
+  /* '.', at most 11 characters for a formatted int, and the terminator */
+  if (strlen(basename) + 13 > sizeof(filename)) {
+    taucs_printf("taucs_open: base file name is too long\n");
+    return NULL;
+  }
+
+  sprintf(filename,"%s.%d",basename,0);
+#ifdef OSTYPE_win32
+  mode = _O_RDWR | _O_BINARY;
+#else
+  mode = O_RDWR;
+#endif
+  file_id = open(filename,mode);
+
+  if (file_id == -1) {
+    taucs_printf("taucs_open: Could not open file %s\n",filename);
+    return NULL;
+  }
+
+  h = (taucs_io_handle*) taucs_malloc(sizeof(taucs_io_handle));
+  if (!h) {
+    taucs_printf("taucs_open: out of memory (4)\n");
+    close(file_id);
+    return NULL;
+  }
+  h->type      = IO_TYPE_MULTIFILE;
+  h->type_specific = (taucs_io_handle_multifile*) taucs_malloc(sizeof(taucs_io_handle_multifile));
+  if (!h->type_specific) {
+    taucs_printf("taucs_open: out of memory \n");
+    taucs_free(h);
+    close(file_id);
+    return NULL;
+  }
+  hs = h->type_specific;
+  hs->matrices = NULL; /* lets the failure path free unconditionally */
+  hs->f[0] = file_id;
+  last_open = 0;
+  max_files = (int)(sizeof(hs->f)/sizeof(hs->f[0]));
+
+  if (strlen(basename) >= sizeof(hs->basename)) {
+    taucs_printf("taucs_open: base file name is too long\n");
+    goto fail;
+  }
+  strcpy(hs->basename,basename);
+ 
+  if (lseek(hs->f[0], strlen(TAUCS_FILE_SIGNATURE), SEEK_SET) == -1) {
+    taucs_printf("taucs_open: lseek failed\n");
+    goto fail;
+  }
+  nbytes = read(hs->f[0], &h->nmatrices, sizeof(int));
+  if (nbytes != sizeof(int)) { 
+    taucs_printf("taucs_open: Error read data .\n");
+    goto fail;
+  }
+  nbytes = read(hs->f[0], &hs->last_offset, sizeof(double));
+  if (nbytes != sizeof(double)) { 
+    taucs_printf("taucs_open: Error read data .\n");
+    goto fail;
+  }
+
+  /* Both values come straight from disk.  Reject a negative count, a
+     negative or NaN offset, and an offset that would index past hs->f[]. */
+  if (h->nmatrices < 0 ||
+      !(hs->last_offset >= 0.0) ||
+      hs->last_offset/(IO_FILE_RESTRICTION*1024.0*1024.0) >= (double)max_files) {
+    taucs_printf("taucs_open: corrupt metadata\n");
+    goto fail;
+  }
+
+  hs->matrices = 
+      (taucs_io_matrix_multifile*) taucs_malloc((h->nmatrices)* sizeof(taucs_io_matrix_multifile));
+  /* a zero-byte request may legitimately yield NULL, so only a non-empty
+     table makes a NULL result an error */
+  if (!hs->matrices && h->nmatrices > 0) {
+    taucs_printf("taucs_open: out of memory \n");
+    goto fail;
+  }
+
+  /* Open every file up to and including the one where the metadata starts.
+     File 0 is already open, so start at 1: opening it again would overwrite
+     hs->f[0] and leak the first descriptor. */
+  start_file_index = (int)floor(((hs->last_offset)/(IO_FILE_RESTRICTION*1024*1024)));
+  /* NOTE(review): last_created_file is not advanced when the metadata read
+     below continues into a further file; this matches the previous
+     behaviour -- confirm that callers expect it. */
+  hs->last_created_file = start_file_index;
+  for(i=1;i<=start_file_index;i++){
+    sprintf(filename,"%s.%d",hs->basename,i);
+    file_id = open(filename,mode);
+    if (file_id == -1) {
+      taucs_printf("taucs_open: Could not open data file %s\n",filename);
+      goto fail;
+    }
+    hs->f[i] = file_id;
+    last_open = i;
+  }
+  
+  curr_file_offset = hs->last_offset - start_file_index*(IO_FILE_RESTRICTION*1024.0*1024.0);
+  /* seek to the start of the metadata table */
+  if (lseek(hs->f[start_file_index],(off_t) curr_file_offset, SEEK_SET) == -1) {
+    taucs_printf("taucs_open: lseek failed\n");
+    goto fail;
+  }
+
+  /* Reading metadata for every matrix.  From here on last_open doubles as
+     the index of the file being read (it equals start_file_index now). */
+  for(i=0; i<h->nmatrices; i++){
+    if (ooc_io_open_read_item(hs,mode,&last_open,&curr_file_offset,
+                              &hs->matrices[i].m,(int)sizeof(int)) == -1)
+      goto fail;
+    if (ooc_io_open_read_item(hs,mode,&last_open,&curr_file_offset,
+                              &hs->matrices[i].n,(int)sizeof(int)) == -1)
+      goto fail;
+    if (ooc_io_open_read_item(hs,mode,&last_open,&curr_file_offset,
+                              &hs->matrices[i].flags,(int)sizeof(int)) == -1)
+      goto fail;
+    if (ooc_io_open_read_item(hs,mode,&last_open,&curr_file_offset,
+                              &hs->matrices[i].offset,(int)sizeof(double)) == -1)
+      goto fail;
+  }
+
+  return h;
+
+ fail:
+  /* undo everything acquired above so a failed open leaks nothing */
+  for(i=0;i<=last_open;i++)
+    close(hs->f[i]);
+  if (hs->matrices) taucs_free(hs->matrices);
+  taucs_free(hs);
+  taucs_free(h);
+  return NULL;
+}
+
+/*
+ * Close and remove every data file of a multi-file store, then release the
+ * handle.
+ *
+ * A single-file handle is refused: a message is printed and -1 is returned
+ * with the handle left untouched.  For a multi-file handle each file
+ * "<basename>.<k>", k = 0..last_created_file, is closed and unlinked; a
+ * failed unlink is reported and remembered but does not stop the loop.
+ *
+ * Returns 0 if every file was removed, -1 otherwise.
+ */
+int taucs_io_delete(taucs_io_handle* f)
+{
+  char path[256];
+  int  status = 0;
+  int  k;
+
+  taucs_printf("taucs_io_delete: starting\n");
+
+  if (f->type == IO_TYPE_SINGLEFILE) {
+    taucs_printf("taucs_io_delete: delete only works on multifile; delete singlefile directly\n");
+    return -1;
+  }
+
+  if (f->type == IO_TYPE_MULTIFILE) {
+    taucs_io_handle_multifile* mf = (taucs_io_handle_multifile*) f->type_specific;
+
+    k = 0;
+    while (k <= mf->last_created_file) {
+      /* the descriptor is closed before its file is removed */
+      close(mf->f[k]);
+      sprintf(path,"%s.%d",mf->basename,k);
+      if (unlink(path) == -1) {
+        taucs_printf("taucs_io_delete: could not delete <%s>\n",path);
+        status = -1;
+      }
+      k++;
+    }
+
+    taucs_free(mf->matrices);
+  }
+
+  /* the handle itself is released whatever the unlink results were */
+  taucs_free(f->type_specific);
+  taucs_free(f);
+
+  taucs_printf("taucs_io_delete: done\n");
+
+  return status;
+}
+
+/*********************************************************/
+/* GET_BASENAME                                          */
+/* This routine is used in the ooc_lu code to generate   */
+/* additional temporary files                            */
+/*********************************************************/
+
+/*
+ * Return the base file name stored in a multi-file handle.
+ *
+ * The returned pointer refers to the handle's own basename field, not to a
+ * copy.  For a single-file handle a warning is printed and NULL is
+ * returned; any other handle type also yields NULL.
+ */
+char* taucs_io_get_basename(taucs_io_handle* f)
+{
+  char* name = NULL;
+
+  if (f->type == IO_TYPE_SINGLEFILE)
+    taucs_printf("taucs_io_get_basename: WARNING: only works on multifile\n");
+  else if (f->type == IO_TYPE_MULTIFILE)
+    name = ((taucs_io_handle_multifile*) f->type_specific)->basename;
+
+  return name;
+}
+
+
+/*************************************************************/
+/*                                                           */
+/*************************************************************/
diff --git a/contrib/taucs/src/taucs_private.h b/contrib/taucs/src/taucs_private.h
new file mode 100644
index 0000000000000000000000000000000000000000..9fff21672fda44768cb1217e88398aa1f87b0fe0
--- /dev/null
+++ b/contrib/taucs/src/taucs_private.h
@@ -0,0 +1,383 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#if defined(TAUCS_CORE_CILK) && defined(TAUCS_CILK)
+#pragma lang -C
+#endif
+
+/*** taucs_ccs_factor.c ***/
+int taucs_getopt_boolean(char* cmd, void* args[], char* name, int*    x);
+int taucs_getopt_double (char* cmd, void* args[], char* name, double* x);
+int taucs_getopt_pointer(char* cmd, void* args[], char* name, void**  x);
+int taucs_getopt_string (char* cmd, void* args[], char* name, char**  x);
+
+int taucs_linsolve(taucs_ccs_matrix* A, 
+		   void**            F,
+		   int               nrhs,
+		   void*             X,
+		   void*             B,
+		   char*             options[],
+		   void*             opt_arg[]);
+
+/*** taucs_ccs_base.c ***/
+
+extern taucs_datatype taucs_dtl(zero_const);
+extern taucs_datatype taucs_dtl(one_const);
+
+#ifndef TAUCS_C99_COMPLEX
+taucs_datatype taucs_dtl(complex_create_fn)(taucs_real_datatype re, 
+					    taucs_real_datatype im);
+#endif
+taucs_datatype taucs_dtl(add_fn)(taucs_datatype a, taucs_datatype b);
+taucs_datatype taucs_dtl(sub_fn)(taucs_datatype a, taucs_datatype b);
+taucs_datatype taucs_dtl(mul_fn)(taucs_datatype a, taucs_datatype b);
+taucs_datatype taucs_dtl(div_fn)(taucs_datatype a, taucs_datatype b);
+taucs_datatype taucs_dtl(neg_fn)(taucs_datatype a);
+taucs_datatype taucs_dtl(sqrt_fn)(taucs_datatype a);
+taucs_datatype taucs_dtl(conj_fn)(taucs_datatype a);
+double         taucs_dtl(abs_fn)(taucs_datatype a);
+
+/*** taucs_ccs_base.c ***/
+
+taucs_ccs_matrix* taucs_dtl(ccs_create)          (int m, int n, int nnz);
+taucs_ccs_matrix* taucs_ccs_create               (int m, int n, int nnz, int flags);
+void              taucs_dtl(ccs_free)            (taucs_ccs_matrix* matrix);
+void              taucs_ccs_free                 (taucs_ccs_matrix* matrix);
+
+/*** taucs_ccs_ops.c ***/
+
+void              taucs_dtl(ccs_split)           (taucs_ccs_matrix* A, 
+						  taucs_ccs_matrix** L, 
+						  taucs_ccs_matrix** R, 
+						  int p);
+void              taucs_ccs_split                (taucs_ccs_matrix* A, 
+						  taucs_ccs_matrix** L, 
+						  taucs_ccs_matrix** R, 
+						  int p);
+
+taucs_ccs_matrix* taucs_dtl(ccs_permute_symmetrically)(taucs_ccs_matrix* A, 
+						       int* perm, int* invperm);
+taucs_ccs_matrix*     taucs_ccs_permute_symmetrically (taucs_ccs_matrix* A, 
+						       int* perm, int* invperm);
+
+void              taucs_dtl(ccs_times_vec)       (taucs_ccs_matrix* m, 
+						  taucs_datatype* X,
+						  taucs_datatype* B);
+void                        taucs_ccs_times_vec  (taucs_ccs_matrix* m, 
+						  void* X,
+						  void* B);
+
+/* matrix-vector with double-precision accumulator for iterative refinement */
+void              taucs_sccs_times_vec_dacc      (taucs_ccs_matrix* m, 
+						  taucs_single* X,
+						  taucs_single* B);
+
+taucs_ccs_matrix* taucs_dtl(ccs_augment_nonpositive_offdiagonals)(taucs_ccs_matrix* A);
+taucs_ccs_matrix*     taucs_ccs_augment_nonpositive_offdiagonals (taucs_ccs_matrix* A);
+
+/*** taucs_ccs_io.c ***/
+
+int               taucs_dtl(ccs_write_ijv)       (taucs_ccs_matrix* matrix, 
+						  char* filename);
+int                    taucs_ccs_write_ijv       (taucs_ccs_matrix* matrix, 
+						  char* filename);
+taucs_ccs_matrix* taucs_dtl(ccs_read_ijv)        (char* filename,int flags);
+taucs_ccs_matrix*     taucs_ccs_read_ijv         (char* filename,int flags);
+taucs_ccs_matrix* taucs_dtl(ccs_read_mtx)        (char* filename,int flags);
+taucs_ccs_matrix*     taucs_ccs_read_mtx         (char* filename,int flags);
+taucs_ccs_matrix* taucs_dtl(ccs_read_ccs)        (char* filename,int flags);
+taucs_ccs_matrix*     taucs_ccs_read_ccs         (char* filename,int flags);
+taucs_ccs_matrix* taucs_ccs_read_binary          (char* filename);
+void*             taucs_vec_read_binary (int n, int flags,          char* filename);
+int               taucs_vec_write_binary(int n, int flags, void* v, char* filename);
+taucs_ccs_matrix* taucs_ccs_read_hb              (char* filename,int flags);
+
+/*** taucs_ccs_order.c ***/
+
+void              taucs_ccs_order                (taucs_ccs_matrix* matrix, 
+						  int** perm, int** invperm,
+						  char* which);
+
+/*** taucs_ccs_factor_llt.c ***/
+
+taucs_ccs_matrix* taucs_dtl(ccs_factor_llt)      (taucs_ccs_matrix* A,
+						  double droptol, int modified);
+taucs_ccs_matrix*     taucs_ccs_factor_llt       (taucs_ccs_matrix* A,
+						  double droptol, int modified);
+taucs_ccs_matrix* taucs_ccs_factor_llt_partial   (taucs_ccs_matrix* A, 
+						  int p);
+taucs_ccs_matrix* taucs_dtl(ccs_factor_llt_partial)(taucs_ccs_matrix* A, 
+						    int p);
+
+taucs_ccs_matrix* taucs_dtl(ccs_factor_ldlt)     (taucs_ccs_matrix* A);
+taucs_ccs_matrix*     taucs_ccs_factor_ldlt      (taucs_ccs_matrix* A);
+
+taucs_ccs_matrix* taucs_dtl(ccs_factor_xxt)      (taucs_ccs_matrix* A);
+
+/*** taucs_ccs_solve_llt.c ***/
+
+int               taucs_ccs_solve_llt            (void* L, void* x, void* b);
+int               taucs_dtl(ccs_solve_llt)       (void* L, taucs_datatype* x, taucs_datatype* b);
+int               taucs_ccs_solve_ldlt           (void* L, void* x, void* b);
+int               taucs_dtl(ccs_solve_ldlt)      (void* L, taucs_datatype* x, taucs_datatype* b);
+int               taucs_ccs_solve_schur          (taucs_ccs_matrix* L,
+						  taucs_ccs_matrix* schur_comp,
+						  int    (*schur_precond_fn)(void*,void* x,void* b),
+						  void*  schur_precond_args,
+						  int    maxits,
+						  double convratio,
+						  void* x, void* b);
+int               taucs_dtl(ccs_solve_schur)     (taucs_ccs_matrix* L,
+						  taucs_ccs_matrix* schur_comp,
+						  int    (*schur_precond_fn)(void*,void* x,void* b),
+						  void*  schur_precond_args,
+						  int    maxits,
+						  double convratio,
+						  taucs_datatype* x, taucs_datatype* b);
+
+/***  ***/
+
+taucs_ccs_matrix* taucs_ccs_factor_xxt           (taucs_ccs_matrix* A);
+int               taucs_ccs_solve_xxt            (void* X, double* x, double* b);
+
+taucs_ccs_matrix* taucs_ccs_generate_mesh2d      (int n,char *which);
+taucs_ccs_matrix* taucs_ccs_generate_mesh2d_negative(int n);
+taucs_ccs_matrix* taucs_ccs_generate_mesh3d      (int X, int Y, int Z);
+taucs_ccs_matrix* taucs_ccs_generate_dense       (int m,int n, int flags);
+taucs_ccs_matrix* taucs_ccs_generate_rrn         (int X, int Y, int Z, 
+						  double drop_probability, 
+						  double rmin);
+taucs_ccs_matrix* taucs_ccs_generate_discontinuous(int X, int Y, int Z, 
+						   double jump);
+double* taucs_vec_generate_continuous            (int X, int Y, int Z, char* which);
+
+int taucs_conjugate_gradients                    (taucs_ccs_matrix*  A,
+						  int               (*precond_fn)(void*,void* x,void* b),
+						  void*             precond_args,
+						  void*             X,
+						  void*             B,
+						  int               itermax,
+						  double            convergetol);
+
+int taucs_minres                                 (taucs_ccs_matrix*  A,
+						  int               (*precond_fn)(void*,void* x,void* b),
+						  void*             precond_args,
+						  void*             X,
+						  void*             B,
+						  int               itermax,
+						  double            convergetol);
+
+int taucs_sg_preconditioner_solve                (void*   P,
+						  double* z, 
+						  double* r);
+
+void *taucs_sg_preconditioner_create             (taucs_ccs_matrix *A,
+						  int **perm,
+						  int **invperm,
+						  char* ordering,
+						  char *specification);
+void taucs_sg_preconditioner_free                (void* P);
+
+taucs_ccs_matrix*
+taucs_amwb_preconditioner_create                 (taucs_ccs_matrix *symccs_mtxA, 
+						  int rnd,
+						  double subgraphs,
+						  int stretch_flag);
+
+void* 
+taucs_recursive_amwb_preconditioner_create       (taucs_ccs_matrix* A, 
+						  double c, 
+						  double epsilon, 
+						  int nsmall,
+						  int maxlevels,
+						  int innerits,
+						  double convratio,
+						  int** perm, 
+						  int** invperm);
+
+int
+taucs_recursive_amwb_preconditioner_solve        (void* P, 
+						  void* Z, 
+						  void* R);
+
+int taucs_dtl(ccs_etree)                         (taucs_ccs_matrix* A,
+						  int* parent,
+						  int* l_colcount,
+						  int* l_rowcount,
+						  int* l_nnz);
+
+int      
+taucs_dtl(ccs_symbolic_elimination)              (taucs_ccs_matrix* A,
+						  void* L,
+						  int do_order,
+						  int max_depth
+						  );
+
+void* taucs_dtl(ccs_factor_llt_symbolic)         (taucs_ccs_matrix* A);
+void* taucs_dtl(ccs_factor_llt_symbolic_maxdepth)(taucs_ccs_matrix* A,int max_depth);
+taucs_cilk int   taucs_dtl(ccs_factor_llt_numeric)          (taucs_ccs_matrix* A,void* L);
+
+taucs_cilk void* taucs_dtl(ccs_factor_llt_mf)               (taucs_ccs_matrix* A);
+taucs_cilk void* taucs_dtl(ccs_factor_llt_mf_maxdepth)      (taucs_ccs_matrix* A,int max_depth);
+void* taucs_dtl(ccs_factor_llt_ll)               (taucs_ccs_matrix* A);
+void* taucs_dtl(ccs_factor_llt_ll_maxdepth)      (taucs_ccs_matrix* A,int max_depth);
+int   taucs_dtl(supernodal_solve_llt)            (void* vL, void* x, void* b);
+void taucs_dtl(supernodal_factor_free)                (void* L);
+void taucs_dtl(supernodal_factor_free_numeric)        (void* L);
+taucs_ccs_matrix* taucs_dtl(supernodal_factor_to_ccs) (void* L);
+taucs_datatype* taucs_dtl(supernodal_factor_get_diag) (void* L);
+
+int taucs_ccs_etree                              (taucs_ccs_matrix* A,
+						  int* parent,
+						  int* l_colcount,
+						  int* l_rowcount,
+						  int* l_nnz);
+
+int      
+taucs_ccs_symbolic_elimination                   (taucs_ccs_matrix* A,
+						  void* L,
+						  int do_order,
+						  int max_depth
+						  );
+
+void* taucs_ccs_factor_llt_symbolic              (taucs_ccs_matrix* A);
+void* taucs_ccs_factor_llt_symbolic_maxdepth     (taucs_ccs_matrix* A,int max_depth);
+taucs_cilk int   taucs_ccs_factor_llt_numeric               (taucs_ccs_matrix* A,void* L);
+
+taucs_cilk void* taucs_ccs_factor_llt_mf                    (taucs_ccs_matrix* A);
+taucs_cilk void* taucs_ccs_factor_llt_mf_maxdepth           (taucs_ccs_matrix* A,int max_depth);
+void* taucs_ccs_factor_llt_ll                    (taucs_ccs_matrix* A);
+void* taucs_ccs_factor_llt_ll_maxdepth           (taucs_ccs_matrix* A,int max_depth);
+int   taucs_supernodal_solve_llt                 (void* vL, void* x, void* b);
+void taucs_supernodal_factor_free                (void* L);
+void taucs_supernodal_factor_free_numeric        (void* L);
+taucs_ccs_matrix* taucs_supernodal_factor_to_ccs (void* L);
+void* taucs_supernodal_factor_get_diag           (void* L);
+
+taucs_double taucs_vec_norm2(int n, int flags, void* x);
+void* taucs_dtl(vec_create)  (int n);
+void* taucs_vec_create       (int n, int flags);
+void  taucs_dtl(vec_axpby)   (int n,
+			      taucs_real_datatype a,taucs_datatype* x,
+			      taucs_real_datatype b,taucs_datatype* y,
+			      taucs_datatype* axpby);
+void  taucs_vec_axpby        (int n,int flags,
+			      taucs_double a,void* x,
+			      taucs_double b,void* y,
+			      void* axpby);
+void taucs_dtl(vec_permute)  (int n, taucs_datatype v[],  taucs_datatype pv[], int p[]);
+void taucs_dtl(vec_ipermute) (int n, taucs_datatype pv[], taucs_datatype v[],  int invp[]);
+void taucs_vec_permute(int n, int flags, void* v, void* pv, int p[]);
+void taucs_vec_ipermute(int n, int flags, void* v, void* pv, int p[]);
+
+
+/*********************************************************/
+/* Utilities                                             */
+/*********************************************************/
+
+void   taucs_logfile(char* file_prefix);
+int    taucs_printf(char *fmt, ...);
+int    taucs_maximize_stacksize(void);
+double taucs_system_memory_size(void);
+double taucs_available_memory_size(void);
+double taucs_wtime(void);
+double taucs_ctime(void);
+
+/*********************************************************/
+/* Out-of-core IO routines                               */
+/*********************************************************/
+
+/* Single-file stores: all data lives in one file named `filename`.        */
+/* taucs_io_open_singlefile opens an existing file read/write, loads its    */
+/* metadata table and returns a new handle, or NULL on failure.             */
+/* (taucs_io_create_singlefile presumably creates a new store -- see its    */
+/* definition in taucs_ooc_io.c.)                                           */
+taucs_io_handle* taucs_io_create_singlefile(char* filename);
+taucs_io_handle* taucs_io_open_singlefile(char* filename);
+
+/* Multi-file stores: `filename` is a base name; the data files are named   */
+/* "<base>.0", "<base>.1", ...  taucs_io_open_multifile opens the existing  */
+/* files read/write, loads the metadata table and returns a new handle, or  */
+/* NULL on failure.                                                         */
+taucs_io_handle* taucs_io_create_multifile(char* filename);
+taucs_io_handle* taucs_io_open_multifile(char* filename);
+
+/* taucs_io_close writes the metadata back and frees the handle; it returns */
+/* 0 on success and -1 on an I/O error.                                     */
+/* taucs_io_delete works on multi-file handles only: it closes and unlinks  */
+/* every data file and frees the handle, returning 0 on success and -1 if   */
+/* a file could not be removed.  For a single-file handle it returns -1     */
+/* without freeing anything.                                                */
+int              taucs_io_close (taucs_io_handle* f);
+int              taucs_io_delete(taucs_io_handle* f);
+
+int              taucs_io_append(taucs_io_handle* f,
+				 int   index,
+				 int   m,int n,
+				 int   flags,
+				 void* data
+				 );
+int              taucs_io_write(taucs_io_handle* f,
+				int   index,
+				int   m,int n,
+				int   flags,
+				void* data
+				);
+int              taucs_io_read(taucs_io_handle* f,
+			       int   index,
+			       int   m,int n,
+			       int   flags,
+			       void* data
+			       );
+
+char*            taucs_io_get_basename(taucs_io_handle* f);
+
+/*********************************************************/
+/* Out-of-core Sparse Cholesky routines                  */
+/*********************************************************/
+
+int taucs_dtl(ooc_factor_llt)(taucs_ccs_matrix* A, 
+			      taucs_io_handle*  L,
+			      double memory);
+/*added omer*/
+int taucs_dtl(ooc_factor_llt_panelchoice)(taucs_ccs_matrix* A, 
+					  taucs_io_handle* handle,
+					  double memory,
+					  int panelization_method);
+/* end omer*/
+int taucs_dtl(ooc_solve_llt) (void* L /* actual type: taucs_io_handle* */,
+			      void* x, void* b);
+
+int taucs_ooc_factor_llt(taucs_ccs_matrix* A, 
+			 taucs_io_handle*  L,
+			 double memory);
+int taucs_ooc_solve_llt (void* L /* actual type: taucs_io_handle* */,
+			 void* x, void* b);
+
+/*********************************************************/
+/* Out-of-core Sparse LU                                 */
+/*********************************************************/
+
+void taucs_dtl(ooc_factor_lu)(taucs_ccs_matrix* A_in,
+		              int    colperm[],
+                              taucs_io_handle* LU,
+  	                      double memory);
+
+int  taucs_ooc_factor_lu     (taucs_ccs_matrix* A_in,
+		              int*   colperm,
+                              taucs_io_handle* LU,
+  	                      double memory);
+
+int taucs_dtl(ooc_solve_lu)(taucs_io_handle*   LU,
+			    taucs_datatype* x, 
+                            taucs_datatype* b);
+
+int taucs_ooc_solve_lu     (taucs_io_handle*   LU,
+			    void* x, 
+                            void* b);
+
+
+/*********************************************************/
+/* Utilities                                             */
+/*********************************************************/
+
+/* NOTE: these six prototypes repeat, unchanged, declarations already made  */
+/* in the first "Utilities" section of this header.  The repetition is      */
+/* harmless, since identical redeclarations are allowed in C.               */
+void   taucs_logfile(char* file_prefix);
+int    taucs_printf(char *fmt, ...);
+double taucs_system_memory_size(void);
+double taucs_available_memory_size(void);
+double taucs_wtime(void);
+double taucs_ctime(void);
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+
+
diff --git a/contrib/taucs/src/taucs_recvaidya.c b/contrib/taucs/src/taucs_recvaidya.c
new file mode 100644
index 0000000000000000000000000000000000000000..7bb20cb76acbf440dc8a4dbfd54efd4b28849375
--- /dev/null
+++ b/contrib/taucs/src/taucs_recvaidya.c
@@ -0,0 +1,428 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*                                                       */
+/* Recursive Vaidya preconditioners.                     */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+/*#include <unistd.h>*/
+
+/*
+long int random();
+void srandom(unsigned int seed);
+*/
+
+#ifdef TAUCS_CORE
+
+typedef struct {
+  taucs_ccs_matrix** B; /* not referenced by the code in this file */
+  taucs_ccs_matrix** S; /* S[l], l>=1: Schur complement that level l works on */
+  taucs_ccs_matrix** L; /* L[l]: partial factor of level l; complete factor on the last level */
+  int             levels; /* number of levels that were built */
+  int             level; /* level this instance solves at; 0 in the object returned by create */
+  double          convratio; /* forwarded to taucs_ccs_solve_schur */
+  double          maxits; /* inner iteration limit; cast to int where it is used */
+} recvaidya_args;
+
+/* Order the vertices of the graph of m: first those removed by repeatedly   */
+/* stripping vertices of degree <= 1, then those left with degree 2, then    */
+/* the rest; *P receives the size of the first two groups combined.          */
+/* Returns 0, or -1 with *perm and *invperm set to NULL if out of memory.    */
+static int
+recvaidya_order(taucs_ccs_matrix* m,
+		int** perm,
+		int** invperm,
+		int*  P)
+{
+  int  n,nnz,i,j,ip,k,p,nleaves;
+  int* adjptr;   /* adjptr[v]: start of v's neighbor list inside adj */
+  int* adj;      /* concatenated neighbor lists                      */
+  int* len;      /* len[v]: current degree of v, 0 once eliminated   */
+  int* ptr;      /* fill cursors used while building adj             */
+  int* leaves;   /* stack of vertices waiting to be stripped         */
+
+  n   = m->n;
+  nnz = (m->colptr)[n];
+  
+  taucs_printf("recvaidya_order: starting, matrix is %dx%d, # edges=%d\n",
+	     n,n,nnz-n);
+
+  *perm    = (int*) taucs_malloc(n * sizeof(int));
+  *invperm = (int*) taucs_malloc(n * sizeof(int));
+
+  /* we can reuse buffers: don't need invperm until the end */
+  /* also, we can reuse perm for leaves but it's messy.     */
+  len    = (int*) taucs_malloc(n * sizeof(int));
+  leaves = (int*) taucs_malloc(n * sizeof(int));
+
+  adjptr = (int*) taucs_malloc(n * sizeof(int));
+  /* two slots per stored entry always suffice, even if a diagonal is missing, and the request is never zero bytes */
+  adj    = (int*) taucs_malloc((2*(size_t)nnz + 1) * sizeof(int));
+
+  if (!(*perm) || !(*invperm) || !adjptr || !adj || !len || !leaves) {
+    taucs_free(adj);
+    taucs_free(adjptr);
+    taucs_free(len);
+    taucs_free(leaves);
+    taucs_free(*perm);
+    taucs_free(*invperm);
+    *perm = *invperm = NULL; /* do not hand freed pointers back to the caller */
+    return -1;
+  }
+
+  for (i=0; i<n; i++) len[i] = 0;
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      /* every stored off-diagonal entry is an edge seen from both ends */
+      i = (m->rowind)[ip];
+      if (i != j) {
+	len[i] ++;
+	len[j] ++;
+      }
+    }
+  }
+
+  nleaves = 0;
+  for (i=0; i<n; i++) {
+    if (len[i] <= 1) {
+      leaves[nleaves] = i;
+      nleaves++;
+    }
+  }
+
+  adjptr[0] = 0;
+  for (i=1; i<n; i++) adjptr[i] = adjptr[i-1] + len[i-1];
+
+  ptr =  *perm;
+  for (i=0; i<n; i++) ptr[i] = adjptr[i];
+
+  for (j=0; j<n; j++) {
+    for (ip = (m->colptr)[j]; ip < (m->colptr)[j+1]; ip++) {
+      /* same traversal as the counting pass, now filling the lists */
+      i = (m->rowind)[ip];
+      if (i != j) {
+	adj[ ptr[i] ] = j;
+	adj[ ptr[j] ] = i;
+	ptr[i] ++;
+	ptr[j] ++;
+      }
+    }
+  }
+
+  /* now the graph data structure is ready */
+
+  /* we first eliminate leaves until all the degrees >= 2 */
+
+  i = 0;
+  while (nleaves > 0) {
+    /*taucs_printf("recvaidya_order: nleaves=%d\n",nleaves);*/
+    nleaves--;
+    j = leaves[nleaves];
+
+    /*taucs_printf("recvaidya_order: next leaf is %d, degree=%d\n",j,len[j]);*/
+    (*perm)   [ i ] = j;
+    (*invperm)[ j ] = i;
+    i++;
+    
+    if (len[j] > 0) {
+      assert(len[j] == 1); /* j must be a degree-1 vertex */
+      len[j]--;            /* eliminate j */
+      p = adj[ adjptr[j] ]; 
+      
+      /*taucs_printf("symccs_treeorder: parent of %d is %d\n",j,p);*/
+
+      for (k = 0; k < len[p]; k++)
+	if (adj[ adjptr[p] + k ] == j) break;
+
+      assert( k < len[p] ); /* j must be a neighbor of p */
+	
+      /* now delete j from p's adjacency list and compress */
+      len[p] --;
+      for (; k < len[p]; k++)
+	adj[ adjptr[p] + k ] = adj[ adjptr[p] + k+1 ];
+
+      if (len[p] == 1) { /* degree was higher and now is 1 */
+	leaves[ nleaves ] = p;
+	nleaves++;
+      }
+    }
+  }
+
+  /* an eliminated vertex j must have len[j]==0        */
+  /* we can now eliminate all the degree-2 vertices    */
+  /* elimination of degree-2 vertices does not change  */
+  /* degrees, so we first find them and then eliminate */
+
+  for (j=0; j<n; j++) {
+    if (len[j] == 2) {
+      (*perm)[i]    = j;
+      (*invperm)[j] = i;
+      i++;
+      len[j] = 0; /* eliminate from the graph */
+    }
+  }
+
+  *P = i;
+  taucs_printf("recvaidya_order: eliminating %d vertices (remaining have deg>2)\n",
+	     *P);
+  
+  for (j=0; j<n; j++) {
+    if (len[j] > 0) {
+      (*perm)[i]    = j;
+      (*invperm)[j] = i;
+      i++;
+    }
+  }
+
+  assert( i == n );
+
+  taucs_free(adj);
+  taucs_free(adjptr);
+  taucs_free(len);
+  taucs_free(leaves);
+
+  taucs_printf("recvaidya_order: done\n");
+
+  return 0;
+}
+
+/* Build a recursive Vaidya preconditioner for A using at most maxlevels    */
+/* (1..32) levels. Returns a recvaidya_args* to hand to                     */
+/* taucs_recursive_amwb_preconditioner_solve, or NULL if maxlevels is out   */
+/* of range or an allocation/ordering step fails (the latter also resets    */
+/* *perm and *invperm to NULL).                                             */
+void* 
+taucs_recursive_amwb_preconditioner_create(taucs_ccs_matrix* A, 
+					   double c, 
+					   double epsilon, 
+					   int nsmall,
+					   int maxlevels,
+					   int innerits,
+					   double convratio,
+					   int** perm, 
+					   int** invperm)
+{
+  int l,i,k;
+  int levels;
+  int P[32];            /* P[l]: number of vertices eliminated at level l */
+  taucs_ccs_matrix** S; /* Schur complements                        */
+  taucs_ccs_matrix** L; /* Partial LL^T factors                     */
+  double exponent = 1.0/(1.0+epsilon);
+  recvaidya_args* args;
+  int* perml  = NULL;   /* local permutation for level l         */
+  int* iperml = NULL;   /* inverse local permutation for level l */
+  int* tmpperm;
+  int next = 0;         /* vertices eliminated by the levels above */
+
+  /* P, S and L hold 32 entries; with no level at all L[0] would stay NULL */
+  if (maxlevels < 1 || maxlevels > 32) {
+    taucs_printf("taucs_recursive_amwb_preconditioner_create: maxlevel must be between 1 and 32\n");
+    return NULL;
+  }
+
+  args = (recvaidya_args*) taucs_malloc(sizeof(recvaidya_args));
+  S    = (taucs_ccs_matrix**) taucs_malloc(32 * sizeof(taucs_ccs_matrix*));
+  L    = (taucs_ccs_matrix**) taucs_malloc(32 * sizeof(taucs_ccs_matrix*));
+  *perm    = (int*) taucs_malloc(A->n * sizeof(int));
+  *invperm = (int*) taucs_malloc(A->n * sizeof(int));
+  tmpperm = *invperm;   /* scratch: invperm is rebuilt after each use */
+  if (!args || !S || !L || !(*perm) || !(*invperm)) {
+    taucs_free(S); taucs_free(L); taucs_free(args);
+    taucs_free(*perm); taucs_free(*invperm);
+    *perm = *invperm = NULL;
+    return NULL;
+  }
+
+  for (i=0; i<A->n; i++) (*perm)[i] = (*invperm)[i] = i;
+  for (l=0; l<32; l++) S[l] = L[l] = NULL;
+  args->B = NULL;       /* not used here, but must not stay indeterminate */
+
+  for (l=0; l<maxlevels; l++) {
+    taucs_ccs_matrix* Al;
+    taucs_ccs_matrix* V;    /* a Vaidya preconditioner */
+    taucs_ccs_matrix* PVPT;
+    taucs_ccs_matrix* Ll;
+    int     p;
+    int     n;
+    int rnd;
+    int seed = 123;
+    double t;
+
+    if (l==0) Al = A;
+    else      Al = S[l];
+
+    n = Al->n;
+    if (n==0) {l--; break;}
+
+    t = c * pow( (double)n, exponent );
+    taucs_printf("recvaidya_create: n=%d c=%.2le eps=%.2le ==> t=%.0lf\n",
+	       n,c,epsilon,t);
+    srand(seed);
+    rnd = rand();
+    V = taucs_amwb_preconditioner_create(Al,rnd,t,0 /* stretch flag */);
+    if (!V) goto fail;
+
+    p = n;              /* a full ordering eliminates everything */
+    if (n <= nsmall || l==maxlevels-1) {
+      taucs_printf("recvaidya_create: n=%d <= nsmall=%d (or max level)\n",
+		 n,nsmall);
+      taucs_ccs_order(V,&perml,&iperml,"metis");
+    } else {
+      /* on failure recvaidya_order has already released both arrays */
+      if (recvaidya_order(V,&perml,&iperml,&p) != 0) perml = iperml = NULL;
+      if (p>n) p=n;
+    }
+    PVPT = (perml && iperml) ? taucs_ccs_permute_symmetrically(V,perml,iperml) : NULL;
+    taucs_ccs_free(V);
+    if (!PVPT) goto fail;
+
+    P[l] = p;
+
+    /* now compose the permutations */
+    for (i=0; i<next; i++)
+      tmpperm[i] = (*perm)[i];
+    for (i=next; i<next+n; i++)
+      tmpperm[i] = (*perm)[ next + perml[ i - next ] ];
+    for (i=0; i<next+n; i++)
+      (*perm)[i] = tmpperm[i];
+    for (i=0; i<next+n; i++)
+      (*invperm)[ (*perm)[i] ] = i;
+
+    for (k=1; k<=l; k++) {
+      int* backperm;
+      int* ibackperm;
+      taucs_ccs_matrix* PLPT;
+      taucs_ccs_matrix* PSPT;
+      int N;
+
+      N = L[k-1]->n; 
+      backperm  = (int*) taucs_malloc(N * sizeof(int));
+      ibackperm = (int*) taucs_malloc(N * sizeof(int));
+
+      for (i=0   ; i<N-n ; i++) backperm[i] = i;
+      for (i=N-n ; i<N   ; i++) backperm[i] = (N-n) + perml[i-(N-n)];
+      for (i=0   ; i<N   ; i++) ibackperm[backperm[i]] = i;
+
+      PLPT = taucs_ccs_permute_symmetrically(L[k-1],backperm,ibackperm);
+      taucs_ccs_free(L[k-1]);
+      L[k-1] = PLPT;
+      taucs_free(backperm);
+      taucs_free(ibackperm);
+
+      N = S[k]->n; 
+      backperm  = (int*) taucs_malloc(N * sizeof(int));
+      ibackperm = (int*) taucs_malloc(N * sizeof(int));
+
+      for (i=0   ; i<N-n ; i++) backperm[i] = i;
+      for (i=N-n ; i<N   ; i++) backperm[i] = (N-n) + perml[i-(N-n)];
+      for (i=0   ; i<N   ; i++) ibackperm[backperm[i]] = i;
+
+      PSPT = taucs_ccs_permute_symmetrically(S[k],backperm,ibackperm);
+      taucs_ccs_free(S[k]);
+      S[k] = PSPT;
+      taucs_free(backperm);
+      taucs_free(ibackperm);
+    }
+
+    /* the level permutation is folded in everywhere; release it (it used to leak) */
+    taucs_free(perml);  perml  = NULL;
+    taucs_free(iperml); iperml = NULL;
+
+    if (p<n) {
+      Ll = taucs_ccs_factor_llt_partial(PVPT,p);
+      taucs_ccs_free(PVPT);
+      taucs_ccs_split(Ll,&(L[l]),&(S[l+1]),p);
+      (L[l])   -> flags = TAUCS_TRIANGULAR | TAUCS_LOWER;
+      (S[l+1]) -> flags = TAUCS_SYMMETRIC  | TAUCS_LOWER;
+      taucs_ccs_free(Ll);
+    } else {
+      L[l] = taucs_ccs_factor_llt(PVPT,0.0,0);
+      taucs_ccs_free(PVPT);
+      break;
+    }
+
+    next += p;
+  }
+
+  levels = l+1;
+
+  taucs_printf("recvaidya-create: %d levels [ ",levels);
+  for (l=0; l<levels; l++) taucs_printf("%d ",P[l]);
+  taucs_printf("]\n");
+
+  args->levels = levels;
+  args->convratio = convratio;
+  args->maxits = innerits;
+  args->level  = 0;
+  args->S = S;
+  args->L = L;
+
+  return args;
+
+ fail:
+  /* undo everything built so far; S[] and L[] entries start out NULL */
+  taucs_free(perml);
+  taucs_free(iperml);
+  for (k=0; k<32; k++) {
+    if (S[k]) taucs_ccs_free(S[k]);
+    if (L[k]) taucs_ccs_free(L[k]);
+  }
+  taucs_free(S); taucs_free(L); taucs_free(args);
+  taucs_free(*perm); taucs_free(*invperm);
+  *perm = *invperm = NULL;
+  return NULL;
+}
+
+/* Apply the recursive Vaidya preconditioner described by vP, reading the  */
+/* vector vR and writing the vector vZ (both treated as double arrays).    */
+/* The deepest level is solved directly with its complete factor; every    */
+/* other level calls taucs_ccs_solve_schur, passing this very function as  */
+/* the inner solver for the level below. The results of the two solver     */
+/* calls are not inspected: the function always returns 0.                 */
+int
+taucs_recursive_amwb_preconditioner_solve(void* vP,
+					  void* vZ, 
+					  void* vR)
+{
+  recvaidya_args* here = (recvaidya_args*) vP;
+  double* out = (double*) vZ;
+  double* rhs = (double*) vR;
+  recvaidya_args below;
+
+  if ( here->level == (here->levels)-1 ) {
+    /* this is the last level, L is a complete factor */
+    taucs_ccs_solve_llt((here->L)[here->level],
+			   out, rhs);
+    return 0;
+  }
+
+  /* The inner solver gets its own copy of the descriptor, identical to  */
+  /* this one except that it points one level further down. The copy     */
+  /* lives on this stack frame for the duration of the call below.       */
+  below       = *here;
+  below.level = (here->level) + 1;
+
+  taucs_ccs_solve_schur((here->L)[here->level],
+			(here->S)[(here->level) + 1],
+			taucs_recursive_amwb_preconditioner_solve,
+			&below,
+			(int)(here->maxits),
+			(here->convratio),
+			out, rhs);
+
+  return 0;
+}
+
+#endif /* TAUCS_CORE */  
+		      
+
+
+
+
diff --git a/contrib/taucs/src/taucs_sn_llt.c b/contrib/taucs/src/taucs_sn_llt.c
new file mode 100644
index 0000000000000000000000000000000000000000..dd05a6f9013166d8adc9299748fd26447e137fed
--- /dev/null
+++ b/contrib/taucs/src/taucs_sn_llt.c
@@ -0,0 +1,4020 @@
+/*************************************************************/
+/*                                                           */
+/*************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+//#define NDEBUG
+#include <assert.h>
+
+#define TAUCS_CORE_CILK
+#include "taucs.h"
+
+#ifdef TAUCS_CILK
+#pragma lang -C
+#endif
+
+#ifndef TAUCS_CORE_GENERAL
+#ifdef TAUCS_CILK
+
+/*** GEMM ***/
+
+#define TAUCS_THRESHOLD_GEMM_SMALL 20
+#define TAUCS_THRESHOLD_GEMM_BLAS  80
+
+/* Small-case kernel for C := C - A * B^H (column-major storage).       */
+/* A is m-by-k, B is n-by-k and C is m-by-n; lda, ldb and ldc are the   */
+/* respective column strides.                                           */
+static void
+taucs_gemm_NCm1p1_small(int m, int n, int k, 
+			taucs_datatype* A, int lda,
+			taucs_datatype* B, int ldb,
+			taucs_datatype* C, int ldc)
+{
+  int row,col,t;
+  taucs_datatype* top = C; /* first entry of the current column of C */
+  taucs_datatype* dst;     /* C(row,col) */
+  taucs_datatype* a;       /* A(row,t)   */
+  taucs_datatype* b;       /* B(col,t)   */
+  taucs_datatype  acc;
+
+  for (col=0; col<n; col++, top += ldc) {
+    dst = top;
+    for (row=0; row<m; row++, dst++) {
+      acc = *dst;
+      a   = A + row;
+      b   = B + col;
+      /* subtract the product of row `row` of A with the conjugate of row `col` of B */
+      for (t=0; t<k; t++, a += lda, b += ldb)
+	acc = taucs_sub( acc, taucs_mul( *a, taucs_conj( *b ) ) );
+
+      *dst = acc;
+    }
+  }
+}
+
+cilk static void 
+taucs_cilk_gemm(char* transa, char* transb,
+		int* pm, int*  pn, int* pk,
+		taucs_real_datatype* alpha, 
+		taucs_datatype *A, int *plda,
+		taucs_datatype *B, int *pldb,
+		taucs_real_datatype* beta, 
+		taucs_datatype *C, int *pldc)
+{
+  int    m  = *pm;
+  int    n  = *pn;
+  int    k  = *pk;
+  /* Recursive C -= A*B^H; only transa='N', transb='C', alpha=-1, beta=1 are accepted (see asserts). */
+  assert(*transa == 'N');
+  assert(*transb == 'C');
+  assert(*alpha  ==-1.0);
+  assert(*beta   == 1.0);
+
+  if (n <= TAUCS_THRESHOLD_GEMM_SMALL && k <= TAUCS_THRESHOLD_GEMM_SMALL) {
+    /* n and k both small: use the plain-loop kernel, whatever m is */
+    taucs_gemm_NCm1p1_small(m,n,k,A,*plda,B,*pldb,C,*pldc);
+    return;
+  }
+
+  if (n <= TAUCS_THRESHOLD_GEMM_BLAS && k <= TAUCS_THRESHOLD_GEMM_BLAS) {
+    /* n and k both moderate: hand the whole product to taucs_gemm */
+    taucs_gemm(transa, transb,
+	       pm, pn, pk,
+	       alpha, 
+	       A, plda,
+	       B, pldb,
+	       beta,
+	       C, pldc);
+    return;
+  }
+
+  if (k >= n && k >= m) {
+    int khalf1 = k/2;
+    int khalf2 = k-khalf1;
+    int lda = *plda;
+    int ldb = *pldb;
+    /* k is the largest dimension: split the inner dimension in two */
+    spawn taucs_cilk_gemm(transa,transb, 
+			  pm, pn, &khalf1,
+			  alpha, 
+			  A, plda,
+			  B, pldb,
+			  beta,
+			  C, pldc);
+    sync; /* both halves accumulate into the same C, so they must not overlap */
+    spawn taucs_cilk_gemm(transa,transb, 
+			  pm, pn, &khalf2,
+			  alpha, 
+			  A + khalf1*lda, plda,
+			  B + khalf1*ldb, pldb,
+			  beta,
+			  C, pldc);
+    sync;
+    return;
+  } 
+
+  if (n >= k && n >= m) {
+    int nhalf1 = n/2;
+    int nhalf2 = n-nhalf1;
+    int ldc = *pldc;
+    /* n is the largest dimension: split the columns of C (rows of B) */
+
+    spawn taucs_cilk_gemm(transa,transb, 
+			  pm, &nhalf1, pk,
+			  alpha, 
+			  A, plda,
+			  B, pldb,
+			  beta,
+			  C, pldc);
+    /* no sync in between: the second call writes C starting nhalf1 columns further on */
+
+    spawn taucs_cilk_gemm(transa,transb, 
+			  pm, &nhalf2, pk,
+			  alpha, 
+			  A, plda,
+			  B + nhalf1, pldb,
+			  beta,
+			  C + nhalf1*ldc, pldc);
+    sync;
+    return;
+  }
+
+  if (1 /* m >= k && m >= n*/) { /* the condition must be true */
+    int mhalf1 = m/2;
+    int mhalf2 = m-mhalf1;
+    /* m is the largest dimension: split the rows of A and C */
+
+    spawn taucs_cilk_gemm(transa,transb, 
+			  &mhalf1, pn, pk,
+			  alpha, 
+			  A, plda,
+			  B, pldb,
+			  beta,
+			  C, pldc);
+    /* no sync in between: the second call writes C starting mhalf1 rows further down */
+    spawn taucs_cilk_gemm(transa,transb, 
+			  &mhalf2, pn, pk,
+			  alpha, 
+			  A + mhalf1, plda,
+			  B, pldb,
+			  beta,
+			  C + mhalf1, pldc);
+    sync;
+    return;
+  }
+
+  assert(0); /* not reached: the if (1) branch above always returns */
+
+}
+
+/*** HERK ***/
+
+#define TAUCS_THRESHOLD_HERK_SMALL 20
+#define TAUCS_THRESHOLD_HERK_BLAS  80
+
+/* Small-case kernel for the lower triangle of C := C - A * A^H.      */
+/* A is n-by-k and C is n-by-n, both column-major with column strides */
+/* lda and ldc; entries above the diagonal of C are never accessed.   */
+static void
+taucs_herk_LNm1p1_small(int n, int k, 
+			taucs_datatype* A, int lda,
+			taucs_datatype* C, int ldc)
+{
+  int row,col,t;
+  taucs_datatype* top = C; /* first entry of the current column of C */
+  taucs_datatype* dst;     /* C(row,col), row >= col */
+  taucs_datatype* a_row;   /* A(row,t) */
+  taucs_datatype* a_col;   /* A(col,t) */
+  taucs_datatype  acc;
+
+  for (col=0; col<n; col++, top += ldc) {
+
+    dst = top + col;       /* start on the diagonal of this column */
+    for (row=col; row<n; row++, dst++) {
+      acc   = *dst;
+      a_row = A + row;
+      a_col = A + col;
+      /* subtract the product of row `row` of A with the conjugate of row `col` */
+      for (t=0; t<k; t++, a_row += lda, a_col += lda)
+	acc = taucs_sub( acc, taucs_mul( *a_row, taucs_conj( *a_col ) ) );
+
+      *dst = acc;
+    }
+  }
+}
+
+cilk static void 
+taucs_cilk_herk(char* uplo, char* trans,
+		int*  pn, int* pk,
+		taucs_real_datatype* alpha, 
+		taucs_datatype *A, int *plda,
+		taucs_real_datatype* beta, 
+		taucs_datatype *C, int *pldc)
+{
+  int    n  = *pn;
+  int    k  = *pk;
+  /* Recursive lower-triangle C -= A*A^H; only uplo='L', trans='N', alpha=-1, beta=1 are accepted. */
+  assert(*uplo  == 'L');
+  assert(*trans == 'N');
+  assert(*alpha ==-1.0);
+  assert(*beta  == 1.0);
+
+  if (n <= TAUCS_THRESHOLD_HERK_SMALL && k <= TAUCS_THRESHOLD_HERK_SMALL) {
+    /* n and k both small: use the plain-loop kernel */
+    taucs_herk_LNm1p1_small(n,k,A,*plda,C,*pldc);
+    return;
+  }
+
+  if (n <= TAUCS_THRESHOLD_HERK_BLAS && k <= TAUCS_THRESHOLD_HERK_BLAS) {
+    /* n and k both moderate: hand the whole update to taucs_herk */
+    taucs_herk(uplo,trans,
+	       pn, pk,
+	       alpha, 
+	       A, plda,
+	       beta,
+	       C, pldc);
+    return;
+  }
+
+  if (k > n) {
+    int khalf1 = k/2;
+    int khalf2 = k-khalf1;
+    int lda = *plda;
+    /* k dominates: split the inner dimension in two */
+    spawn taucs_cilk_herk(uplo,trans, 
+			  pn, &khalf1,
+			  alpha, 
+			  A, plda,
+			  beta,
+			  C, pldc);
+    sync; /* both halves accumulate into the same C, so they must not overlap */
+    spawn taucs_cilk_herk(uplo,trans,
+			  pn, &khalf2,
+			  alpha, 
+			  A + khalf1*lda, plda,
+			  beta,
+			  C, pldc);
+    sync;
+    return;
+  } else {
+    int ldc = *pldc;
+    int nhalf1 = n/2;
+    int nhalf2 = n-nhalf1;
+    /* split C into a 2x2 block form: top-left herk, bottom-left gemm, bottom-right herk */
+    spawn taucs_cilk_herk(uplo,trans, 
+			  &nhalf1, pk,
+			  alpha, 
+			  A, plda,
+			  beta,
+			  C, pldc);
+    /* NOTE(review): taucs_cilk_gemm declares alpha/beta as taucs_real_datatype*; confirm taucs_minusone_const/taucs_one_const have that type in complex builds */
+    spawn taucs_cilk_gemm("No Transpose", "Conjugate", 
+			  &nhalf2, &nhalf1, pk, 
+			  &taucs_minusone_const, 
+			  A+nhalf1  , plda,
+			  A         , plda,
+			  &taucs_one_const, 
+			  C +nhalf1, pldc);
+
+    spawn taucs_cilk_herk(uplo,trans, 
+			  &nhalf2, pk,
+			  alpha, 
+			  A + nhalf1, plda,
+			  beta,
+			  C + nhalf1*ldc + nhalf1, pldc);
+    /* the three calls start at C, C+nhalf1 and C+nhalf1*ldc+nhalf1 respectively, so one sync suffices */
+    sync;
+    return;
+  }
+
+}
+
+/*** TRSM ***/
+
+#define TAUCS_THRESHOLD_TRSM_SMALL 20
+#define TAUCS_THRESHOLD_TRSM_BLAS  80
+
+/* Small-case kernel: overwrite the m-by-n matrix B with the solution X */
+/* of X * A^H = B, where A is n-by-n lower triangular with a non-unit   */
+/* diagonal. Both matrices are column-major with strides lda and ldb.   */
+static void
+taucs_trsm_RLCNp1_small(int m, int n, 
+			taucs_datatype* A, int lda,
+			taucs_datatype* B, int ldb)
+{
+  int row,col,t;
+  taucs_datatype* brow;   /* B(row,0) */
+  taucs_datatype* x;      /* B(row,t), already solved for t < col */
+  taucs_datatype* a;      /* A(col,t) */
+  taucs_datatype  acc;
+
+  for (row=0, brow=B; row<m; row++, brow++) {
+    for (col=0; col<n; col++) {
+      acc = *(brow + col*ldb);
+      x   = brow;
+      a   = A + col;
+      /* remove the contributions of the entries of this row solved so far */
+      for (t=0; t<col; t++, x += ldb, a += lda)
+	acc = taucs_sub( acc, taucs_mul( *x, taucs_conj( *a ) ) );
+      /* the walk leaves a on the diagonal entry A(col,col) */
+      *(brow + col*ldb) = taucs_div( acc, taucs_conj( *a ) ); 
+    }
+  }
+}
+
+cilk static void 
+taucs_cilk_trsm(char* side, char* uplo, char* transa, char* diag, 
+		int*  pm, int* pn,
+		taucs_datatype* alpha, 
+		taucs_datatype *A, int *plda,
+		taucs_datatype *B, int *pldb)
+{
+  int    n  = *pn;
+  int    m  = *pm;
+  /* Recursive in-place triangular solve; only side='R', uplo='L', transa='C', diag='N', alpha=1 are accepted. */
+  assert(*side   == 'R');
+  assert(*uplo   == 'L');
+  assert(*transa == 'C');
+  assert(*diag   == 'N');
+  assert(taucs_re(*alpha) == 1.0);
+  assert(taucs_im(*alpha) == 0.0);
+
+  if (m <= TAUCS_THRESHOLD_TRSM_SMALL && n <= TAUCS_THRESHOLD_TRSM_SMALL) {
+    /* m and n both small: use the plain-loop kernel */
+    taucs_trsm_RLCNp1_small(m,n,A,*plda,B,*pldb);
+    return;
+  }
+
+  if (m <= TAUCS_THRESHOLD_TRSM_BLAS && n <= TAUCS_THRESHOLD_TRSM_BLAS) {
+    /* m and n both moderate: hand the whole solve to taucs_trsm */
+    taucs_trsm(side,uplo,transa,diag, 
+	       pm, pn,
+	       alpha, 
+	       A, plda,
+	       B, pldb);
+    return;
+  }
+
+  if (m >= n) {
+    int mhalf1 = m/2;
+    int mhalf2 = m-mhalf1;
+    /* split the rows of B; the second call starts mhalf1 rows further down, so no sync in between */
+    spawn taucs_cilk_trsm(side,uplo,transa,diag, 
+			  &mhalf1, pn,
+			  alpha, 
+			  A, plda,
+			  B, pldb);
+    spawn taucs_cilk_trsm(side,uplo,transa,diag, 
+			  &mhalf2, pn,
+			  alpha, 
+			  A, plda,
+			  B+mhalf1, pldb);
+    sync;
+    return;
+  } else {
+    int lda = *plda;
+    int ldb = *pldb;
+    int nhalf1 = n/2;
+    int nhalf2 = n-nhalf1;
+    /* split the columns of B: solve the left part, update the right part with it, solve the right part */
+    spawn taucs_cilk_trsm(side,uplo,transa,diag, 
+			  pm, &nhalf1,
+			  alpha, 
+			  A, plda,
+			  B, pldb);
+    sync; /* the update below reads the columns just solved */
+    spawn taucs_cilk_gemm("No Transpose", "Conjugate", 
+			  pm, &nhalf2, &nhalf1, 
+			  &taucs_minusone_const, 
+			  B                  , pldb,
+			  A+nhalf1           , plda,
+			  &taucs_one_const, 
+			  B       +nhalf1*ldb, pldb);
+    sync; /* the second solve needs the updated right-hand columns */
+    spawn taucs_cilk_trsm(side,uplo,transa,diag, 
+			  pm, &nhalf2,
+			  alpha, 
+			  A+nhalf1+nhalf1*lda, plda,
+			  B       +nhalf1*ldb, pldb);
+    sync;
+    return;
+  }
+}
+
+/*** POTRF ***/
+
+#define TAUCS_THRESHOLD_POTRF_SMALL 20
+#define TAUCS_THRESHOLD_POTRF_BLAS  80
+
+/*
+  Unblocked Cholesky of the lower triangle of the n-by-n column-major matrix A.
+  Returns 0, or j+1 for the first column j whose pivot is not strictly positive.
+*/
+static int
+taucs_potrf_lower_small(int n, taucs_datatype* A, int lda)
+{
+  int j,i,k;
+  taucs_datatype* Aj;    /* top of column j */
+  taucs_datatype* Ajk;
+  taucs_datatype* Aik;
+  taucs_datatype  Aij;
+  taucs_datatype  scale;
+
+  Aj = A;
+  for (j=0; j<n; j++) {
+    for (i=j; i<n; i++) {
+      Aij = taucs_zero_const;
+      Ajk = A+j; /* k = 0 */
+      Aik = A+i; /* k = 0 */
+      for (k=0; k<j; k++) {
+	Aij = taucs_add( Aij , taucs_mul( (*Aik) , taucs_conj(*Ajk) ) ); /* L(i,k)*conj(L(j,k)), as in the herk kernel */
+	Aik += lda;
+	Ajk += lda;
+      }
+      Aj[i] = taucs_sub( Aj[i] , Aij );
+    }
+    /* reject zero and NaN pivots as well: 1/sqrt() below would not be finite */
+    if ( !(taucs_re(Aj[j]) > 0) ) return j+1;
+    scale = taucs_div( taucs_one_const, taucs_sqrt(Aj[j]) );
+    for (i=j; i<n; i++) 
+      Aj[i] = taucs_mul(Aj[i] , scale);
+    Aj += lda;
+  }
+  return 0;
+}
+
+cilk static void 
+taucs_cilk_potrf(char* uplo, 
+		 int*  pn,
+		 taucs_datatype* A, int* plda,
+		 int*  pinfo)
+{
+  int    n  = *pn;
+  int nhalf1,nhalf2;
+  /* Recursive Cholesky of the lower triangle; *pinfo is 0 on success, otherwise the value reported by the failing sub-factorization, shifted by nhalf1 for the trailing block. */
+  assert(*uplo == 'L');
+
+  if (n <= TAUCS_THRESHOLD_POTRF_SMALL) {
+    /* small block: use the plain-loop kernel */
+    *pinfo = taucs_potrf_lower_small(*pn,A,*plda);
+    return;
+  }
+
+  if (n <= TAUCS_THRESHOLD_POTRF_BLAS) {
+    /* moderate block: hand it to taucs_potrf */
+    taucs_potrf(uplo,pn,A,plda,pinfo);
+    return;
+  }
+
+  /* large block: factor the leading nhalf1 columns, update, then factor the rest */
+
+  nhalf1 = n/2;
+  nhalf2 = n-nhalf1;
+
+  spawn taucs_cilk_potrf(uplo,&nhalf1,A,plda,pinfo);
+  sync;
+
+  if (*pinfo) return; /* leading block failed: *pinfo already refers to this block's numbering */
+
+  spawn taucs_cilk_trsm ("Right",
+			 "Lower",
+			 "Conjugate",
+			 "No unit diagonal",
+			 &nhalf2,&nhalf1,
+			 &taucs_one_const,
+			 A,       plda,
+			 A+nhalf1,plda);
+  sync;
+  /*  
+  taucs_trsm ("Right",
+	      "Lower",
+	      "Conjugate",
+	      "No unit diagonal",
+	      &nhalf2,&nhalf1,
+	      &taucs_one_const,
+	      A,       plda,
+	      A+nhalf1,plda);
+  */
+
+  spawn taucs_cilk_herk ("Lower",
+			 "No Conjugate",
+			 &nhalf2,&nhalf1,
+			 &taucs_minusone_real_const,
+			 A+nhalf1,plda,
+			 &taucs_one_real_const,
+			 A+nhalf1+(nhalf1 * *plda), plda);
+  sync;
+
+  spawn taucs_cilk_potrf(uplo,&nhalf2,A+nhalf1+(nhalf1 * *plda),plda,pinfo);
+  sync;
+
+  if (*pinfo) *pinfo += nhalf1; /* translate the trailing block's index to this block's numbering */
+}
+#else
+#define taucs_cilk_potrf taucs_potrf
+#define taucs_cilk_gemm  taucs_gemm
+#define taucs_cilk_trsm  taucs_trsm
+#define taucs_cilk_herk  taucs_herk
+#endif
+#endif
+
+/*************************************************************/
+/* These are really generic routines                         */
+/*************************************************************/
+
+#if 0
+#ifdef TAUCS_CORE_GENERAL
+void* taucs_cilk_init() { /* disabled by the enclosing #if 0; returns the Cilk context, or NULL in a non-Cilk build */
+#ifdef TAUCS_CILK
+  CilkContext* context;
+  int argc;
+#define CILK_ACTIVE_SIZE 8
+  char* argv[] = {"program_name","--nproc","8",0};
+  /* CILK_ACTIVE_SIZE is not referenced below; the processor count comes from the "8" in argv */
+  for (argc=0; argv[argc]; argc++);
+
+  taucs_printf("taucs_cilk_init\n");
+  context = Cilk_init(&argc,argv);
+  return context;
+#else
+  taucs_printf("taucs_cilk_init: This is not a Cilk build\n");
+  return NULL;
+#endif
+}
+
+void taucs_cilk_terminate(void* context) { /* does nothing unless TAUCS_CILK is defined */
+#ifdef TAUCS_CILK
+  Cilk_terminate((CilkContext*) context);
+#endif
+}
+
+#endif /* TAUCS_CORE_GENERAL */
+#endif
+/*************************************************************/
+/* End of Cilk-related generic routines                      */
+/*************************************************************/
+
+#if 0
+/*omer added this, I don't know why yet*/
+#ifdef TAUCS_CORE_GENERAL
+taucs_double taucs_dzero_const     =  0.0;
+taucs_double taucs_done_const      =  1.0;
+taucs_double taucs_dminusone_const = -1.0;
+
+taucs_single taucs_szero_const     =  0.0f;
+taucs_single taucs_sone_const      =  1.0f;
+taucs_single taucs_sminusone_const = -1.0f;
+#endif
+#endif 
+
+/*************************************************************/
+/* structures                                                */
+/*************************************************************/
+
+#define FALSE 0
+#define TRUE  1
+
+/*#define BLAS_FLOPS_CUTOFF  1000.0*/
+#define BLAS_FLOPS_CUTOFF  -1.0
+#define SOLVE_DENSE_CUTOFF 5
+
+typedef struct {
+  int     sn_size;
+  int     n;
+  int*    rowind;
+  /* NOTE(review): the code that fills and reads this struct lies outside this excerpt; the field notes below are read off the names only - confirm */
+  int     up_size;
+  int*    sn_vertices; /* presumably the sn_size vertices of the supernode - confirm */
+  int*    up_vertices; /* presumably the up_size vertices of the update part - confirm */
+
+  taucs_datatype* f1;
+  taucs_datatype* f2;
+  taucs_datatype* u;
+
+} supernodal_frontal_matrix;
+
+#define SFM_F1 f1 /* SFM_F1, SFM_F2 and SFM_U are aliases for the field names f1, f2 and u */
+#define SFM_F2 f2
+#define SFM_U   u
+
+typedef struct {
+  int     flags;    /* datatype flag (TAUCS_SINGLE/DOUBLE/SCOMPLEX/DCOMPLEX) set by multifrontal_supernodal_create */
+
+  char    uplo;     /* 'u' for upper, 'l' for lower, ' ' don't know; prefer lower. */
+  int     n;        /* size of matrix */
+  int     n_sn;     /* number of supernodes */
+
+  int* parent;      /* supernodal elimination tree */
+  int* first_child; 
+  int* next_child;
+  /* supernodal_factor_to_ccs walks sn_struct[sn][0 .. sn_up_size[sn]-1], the first sn_size[sn] entries being the supernode's own columns */
+  int* sn_size;     /* size of supernodes (diagonal block) */
+  int* sn_up_size;  /* size of subdiagonal update blocks   */
+  int** sn_struct;  /* row structure of supernodes         */
+
+  int* sn_blocks_ld;  /* lda of supernode blocks */
+  taucs_datatype** sn_blocks; /* supernode blocks, read as sn_blocks[sn][jp*sn_blocks_ld[sn] + ip] */
+    
+  int* up_blocks_ld;  /* lda of update blocks    */
+  taucs_datatype** up_blocks; /* update blocks, read as up_blocks[sn][jp*up_blocks_ld[sn] + (ip - sn_size[sn])] */
+} supernodal_factor_matrix;
+
+#ifdef TAUCS_CORE_GENERAL
+/*************************************************************/
+/* for qsort                                                 */
+/*************************************************************/
+
+/* this is never used */
+/*
+static int compare_ints(void* vx, void* vy)
+{
+  int* ix = (int*)vx;
+  int* iy = (int*)vy;
+  if (*ix < *iy) return -1;
+  if (*ix > *iy) return  1;
+  return 0;
+}
+*/
+
+static int* compare_indirect_map; /* sort keys; must be set before the comparator is used */
+static int compare_indirect_ints( const void* vx, const void* vy)
+{
+  /* three-way comparison of two indices by their entries in compare_indirect_map */
+  int kx = compare_indirect_map[ *(int*)vx ];
+  int ky = compare_indirect_map[ *(int*)vy ];
+  if (kx < ky) return -1;
+  return (kx > ky) ? 1 : 0;
+}
+
+/*************************************************************/
+/* radix sort                                                */
+/*************************************************************/
+
+#if 0
+/* NCOUNTS = 2^LOGRADIX */
+
+#define RADIX_SORT_LOGRADIX 4
+#define RADIX_SORT_NCOUNTS  16
+
+static unsigned int counts[RADIX_SORT_NCOUNTS];
+
+static int
+radix_sort(unsigned int* x, int n)
+{
+  int i;
+  unsigned int mask;
+  /* Disabled (#if 0) in-place sort of x[0..n-1], RADIX_SORT_LOGRADIX bits per pass from the low bits up; returns -1 if the scratch buffer cannot be allocated, else 0. */
+  unsigned int  ncounts;
+
+  unsigned int* y;
+  unsigned int* to;
+  unsigned int* from;
+
+  unsigned int v;
+  unsigned int partialsum;
+  unsigned int next;
+  unsigned int bits_sorted;
+
+  if (RADIX_SORT_LOGRADIX >= 8*sizeof(unsigned int)) {
+    taucs_printf("radix sort: radix too large.\n");
+    /* the computation of ncounts will fail */
+    return 0;
+  }
+
+  mask    = 0;
+  ncounts = 1;
+  for (i=0; i<RADIX_SORT_LOGRADIX; i++) {
+    mask = (mask << 1) | 1;
+    ncounts = ncounts << 1;
+  }
+
+  assert(ncounts==RADIX_SORT_NCOUNTS);
+
+  y      = (unsigned int*) taucs_malloc(n       * sizeof(unsigned int));
+  if (!y) {
+    taucs_printf("radix sort: out of memory.\n");
+    return -1;
+  }
+
+  from = x;
+  to   = y;
+
+  bits_sorted = 0;
+  while(bits_sorted < 8*sizeof(unsigned int)) {
+    for (i=0; i<ncounts; i++) counts[i] = 0;
+
+    for (i=0; i<n; i++) {
+      v = (from[i] >> bits_sorted) & mask;
+      assert(v < ncounts);
+      counts[v] ++;
+    }
+
+    partialsum = 0;
+    for (i=0; i<ncounts; i++) {
+      /* turn the histogram into exclusive prefix sums: counts[d] becomes the first output slot of digit d */
+      next = counts[i];
+      counts[i] = partialsum;
+      /*printf("%d>\n",counts[i]);*/
+      partialsum = partialsum + next;
+    }
+
+    for (i=0; i<n; i++) {
+      v = (from[i] >> bits_sorted) & mask;
+      assert(counts[v] < n);
+      to[counts[v]] = from[i];
+      counts[v] ++;
+    }
+    /*
+    printf("===========\n");
+    for (i=0; i<n; i++) printf(">>%d>> %08x\n",bits_sorted,to[i]);
+    printf("===========\n");
+    */
+
+    bits_sorted += RADIX_SORT_LOGRADIX;
+    if (from == x) { /* swap roles: the buffer just written is the next pass's input */
+      from = y;
+      to   = x;
+    } else {
+      from = x;
+      to   = y;
+    } 
+  }
+
+  if (from == y) /* after the final swap `from` holds the sorted data */
+    for (i=0; i<n; i++) x[i] = y[i];
+
+  taucs_free(y);
+
+  return 0;
+}
+#endif
+
+#endif /* TAUCS_CORE_GENERAL */
+/*************************************************************/
+/* create and free the factor object                         */
+/*************************************************************/
+
+#ifndef TAUCS_CORE_GENERAL
+
+/* Allocate an empty factor descriptor: no supernodes, every array pointer NULL. Returns NULL if out of memory. */
+static supernodal_factor_matrix*
+multifrontal_supernodal_create()
+{
+  supernodal_factor_matrix* L;
+  L = (supernodal_factor_matrix*) taucs_malloc(sizeof(supernodal_factor_matrix));
+  if (!L) return NULL;
+  L->flags = 0; /* defined value even if none of the type macros below is set */
+#ifdef TAUCS_CORE_SINGLE
+  L->flags = TAUCS_SINGLE;
+#endif
+
+#ifdef TAUCS_CORE_DOUBLE
+  L->flags = TAUCS_DOUBLE;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+  L->flags = TAUCS_SCOMPLEX;
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+  L->flags = TAUCS_DCOMPLEX;
+#endif
+
+  L->uplo      = 'l';
+  L->n         = -1; /* unused */
+  L->n_sn      = 0;  /* the free routines loop up to n_sn; keep it defined */
+  L->sn_struct   = NULL;
+  L->sn_size     = NULL;
+  L->sn_up_size  = NULL;
+  L->parent      = NULL;
+  L->first_child = NULL;
+  L->next_child  = NULL;
+  L->sn_blocks_ld  = NULL;
+  L->sn_blocks     = NULL;
+  L->up_blocks_ld  = NULL;
+  L->up_blocks     = NULL;
+
+  return L;
+}
+
+/* Release a supernodal factor: every per-supernode array, the index    */
+/* tables, and finally the descriptor itself. A NULL vL is ignored.     */
+void taucs_dtl(supernodal_factor_free)(void* vL)
+{
+  supernodal_factor_matrix* F = (supernodal_factor_matrix*) vL;
+  int s;
+
+  if (F == NULL) return;
+
+  /* per-supernode payloads first, while the tables that hold them exist */
+  if (F->sn_struct) {
+    for (s=0; s<F->n_sn; s++) taucs_free(F->sn_struct[s]);
+  }
+  if (F->sn_blocks) {
+    for (s=0; s<F->n_sn; s++) taucs_free(F->sn_blocks[s]);
+  }
+  if (F->up_blocks) {
+    for (s=0; s<F->n_sn; s++) taucs_free(F->up_blocks[s]);
+  }
+  taucs_free(F->sn_struct);
+  taucs_free(F->sn_blocks);
+  taucs_free(F->up_blocks);
+
+  /* flat per-supernode tables */
+  taucs_free(F->sn_size);
+  taucs_free(F->sn_up_size);
+  taucs_free(F->sn_blocks_ld);
+  taucs_free(F->up_blocks_ld);
+  /* supernodal elimination tree */
+  taucs_free(F->parent);
+  taucs_free(F->first_child);
+  taucs_free(F->next_child);
+  taucs_free(F);
+}
+
+/* Release only the numeric blocks of a factor; the structure arrays are left in place. */
+void taucs_dtl(supernodal_factor_free_numeric)(void* vL)
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  int sn;
+  /* tolerate a missing factor or missing block tables, like supernodal_factor_free */
+  if (!L || (!L->sn_blocks && !L->up_blocks)) return;
+  for (sn=0; sn<L->n_sn; sn++) {
+    if (L->sn_blocks) { taucs_free(L->sn_blocks[sn]); L->sn_blocks[sn] = NULL; }
+    if (L->up_blocks) { taucs_free(L->up_blocks[sn]); L->up_blocks[sn] = NULL; }
+  }
+}
+
+taucs_ccs_matrix*
+taucs_dtl(supernodal_factor_to_ccs)(void* vL)
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  taucs_ccs_matrix* C;
+  int n,nnz;
+  int i,j,ip,jp,sn,next;
+  taucs_datatype v;
+  int* len;
+  /* Copy the nonzero entries of a supernodal factor into a new lower-triangular CCS matrix; returns NULL if an allocation fails. */
+  n = L->n;
+
+  len = (int*) taucs_malloc(n*sizeof(int));
+  if (!len) return NULL;
+
+  nnz = 0;
+  /*
+  for (sn=0; sn<L->n_sn; sn++) {
+    for (jp=0; jp<(L->sn_size)[sn]; jp++) {
+      j = (L->sn_struct)[sn][jp];
+      len[j] = (L->sn_up_size)[sn] - jp;
+      nnz += len[j];
+    }
+  }
+  */
+  /* pass 1: count the entries with a nonzero real or imaginary part in every column */
+  for (sn=0; sn<L->n_sn; sn++) {
+    for (jp=0; jp<(L->sn_size)[sn]; jp++) {
+      j = (L->sn_struct)[sn][jp];
+      len[j] = 0;
+
+      for (ip=jp; ip<(L->sn_size)[sn]; ip++) { /* diagonal block, on and below the diagonal */
+	i = (L->sn_struct)[sn][ ip ];
+	v = (L->sn_blocks)[sn][ jp*(L->sn_blocks_ld)[sn] + ip ];
+
+	if (taucs_re(v) || taucs_im(v)) { 
+	  len[j] ++;
+	  nnz ++;
+	}
+      }
+      for (ip=(L->sn_size)[sn]; ip<(L->sn_up_size)[sn]; ip++) { /* rows held in the update block */
+	i = (L->sn_struct)[sn][ ip ];
+	v = (L->up_blocks)[sn][ jp*(L->up_blocks_ld)[sn] + (ip-(L->sn_size)[sn]) ];
+
+	if (taucs_re(v) || taucs_im(v)) { 
+	  len[j] ++;
+	  nnz ++;
+	}
+      }
+    }
+  }
+
+
+  C = taucs_dtl(ccs_create)(n,n,nnz);
+  if (!C) {
+    taucs_free(len);
+    return NULL;
+  }
+
+#ifdef TAUCS_CORE_SINGLE
+  C->flags = TAUCS_SINGLE;
+#endif
+
+#ifdef TAUCS_CORE_DOUBLE
+  C->flags = TAUCS_DOUBLE;
+#endif
+
+#ifdef TAUCS_CORE_SCOMPLEX
+  C->flags = TAUCS_SCOMPLEX;
+#endif
+
+#ifdef TAUCS_CORE_DCOMPLEX
+  C->flags = TAUCS_DCOMPLEX;
+#endif
+
+  C->flags |= TAUCS_TRIANGULAR | TAUCS_LOWER;
+
+  (C->colptr)[0] = 0;
+  for (j=1; j<=n; j++) (C->colptr)[j] = (C->colptr)[j-1] + len[j-1];
+
+  taucs_free(len);
+  /* pass 2: same traversal and same zero test as pass 1, now writing row indices and values */
+  for (sn=0; sn<L->n_sn; sn++) {
+    for (jp=0; jp<(L->sn_size)[sn]; jp++) {
+      j = (L->sn_struct)[sn][jp];
+
+      next = (C->colptr)[j];
+
+      /*
+      memcpy((C->rowind) + next,
+	     ((L->sn_struct)[sn]) + jp,
+	     ((L->sn_up_size)[sn] - jp) * sizeof(int));
+      memcpy((C->taucs_values) + next,
+	     ((L->sn_blocks)[sn]) + (jp*(L->sn_blocks_ld)[sn] + jp),
+	     ((L->sn_size)[sn] - jp) * sizeof(taucs_datatype));
+      next += ((L->sn_size)[sn] - jp);
+      memcpy((C->taucs_values) + next,
+	     ((L->up_blocks)[sn]) + jp*(L->up_blocks_ld)[sn],
+	     ((L->sn_up_size)[sn] - (L->sn_size)[sn]) * sizeof(taucs_datatype));
+      */
+
+      for (ip=jp; ip<(L->sn_size)[sn]; ip++) {
+	i = (L->sn_struct)[sn][ ip ];
+	v = (L->sn_blocks)[sn][ jp*(L->sn_blocks_ld)[sn] + ip ];
+
+	if (!taucs_re(v) && !taucs_im(v)) continue;
+	/*if (v == 0.0) continue;*/
+
+	(C->rowind)[next] = i;
+	(C->taucs_values)[next] = v;
+	next++;
+      }
+      for (ip=(L->sn_size)[sn]; ip<(L->sn_up_size)[sn]; ip++) {
+	i = (L->sn_struct)[sn][ ip ];
+	v = (L->up_blocks)[sn][ jp*(L->up_blocks_ld)[sn] + (ip-(L->sn_size)[sn]) ];
+
+	if (!taucs_re(v) && !taucs_im(v)) continue;
+	/*if (v == 0.0) continue;*/
+
+	(C->rowind)[next] = i;
+	(C->taucs_values)[next] = v;
+	next++;
+      }
+    }
+  }
+
+  return C;
+}
+
+/* just get the diagonal of a supernodal factor, for Penny */
+
+/*
+ * Extract the diagonal of a supernodal factor.
+ *
+ * vL: pointer to a supernodal_factor_matrix, passed as void*.
+ *
+ * Returns a freshly allocated array of L->n values indexed by global
+ * column number, or NULL if the allocation fails.
+ */
+taucs_datatype*
+taucs_dtl(supernodal_factor_get_diag)(void* vL)
+{
+  supernodal_factor_matrix* factor = (supernodal_factor_matrix*) vL;
+  taucs_datatype* result;
+  int s;   /* supernode index                    */
+  int k;   /* local column inside supernode s    */
+
+  result = (taucs_datatype*) taucs_malloc((factor->n) * sizeof(taucs_datatype));
+  if (!result) return NULL;
+
+  for (s = 0; s < factor->n_sn; s++) {
+    for (k = 0; k < (factor->sn_size)[s]; k++) {
+      /* entry (k,k) of the supernode block is the diagonal of the */
+      /* global column stored at position k of the row structure   */
+      result[ (factor->sn_struct)[s][k] ] =
+        (factor->sn_blocks)[s][ k*(factor->sn_blocks_ld)[s] + k ];
+    }
+  }
+
+  return result;
+}
+
+
+/*************************************************************/
+/* create and free frontal matrices                          */
+/*************************************************************/
+
+/*
+ * Allocate a frontal matrix for one supernode.
+ *
+ * firstcol_in_supernode: not used by this function.
+ * sn_size: number of columns in the supernode.
+ * n:       total number of rows of the front (supernode + update rows).
+ * rowind:  row indices of the front; the first sn_size entries become
+ *          sn_vertices and the remaining n-sn_size become up_vertices.
+ *          The array is referenced, not copied.
+ *
+ * The three dense blocks (SFM_F1: sn_size x sn_size, SFM_F2: up_size x
+ * sn_size, SFM_U: up_size x up_size) are zero-initialized; a block with
+ * zero elements is left NULL.
+ *
+ * Returns the new front, or NULL if any allocation fails.
+ */
+static supernodal_frontal_matrix* 
+supernodal_frontal_create(int* firstcol_in_supernode,
+			  int sn_size,
+			  int n, 
+			  int* rowind)
+{
+  supernodal_frontal_matrix* tmp;
+
+  tmp = (supernodal_frontal_matrix*)taucs_malloc(sizeof(supernodal_frontal_matrix));
+  if(tmp==NULL) return NULL;
+
+  tmp->n       = n;
+  tmp->sn_size = sn_size;
+  tmp->up_size = n-sn_size;
+
+  tmp->rowind      = rowind;
+  tmp->sn_vertices = rowind;
+  tmp->up_vertices = rowind + sn_size;
+
+  /* on some platforms, malloc(0) fails, so we avoid such calls */
+
+  tmp->SFM_F1 = tmp->SFM_F2 = tmp->SFM_U = NULL;
+
+  /* element counts are formed in size_t: the product of two int sizes */
+  /* can exceed INT_MAX for large fronts                               */
+
+  if (tmp->sn_size)
+    tmp->SFM_F1 = (taucs_datatype*)taucs_calloc((size_t)(tmp->sn_size)*(size_t)(tmp->sn_size),
+						sizeof(taucs_datatype));
+
+  if (tmp->sn_size && tmp->up_size)
+    tmp->SFM_F2 = (taucs_datatype*)taucs_calloc((size_t)(tmp->up_size)*(size_t)(tmp->sn_size),
+						sizeof(taucs_datatype));
+
+  if (tmp->up_size)
+    tmp->SFM_U  = (taucs_datatype*)taucs_calloc((size_t)(tmp->up_size)*(size_t)(tmp->up_size),
+						sizeof(taucs_datatype));
+
+  /* a block that should exist but is NULL means an allocation failed */
+  if((   tmp->SFM_F1==NULL && tmp->sn_size)
+     || (tmp->SFM_F2==NULL && tmp->sn_size && tmp->up_size)
+     || (tmp->SFM_U ==NULL && tmp->up_size)) {
+    taucs_free(tmp->SFM_U);
+    taucs_free(tmp->SFM_F1);
+    taucs_free(tmp->SFM_F2);
+    taucs_free(tmp);
+    return NULL;
+  }
+
+  return tmp;
+}
+
+/*
+ * Release a frontal matrix and the dense blocks it still owns.
+ * A NULL argument is accepted and ignored.
+ */
+static void supernodal_frontal_free(supernodal_frontal_matrix* to_del)
+{
+  if (!to_del) return;
+
+  /* SFM_F1 and SFM_F2 are eventually moved to the factor, but this */
+  /* function may be called before that happens, so they are freed  */
+  /* here together with SFM_U.                                      */
+  taucs_free(to_del->SFM_U);
+  taucs_free(to_del->SFM_F2);
+  taucs_free(to_del->SFM_F1);
+
+  taucs_free(to_del);
+}
+
+/*************************************************************/
+/* factor a frontal matrix                                   */
+/*************************************************************/
+
+/*
+ * Assemble and factor the frontal matrix of supernode sn.
+ *
+ * The columns of A listed in firstcol_in_supernode are added into the
+ * SFM_F1 / SFM_F2 blocks of mtr, SFM_F1 is factored with potrf, SFM_F2
+ * is completed with trsm, and SFM_U is updated with herk. On success the
+ * SFM_F1 and SFM_F2 blocks are handed over to snL (sn_blocks / up_blocks
+ * of supernode sn) and cleared in mtr so they are not freed twice.
+ *
+ * bitmap is scratch space indexed by row number; it is overwritten for
+ * every row index of this front.
+ *
+ * Returns 0 on success and -1 if potrf reports a nonzero INFO; in that
+ * case the blocks remain owned by mtr.
+ */
+cilk
+static int
+multifrontal_supernodal_front_factor(int sn,
+				     int* firstcol_in_supernode,
+				     int sn_size,
+				     taucs_ccs_matrix* A,
+				     supernodal_frontal_matrix* mtr,
+				     int* bitmap,
+				     supernodal_factor_matrix* snL)
+{
+  int i,j;
+  int* ind;
+  taucs_datatype* re;
+  /* must start at 0: potrf is skipped for an empty supernode, yet */
+  /* INFO is tested unconditionally below                          */
+  int INFO = 0;
+
+  /* creating transform for real indices */
+  for(i=0;i<mtr->sn_size;i++) bitmap[mtr->sn_vertices[i]] = i;
+  for(i=0;i<mtr->up_size;i++) bitmap[mtr->up_vertices[i]] = mtr->sn_size + i;
+
+  /* adding sn_size column of A to first sn_size column of frontal matrix */
+
+  for(j=0;j<(mtr->sn_size);j++) {
+    ind = &(A->rowind[A->colptr[*(firstcol_in_supernode+j)]]);
+    re  = &(A->taucs_values[A->colptr[*(firstcol_in_supernode+j)]]); 
+    for(i=0;
+	i < A->colptr[*(firstcol_in_supernode+j)+1] 
+            - A->colptr[*(firstcol_in_supernode+j)];
+	i++) {
+      /* rows inside the supernode go to F1, the others to F2 */
+      if (bitmap[ind[i]] < mtr->sn_size)
+	mtr->SFM_F1[ (mtr->sn_size)*j + bitmap[ind[i]]] =
+	  taucs_add( mtr->SFM_F1[ (mtr->sn_size)*j + bitmap[ind[i]]] , re[i] );
+      else
+	mtr->SFM_F2[ (mtr->up_size)*j + bitmap[ind[i]] - mtr->sn_size] =
+	  taucs_add( mtr->SFM_F2[ (mtr->up_size)*j + bitmap[ind[i]] - mtr->sn_size] , re[i] );
+    }
+  }
+
+  /* we use the BLAS through the Fortran interface */
+
+  /* solving of lower triangular system for L */
+  if (mtr->sn_size) {
+    spawn taucs_cilk_potrf ("LOWER",
+		 &(mtr->sn_size),
+		 mtr->SFM_F1,&(mtr->sn_size),
+		 &INFO);
+    sync;
+  }
+
+
+  if (INFO) {
+    taucs_printf("sivan %d %d\n",sn,sn_size);
+    taucs_printf("\t\tLL^T Factorization: Matrix is not positive definite.\n");
+    taucs_printf("\t\t                    nonpositive pivot in column %d\n",
+		 mtr->sn_vertices[INFO-1]);
+    return -1;
+  }
+
+  /* getting completion for found columns of L */
+  if (mtr->up_size && mtr->sn_size) {
+
+    spawn taucs_cilk_trsm ("Right",
+			   "Lower",
+			   "Conjugate",
+			   "No unit diagonal",
+			   &(mtr->up_size),&(mtr->sn_size),
+			   &taucs_one_const,
+			   mtr->SFM_F1,&(mtr->sn_size),
+			   mtr->SFM_F2,&(mtr->up_size));
+    sync;
+  }
+
+  /* hand the factored blocks over to the factor */
+  (snL->sn_blocks   )[sn] = mtr->SFM_F1;
+  (snL->sn_blocks_ld)[sn] = mtr->sn_size;
+
+  (snL->up_blocks   )[sn] = mtr->SFM_F2;
+  (snL->up_blocks_ld)[sn] = mtr->up_size;
+
+  /* computation of updated part of frontal matrix */
+  if (mtr->up_size && mtr->sn_size) {
+    spawn taucs_cilk_herk ("Lower",
+			   "No Conjugate",
+			   &(mtr->up_size),&(mtr->sn_size),
+			   &taucs_minusone_real_const,
+			   mtr->SFM_F2,&(mtr->up_size),
+			   &taucs_one_real_const,
+			   mtr->SFM_U, &(mtr->up_size));
+    sync;
+  }
+
+  mtr->SFM_F1 = NULL; /* so we don't free twice */
+  mtr->SFM_F2 = NULL; /* so we don't free twice */
+
+  return 0;
+}
+
+/*************************************************************/
+/* extend-add                                                */
+/*************************************************************/
+
+/*
+ * Extend-add: accumulate the update block (SFM_U) of a child front into
+ * the blocks of its parent front.
+ *
+ * bitmap is scratch space indexed by row number. It is first filled with
+ * the parent-local position of every parent row; each entry (i,j), i>=j,
+ * of the child's update block is then added to the parent block (SFM_F1,
+ * SFM_F2 or SFM_U) that holds the corresponding position.
+ */
+static void 
+multifrontal_supernodal_front_extend_add(
+					 supernodal_frontal_matrix* parent_mtr,
+					 supernodal_frontal_matrix* my_mtr,
+					 int* bitmap)
+{
+  int k;
+  int col,row;       /* position inside the child's update block */
+  int p_lo,p_hi;     /* parent-local indices, p_lo <= p_hi       */
+
+  for (k=0; k<parent_mtr->sn_size; k++)
+    bitmap[parent_mtr->sn_vertices[k]] = k;
+  for (k=0; k<parent_mtr->up_size; k++)
+    bitmap[parent_mtr->up_vertices[k]] = (parent_mtr->sn_size)+k;
+
+  for (col=0; col<my_mtr->up_size; col++) {
+    for (row=col; row<my_mtr->up_size; row++) {
+      taucs_datatype* dest;
+      int a = bitmap[ my_mtr->up_vertices[col] ];
+      int b = bitmap[ my_mtr->up_vertices[row] ];
+
+      /* the indices are not necessarily sorted, so order the pair */
+      if (a > b) {
+        p_lo = b;
+        p_hi = a;
+      } else {
+        p_lo = a;
+        p_hi = b;
+      }
+
+      /* pick the parent block that holds position (p_hi,p_lo) */
+      if (p_lo >= parent_mtr->sn_size)
+        dest = &( (parent_mtr->SFM_U)[ (parent_mtr->up_size)*(p_lo-parent_mtr->sn_size)
+                                       + (p_hi-parent_mtr->sn_size) ] );
+      else if (p_hi >= parent_mtr->sn_size)
+        dest = &( (parent_mtr->SFM_F2)[ (parent_mtr->up_size)*p_lo
+                                        + (p_hi-parent_mtr->sn_size) ] );
+      else
+        dest = &( (parent_mtr->SFM_F1)[ (parent_mtr->sn_size)*p_lo + p_hi ] );
+
+      *dest = taucs_add( *dest , (my_mtr->SFM_U)[(my_mtr->up_size)*col+row] );
+    }
+  }
+}
+
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*************************************************************/
+/* symbolic elimination                                      */
+/*************************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/* UNION FIND ROUTINES */
+
+/* Make element i the representative of its own singleton set; returns i. */
+static int uf_makeset(int* uf, int i)
+{
+  *(uf + i) = i;
+  return i;
+}
+/*
+ * Return the representative of the set containing i. Every element
+ * visited on the way is re-pointed directly at the representative.
+ */
+static int uf_find   (int* uf, int i)
+{
+  int root = i;
+  int up;
+
+  /* first walk: locate the representative */
+  while (uf[root] != root)
+    root = uf[root];
+
+  /* second walk: point every element of the path at it */
+  while (uf[i] != root) {
+    up    = uf[i];
+    uf[i] = root;
+    i     = up;
+  }
+
+  return root;
+}
+/*
+ * Merge the set containing s into the set containing t and return the
+ * representative of the merged set (the representative of t).
+ */
+static int uf_union  (int* uf, int s, int t) {
+  int rep_s = uf_find(uf,s);
+  int rep_t = uf_find(uf,t);
+
+  /* the representative of t always becomes the parent */
+  uf[rep_s] = rep_t;
+
+  return rep_t;
+}
+
+/*
+ * Number the subtree rooted at j in postorder.
+ *
+ * first_child / next_child: child lists of the tree, terminated by -1.
+ * postorder:  if non-NULL, postorder[k] receives the node numbered k.
+ * ipostorder: if non-NULL, ipostorder[node] receives the node's number.
+ * next:       in/out counter holding the next number to hand out.
+ */
+static
+void recursive_postorder(int  j,
+			 int  first_child[],
+			 int  next_child[],
+			 int  postorder[],
+			 int  ipostorder[],
+			 int* next)
+{
+  int child = first_child[j];
+  int slot;
+
+  /* all children are numbered before j itself */
+  while (child != -1) {
+    recursive_postorder(child,first_child,next_child,
+                        postorder,ipostorder,next);
+    child = next_child[child];
+  }
+
+  slot = *next;
+  if (postorder)  postorder [slot] = j;
+  if (ipostorder) ipostorder[j]    = slot;
+  *next = slot + 1;
+}
+
+#define GILBERT_NG_PEYTON_ANALYSIS_SUP
+
+/* in a few tests the supernodal version seemed slower */
+#undef GILBERT_NG_PEYTON_ANALYSIS_SUP
+
+/* Make element i the representative of its own singleton set; returns i. */
+static int ordered_uf_makeset(int* uf, int i)
+{
+  int* slot = uf + i;
+
+  *slot = i;
+  return *slot;
+}
+/*
+ * Return the representative of the set containing i, re-pointing every
+ * element on the path from i directly at that representative.
+ */
+static int ordered_uf_find   (int* uf, int i) 
+{
+  int rep;
+  int walk;
+
+  /* follow parent links until an element that is its own parent */
+  for (rep = i; uf[rep] != rep; rep = uf[rep])
+    ;
+
+  /* compress the path that was just followed */
+  for (walk = i; walk != rep; ) {
+    int parent = uf[walk];
+    uf[walk] = rep;
+    walk = parent;
+  }
+
+  return rep;
+}
+/*
+ * Link two set representatives so that the larger index becomes the
+ * representative of the union; returns that representative.
+ *
+ * Both s and t must be representatives (uf[s]==s, uf[t]==t) and callers
+ * are expected to pass t > s; all three conditions are asserted.
+ */
+static int ordered_uf_union  (int* uf, int s, int t) 
+{
+  assert(uf[t] == t);
+  assert(uf[s] == s);
+  assert(t > s);
+  if (t > s) {
+    uf[s] = t; 
+    return t; 
+  } else {
+    /* only reachable when the assertions are compiled out */
+    uf[t] = s; 
+    return s; 
+  }
+}
+
+/*
+ * Store the depth of every node of the subtree rooted at j.
+ *
+ * level[j] is set to level_j unless isroot is nonzero; every child is
+ * then visited with level_j+1. Child lists end with -1.
+ */
+static void 
+tree_level(int j,
+	   int isroot, 
+	   int first_child[],
+	   int next_child[],
+	   int level[],
+	   int level_j)
+{
+  int child;
+
+  /* the root itself gets no entry in level[] */
+  if (isroot == 0)
+    level[j] = level_j;
+
+  child = first_child[j];
+  while (child != -1) {
+    tree_level(child,FALSE,first_child,next_child,level,level_j+1);
+    child = next_child[child];
+  }
+}
+
+/*
+ * For every node of the subtree rooted at j, store the smallest
+ * postorder number found in that node's subtree.
+ *
+ * first_descendant[j] is written unless isroot is nonzero. Child lists
+ * end with -1.
+ */
+static void
+tree_first_descendant(int j,
+		      int isroot, 
+		      int first_child[],
+		      int next_child[],
+		      int ipostorder[],
+		      int first_descendant[])
+{
+  int child;
+  int smallest = ipostorder[j];   /* a node belongs to its own subtree */
+
+  child = first_child[j];
+  while (child != -1) {
+    tree_first_descendant(child,FALSE,
+                          first_child,next_child,
+                          ipostorder,first_descendant);
+    if (smallest > first_descendant[child])
+      smallest = first_descendant[child];
+    child = next_child[child];
+  }
+
+  if (isroot) return;
+  first_descendant[j] = smallest;
+}
+
+
+/*
+ * Forward declarations of the two elimination-tree routines. Their
+ * definitions are not in this part of the file.
+ * NOTE(review): judging by the parameter names they fill parent[] and
+ * the column/row counts of the factor -- confirm against the definitions.
+ */
+int
+taucs_ccs_etree(taucs_ccs_matrix* A,
+		int* parent,
+		int* l_colcount,
+		int* l_rowcount,
+		int* l_nnz);
+
+int 
+taucs_ccs_etree_liu(taucs_ccs_matrix* A,
+		    int* parent,
+		    int* l_colcount,
+		    int* l_rowcount,
+		    int* l_nnz);
+
+
+
+/*
+ * Symbolic elimination of the subtree rooted at column j.
+ *
+ * The children of j (first_child / next_child lists, terminated by -1)
+ * are processed first. Column j is then either appended to the supernode
+ * of its only child, or it starts a new supernode whose row structure is
+ * collected in rowind and copied into a freshly allocated sn_rowind
+ * entry.
+ *
+ * j == A->n is treated as "not a real column": it never joins or creates
+ * a supernode, but its children are still linked under index *n_sn.
+ *
+ * n_sn:             in/out count of supernodes created so far.
+ * sn_size:          per supernode, number of columns.
+ * sn_up_size:       per supernode, total number of row indices.
+ * sn_rowind:        per supernode, allocated row-index array.
+ * sn_first_child,
+ * sn_next_child:    supernode tree, built here as child lists.
+ * rowind:           scratch array for the row structure being built.
+ * column_to_sn_map: per column, the supernode it was assigned to.
+ * map:              scratch markers; map[i]==j means row i is already
+ *                   recorded for column j.
+ * do_order:         if nonzero, a new supernode's row indices are sorted
+ *                   with compare_indirect_ints through ipostorder.
+ *
+ * Returns 0 on success, -1 if a child call fails or if allocating a
+ * row-index array fails.
+ */
+static int
+recursive_symbolic_elimination(int            j,
+			       taucs_ccs_matrix* A,
+			       int            first_child[],
+			       int            next_child[],
+			       int*           n_sn,
+			       int            sn_size[],
+			       int            sn_up_size[],
+			       int*           sn_rowind[],
+			       int            sn_first_child[], 
+			       int            sn_next_child[], 
+			       int            rowind[],
+			       int            column_to_sn_map[],
+			       int            map[],
+			       int            do_order,
+			       int            ipostorder[]
+			       )
+{
+  int  i,ip,c,c_sn;
+  int  in_previous_sn;
+  int  nnz = 0; /* just to suppress the warning */
+  
+  /* process all the children first */
+  for (c=first_child[j]; c != -1; c = next_child[c]) {
+    if (recursive_symbolic_elimination(c,A,
+				       first_child,next_child,
+				       n_sn,
+				       sn_size,sn_up_size,sn_rowind,
+				       sn_first_child,sn_next_child,
+				       rowind, /* temporary */
+				       column_to_sn_map,
+				       map,
+				       do_order,ipostorder
+				       ) 
+	== -1) return -1;
+  }
+
+  /* decide whether j can be appended to its only child's supernode */
+  in_previous_sn = 1;
+  if (j == A->n) 
+    in_previous_sn = 0; /* this is not a real column */
+  else if (first_child[j] == -1) 
+    in_previous_sn = 0; /* this is a leaf */
+  else if (next_child[first_child[j]] != -1) 
+    in_previous_sn = 0; /* more than 1 child */
+  else { 
+    /* check that the structure is nested */
+    /* map contains child markers         */
+
+    c=first_child[j];
+    for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      in_previous_sn = in_previous_sn && (map[i] == c);
+    }
+  }
+
+  if (in_previous_sn) {
+    c = first_child[j];
+    c_sn = column_to_sn_map[c];
+    column_to_sn_map[j] = c_sn;
+
+    /* swap row indices so j is at the end of the */
+    /* supernode, not in the update indices       */
+    for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) 
+      if (sn_rowind[c_sn][ip] == j) break;
+    assert(ip<sn_up_size[c_sn]);
+    sn_rowind[c_sn][ip] = sn_rowind[c_sn][sn_size[c_sn]];
+    sn_rowind[c_sn][sn_size[c_sn]] = j;
+
+    /* mark the nonzeros in the map */
+    for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) 
+      map[ sn_rowind[c_sn][ip] ] = j;
+
+    sn_size   [c_sn]++;
+
+    return 0;
+  }
+
+  /* we are in a new supernode */
+
+  if (j < A->n) {
+    /* the diagonal always comes first */
+    nnz = 1;
+    rowind[0] = j;
+    map[j]    = j;
+    
+    /* rows inherited from the update parts of the children's supernodes */
+    for (c=first_child[j]; c != -1; c = next_child[c]) {
+      c_sn = column_to_sn_map[c];
+      for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) {
+	i = sn_rowind[c_sn][ip];
+	if (i > j && map[i] != j) { /* new row index */
+	  map[i] = j;
+	  rowind[nnz] = i;
+	  nnz++;
+	}
+      }
+    }
+    
+    /* rows of column j of A itself */
+    for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      if (map[i] != j) { /* new row index */
+	map[i] = j;
+	rowind[nnz] = i;
+	nnz++;
+      }
+    }
+  }
+    
+  /* link the children's supernodes as children of supernode *n_sn.    */
+  /* NOTE(review): sn_next_child of the first child is not written     */
+  /* here; presumably the caller initialized it to -1 -- confirm.      */
+  /*printf("children of sn %d: ",*n_sn);*/
+  for (c=first_child[j]; c != -1; c = next_child[c]) {
+    c_sn = column_to_sn_map[c];
+    /*printf("%d ",c_sn);*/
+    if (c==first_child[j])
+      sn_first_child[*n_sn] = c_sn;
+    else {
+      sn_next_child[ c_sn ] = sn_first_child[*n_sn];
+      sn_first_child[*n_sn] = c_sn;
+    }
+  }
+  /*printf("\n");*/
+
+  if (j < A->n) {
+    /* record the new supernode and copy its row structure */
+    column_to_sn_map[j] = *n_sn;
+    sn_size   [*n_sn] = 1;
+    sn_up_size[*n_sn] = nnz;
+    sn_rowind [*n_sn] = (int*) taucs_malloc(nnz * sizeof(int));
+    if (!( sn_rowind [*n_sn] )) return -1;
+    for (ip=0; ip<nnz; ip++) sn_rowind[*n_sn][ip] = rowind[ip];
+    if (do_order) {
+      /* Sivan and Vladimir: we think that we can sort in */
+      /* column order, not only in etree postorder.       */
+      /*
+	radix_sort(sn_rowind [*n_sn],nnz);
+	qsort(sn_rowind [*n_sn],nnz,sizeof(int),compare_ints);
+      */
+      compare_indirect_map = ipostorder;
+      qsort(sn_rowind [*n_sn],nnz,sizeof(int),compare_indirect_ints);
+    }
+    assert(sn_rowind [*n_sn][0] == j);
+    (*n_sn)++;
+  }
+
+  return 0;
+}
+
+/* count zeros and nonzeros in a supernode to compute the */
+/* utility of merging fundamental supernodes.             */
+
+/* Pair of counts describing the entries of a (possibly merged) supernode. */
+typedef struct {
+  double zeros;    /* entries that are stored but explicitly zero */
+  double nonzeros; /* entries of the original supernode structure */
+} znz;
+
+/*
+ * Try to merge supernode sn with all of its children.
+ *
+ * The children are processed recursively first. The row structure that a
+ * merged supernode would have is then built in rowind (scratch): first
+ * the columns of the children and of sn, then the remaining update rows.
+ * The number of explicit zeros the merge would introduce is computed with
+ * the per-row counters in zcount, and a set of size/ratio thresholds
+ * decides whether to merge.
+ *
+ * If the merge is rejected, nothing is modified for sn and the counts of
+ * sn alone are returned. If it is accepted, sn_size, sn_up_size and
+ * sn_rowind of sn are replaced, column_to_sn_map is updated for the
+ * children's columns, the children's row-index arrays are freed and
+ * their sizes zeroed, the grandchildren become the children of sn, and
+ * the merged counts are returned.
+ *
+ * map is scratch: map[i]==sn marks row i as already present in rowind.
+ * n_sn is only referenced from disabled debug output.
+ *
+ * NOTE(review): the two temporary allocations are checked only with
+ * assert(), and the result of taucs_realloc is not checked at all; the
+ * return type offers no way to report an allocation failure.
+ */
+static znz
+recursive_amalgamate_supernodes(int           sn,
+				int*           n_sn,
+				int            sn_size[],
+				int            sn_up_size[],
+				int*           sn_rowind[],
+				int            sn_first_child[], 
+				int            sn_next_child[], 
+				int            rowind[],
+				int            column_to_sn_map[],
+				int            map[],
+				int            do_order,
+				int            ipostorder[]
+				)
+{
+  int  i,ip,c_sn,gc_sn;
+  /*int  i,ip,c,c_sn,gc_sn;*/
+  int  nnz;
+  int  nchildren /*, ichild*/; /* number of children, child index */
+  znz* c_znz = NULL;
+  znz  sn_znz, merged_znz;
+  /*int zero_count = 0;*/
+  int new_sn_size, new_sn_up_size;
+
+  /* counts of this supernode alone: a rectangular update part plus */
+  /* the lower triangle of the diagonal block                       */
+  sn_znz.zeros    = 0.0;
+  sn_znz.nonzeros = (double) (((sn_up_size[sn] - sn_size[sn]) * sn_size[sn]) 
+                              + (sn_size[sn] * (sn_size[sn] + 1))/2);
+
+  if (sn_first_child[sn] == -1) { /* leaf */
+    return sn_znz;
+  }
+
+  nchildren = 0;
+  for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn])
+    nchildren++;
+
+  /*  c_znz = (znz*) alloca(nchildren * sizeof(znz));*/
+  c_znz = (znz*) taucs_malloc(nchildren * sizeof(znz));
+  assert(c_znz);
+
+  /*printf("supernode %d out of %d\n",sn,*n_sn);*/
+
+  /* amalgamate within each child's subtree first and collect its counts */
+
+  i = 0;
+  for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn]) {
+    c_znz[i] = 
+      recursive_amalgamate_supernodes(c_sn,
+				      n_sn,
+				      sn_size,sn_up_size,sn_rowind,
+				      sn_first_child,sn_next_child,
+				      rowind, /* temporary */
+				      column_to_sn_map,
+				      map,
+				      do_order,ipostorder
+				      );
+    assert(c_znz[i].zeros + c_znz[i].nonzeros ==
+	   (double) (((sn_up_size[c_sn] - sn_size[c_sn]) * sn_size[c_sn]) 
+		     + (sn_size[c_sn] * (sn_size[c_sn] + 1))/2 ));
+    i++;
+  }
+
+  /* counts of sn plus all of its (possibly already merged) children */
+  merged_znz.nonzeros = sn_znz.nonzeros;
+  merged_znz.zeros    = sn_znz.zeros;
+                   
+  for (i=0; i<nchildren; i++) {
+    merged_znz.nonzeros += (c_znz[i]).nonzeros;
+    merged_znz.zeros    += (c_znz[i]).zeros;
+  }
+
+  taucs_free(c_znz);
+
+  /*  printf("supernode %d out of %d (continuing)\n",sn,*n_sn);*/
+
+  /* should we merge the supernode with its children? */
+
+  /* columns of the merged supernode: the children's columns ... */
+  nnz = 0;
+  for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn]) {
+    for (ip=0; ip<sn_size[c_sn]; ip++) {
+      i = sn_rowind[c_sn][ip];
+      assert( map[i] != sn );
+      map[i] = sn;
+      rowind[nnz] = i;
+      nnz++;
+    }
+  }
+
+  /* ... followed by the columns of sn itself */
+  for (ip=0; ip<sn_size[sn]; ip++) {
+    i = sn_rowind[sn][ip];
+    assert( map[i] != sn );
+    map[i] = sn;
+    rowind[nnz] = i;
+    nnz++;
+  }
+
+  new_sn_size = nnz;
+
+  /* update rows of the merged supernode: any row not yet recorded */
+  for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn]) {
+    for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) {
+      i = sn_rowind[c_sn][ip];
+      if (map[i] != sn) { /* new row index */
+	map[i] = sn;
+	rowind[nnz] = i;
+	nnz++;
+      }
+    }
+  }
+
+  for (ip=sn_size[sn]; ip<sn_up_size[sn]; ip++) {
+    i = sn_rowind[sn][ip];
+    if (map[i] != sn) { /* new row index */
+      map[i] = sn;
+      rowind[nnz] = i;
+      nnz++;
+    }
+  }
+  
+  new_sn_up_size = nnz;
+
+  if (do_order) {
+    compare_indirect_map = ipostorder;
+    qsort(rowind,nnz,sizeof(int),compare_indirect_ints);
+  }
+
+  /* determine whether we should merge the supernode and its children */
+
+  {
+    int n;
+    double* zcount = NULL;
+
+    /* n = 1 + largest row index, so zcount can be indexed by row */
+    n = 0;
+    for (ip=0; ip<nnz; ip++) {
+      i = rowind[ip];
+      if (i >= n) n = i+1;
+    }
+
+    /*zcount = (double*) alloca(n * sizeof(double));*/
+    zcount = (double*) taucs_malloc(n * sizeof(double));
+    assert(zcount);
+    
+    /* entries each row would hold in the merged supernode ... */
+    for (ip=0; ip<new_sn_size; ip++) {
+      i = rowind[ip]; assert(i<n);
+      zcount[i] = (double) (ip+1);
+    }
+    for (ip=new_sn_size; ip<new_sn_up_size; ip++) {
+      i = rowind[ip]; assert(i<n);
+      zcount[i] = (double) new_sn_size;
+    }
+
+    /*
+    for (ip=0; ip<new_sn_up_size; ip++) 
+      printf("row %d zcount = %.0f\n",rowind[ip],zcount[rowind[ip]]);
+    */
+    
+    /* ... minus the entries the row already holds in each child ... */
+    for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn]) {
+      for (ip=0; ip<sn_size[c_sn]; ip++) {
+	i = sn_rowind[c_sn][ip]; assert(i<n);
+	zcount[i] -= (double) (ip+1);
+      }
+      for (ip=sn_size[c_sn]; ip<sn_up_size[c_sn]; ip++) {
+	i = sn_rowind[c_sn][ip]; assert(i<n);
+	zcount[i] -= (double) sn_size[c_sn];
+      }
+    }
+
+    /* ... and in sn itself; what remains are newly introduced zeros */
+    for (ip=0; ip<sn_size[sn]; ip++) {
+      i = sn_rowind[sn][ip]; assert(i<n);
+      zcount[i] -= (double) (ip+1);
+    }
+    for (ip=sn_size[sn]; ip<sn_up_size[sn]; ip++) {
+      i = sn_rowind[sn][ip]; assert(i<n);
+      zcount[i] -= (double) sn_size[sn];
+    }
+
+    /*
+    for (ip=0; ip<new_sn_up_size; ip++) 
+      printf("ROW %d zcount = %.0f\n",rowind[ip],zcount[rowind[ip]]);
+    printf("zeros before merging %.0f\n",merged_znz.zeros);
+    */
+    
+    for (ip=0; ip<new_sn_up_size; ip++) {
+      i = rowind[ip]; assert(i<n);
+      assert(zcount[i] >= 0.0);
+      merged_znz.zeros += zcount[i];
+    }
+
+    /*printf("zeros after merging %.0f\n",merged_znz.zeros);*/
+
+    /* voodoo constants (need some kind of a utility function) */
+    if ((new_sn_size < 16)
+	||
+	((sn_size[sn] < 50) && (merged_znz.zeros < 0.5 * merged_znz.nonzeros))
+	||
+	((sn_size[sn] < 250) && (merged_znz.zeros < 0.25 * merged_znz.nonzeros))
+	||
+	((sn_size[sn] < 500) && (merged_znz.zeros < 0.10 * merged_znz.nonzeros))
+	||
+	(merged_znz.zeros < 0.05 * merged_znz.nonzeros)
+	) {
+      /* merge accepted: fall through to the merging code below */
+      /*
+      taucs_printf("merging sn %d, zeros (%f) vs nonzeros (%f)\n",
+		   sn,merged_znz.zeros,merged_znz.nonzeros);
+      */
+    } else {
+      /* merge rejected: report the counts of sn alone */
+      /*
+      taucs_printf("sn %d, too many zeros (%f) vs nonzeros (%f)\n",
+		   sn,merged_znz.zeros,merged_znz.nonzeros);
+      printf("returning without merging\n");
+      */
+      taucs_free(zcount);
+      return sn_znz;
+    }
+
+    taucs_free(zcount);
+  }
+
+  /* now merge the children lists */
+
+  sn_size[sn]    = new_sn_size;
+  sn_up_size[sn] = new_sn_up_size;
+  sn_rowind[sn]  = (int*) taucs_realloc(sn_rowind[sn], 
+				  new_sn_up_size * sizeof(int));
+  for (ip=0; ip<new_sn_up_size; ip++) sn_rowind[sn][ip] = rowind[ip];
+
+  /*  printf("supernode %d out of %d (merging)\n",sn,*n_sn);*/
+
+  /* reassign the children's columns to sn and collect the */
+  /* grandchildren in rowind, which is free for reuse now   */
+  nchildren = 0;
+  for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn]) {
+    for (ip=0; ip<sn_size[c_sn]; ip++) {
+      i = (sn_rowind[c_sn])[ip];
+      assert(column_to_sn_map[i] == c_sn);
+      column_to_sn_map[i] = sn;
+    }
+
+    for (gc_sn=sn_first_child[c_sn]; gc_sn != -1; gc_sn = sn_next_child[gc_sn]) {
+      rowind[nchildren] = gc_sn;
+      nchildren++;
+    }
+  }
+
+  /* free the children's rowind vectors */
+  for (c_sn=sn_first_child[sn]; c_sn != -1; c_sn = sn_next_child[c_sn]) {
+    taucs_free( sn_rowind[c_sn] );
+    sn_rowind[c_sn]  = NULL;
+    sn_size[c_sn]    = 0;
+    sn_up_size[c_sn] = 0;
+  }
+
+  /* the grandchildren become the children of sn */
+  sn_first_child[sn] = -1;
+  for (i=0; i<nchildren; i++) {
+    sn_next_child[ rowind[i] ] = sn_first_child[sn];
+    sn_first_child[sn] = rowind[i];
+  }    
+
+  /*
+  printf("supernode %d out of %d (done)\n",sn,*n_sn);
+  printf("returning, merging\n");
+  */
+  return merged_znz;
+}
+#endif /* #ifdef TAUCS_CORE_GENERAL */
+
+
+#ifndef TAUCS_CORE_GENERAL
+
+
+/*************************************************************/
+/* factor routines                                           */
+/*************************************************************/
+
+/*
+ * Fold the frontal matrix returned by a child into the parent's frontal
+ * matrix, creating the parent's matrix on first use.
+ *
+ * child_matrix:  front returned by the child; always released here.
+ * my_matrix_ptr: address of the parent's front pointer. It is set when
+ *                the front is created and reset to NULL when the front
+ *                is released because of a failure.
+ * is_root:       if nonzero, no parent front exists and the child's
+ *                front is simply released.
+ * v, sn_size, sn_up_size, rowind: passed to supernodal_frontal_create.
+ * bitmap:        scratch space for the extend-add.
+ * fail:          in/out failure flag; set to TRUE if the parent's front
+ *                cannot be allocated.
+ */
+static void extend_add_wrapper(supernodal_frontal_matrix * child_matrix,
+			       supernodal_frontal_matrix ** my_matrix_ptr,
+			       int is_root,
+			       int *v,
+			       int sn_size,
+			       int sn_up_size,
+			       int * rowind,
+			       int * bitmap,
+			       int * fail) {
+
+  if (*fail) {
+    /* Release the partially assembled front and clear the caller's */
+    /* pointer: the caller frees a non-NULL front again when it     */
+    /* sees *fail, which would otherwise be a double free.          */
+    if (*my_matrix_ptr) {
+      supernodal_frontal_free(*my_matrix_ptr);
+      *my_matrix_ptr = NULL;
+    }
+    /* accepts NULL, so this is harmless if the child returned nothing */
+    supernodal_frontal_free(child_matrix);
+    return;
+  }
+  
+  if (!is_root) {
+    if (!(*my_matrix_ptr)) {
+      *my_matrix_ptr = supernodal_frontal_create(v,sn_size,sn_up_size,rowind);
+      if (!(*my_matrix_ptr)) {
+	*fail = TRUE;
+	supernodal_frontal_free(child_matrix);
+	return;
+      }
+    }
+    multifrontal_supernodal_front_extend_add(*my_matrix_ptr,child_matrix,bitmap);
+  }
+  
+  /* moved outside "if !is_root"; Sivan 27 Feb 2002 */
+  supernodal_frontal_free(child_matrix);
+}
+
+/*
+ * Multifrontal factorization of the subtree rooted at supernode sn.
+ *
+ * Every child of sn is factored recursively and the frontal matrix it
+ * returns is extend-added into the frontal matrix of sn, which is
+ * created when the first child result arrives (or after the loop if sn
+ * has no children). The front of sn is then factored with
+ * multifrontal_supernodal_front_factor.
+ *
+ * When is_root is nonzero no front is created or factored for sn; only
+ * the children are processed and their fronts released.
+ *
+ * bitmaps[Self] is used as the scratch bitmap.
+ * NOTE(review): Self is presumably the index of the executing Cilk
+ * worker (0 without Cilk) -- confirm against the Cilk headers.
+ *
+ * Returns the frontal matrix of sn for use by the parent, or NULL for
+ * the root and on failure. On failure *fail is set to TRUE.
+ */
+cilk
+static supernodal_frontal_matrix*
+recursive_multifrontal_supernodal_factor_llt(int sn,       /* this supernode */
+					     int is_root,  /* is v the root? */
+#if 1
+					     int** bitmaps,
+#else
+					     int* bitmap,
+#endif
+					     taucs_ccs_matrix* A,
+					     supernodal_factor_matrix* snL,
+					     int* fail)
+{
+  supernodal_frontal_matrix* my_matrix=NULL;
+  /*supernodal_frontal_matrix* child_matrix=NULL;*/
+  int child;
+  int* v; 
+  int  sn_size;
+  int* first_child   = snL->first_child;
+  int* next_child    = snL->next_child;
+
+#ifdef TAUCS_CILK
+  /* Inlet for synchronization: receives the front returned by a */
+  /* spawned child and folds it into my_matrix.                   */
+  inlet void extend_add_inlet(supernodal_frontal_matrix * child_matrix) {
+
+    if (!is_root) {
+      if (!(my_matrix)) {
+	my_matrix = supernodal_frontal_create(v,sn_size,
+					      snL->sn_up_size[sn],snL->sn_struct[sn]);
+
+	if (!(my_matrix)) {
+	  *fail = TRUE;
+	  supernodal_frontal_free(child_matrix);
+	  return;
+	}
+      }
+      multifrontal_supernodal_front_extend_add(my_matrix,child_matrix,bitmaps[Self]);
+    }
+
+    /* moved outside "if !is_root"; Sivan 27 Feb 2002 */
+    supernodal_frontal_free(child_matrix);
+    /*
+      The following approach is not working correctly because nothing is guaranteed about the atomicity of different procedure instances:
+      
+      extend_add_wrapper(child_matrix,&my_matrix,is_root,v,sn_size,snL->sn_up_size[sn],snL->sn_struct[sn],bitmaps[Self],fail);
+    */
+  }
+#endif
+
+  /* Sivan fixed a bug 25/2/2003: v was set even at the root, */
+  /* but this element of sn_struct was not allocated.         */
+
+  if (!is_root) {
+    sn_size = snL->sn_size[sn];
+    v = &( snL->sn_struct[sn][0] );
+  } else {
+    sn_size = -1;
+    v = NULL; /* not used */
+  }
+
+  /* factor every child and merge its front into ours */
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    /* original non-cilk code: */
+    /*
+    child_matrix = 
+      recursive_multifrontal_supernodal_factor_llt(child,
+						   FALSE,
+						   bitmap,
+						   A,snL,fail);
+    */
+
+#ifdef TAUCS_CILK
+    extend_add_inlet(spawn recursive_multifrontal_supernodal_factor_llt(child,
+									FALSE,
+									bitmaps,
+									A,snL,fail));
+#else
+    extend_add_wrapper(recursive_multifrontal_supernodal_factor_llt(child,
+								    FALSE,
+								    bitmaps,
+								    A,snL,fail),
+		       &my_matrix,
+		       is_root,
+		       v,
+		       sn_size,
+		       snL->sn_up_size[sn],
+		       snL->sn_struct[sn],
+		       bitmaps[Self],
+		       fail);
+#endif
+
+
+    /* stop descending as soon as any failure has been recorded */
+    if (*fail) { 
+      if (my_matrix) supernodal_frontal_free(my_matrix);
+      return NULL;
+    }
+
+#if 0
+    if (!is_root) {
+      if (!my_matrix) {
+	my_matrix =  supernodal_frontal_create(v,sn_size,
+					       snL->sn_up_size[sn],
+					       snL->sn_struct[sn]);
+	if (!my_matrix) {
+	  *fail = TRUE;
+	  supernodal_frontal_free(child_matrix);
+	  return NULL;
+	}
+      }
+      multifrontal_supernodal_front_extend_add(my_matrix,child_matrix,bitmap);
+    }
+    /* moved outside "if !is_root"; Sivan 27 Feb 2002 */
+    supernodal_frontal_free(child_matrix);
+#endif /* 0, old pre-cilk code */
+  }
+  sync;
+
+  /* in case we have no children, we allocate now */
+  if (!is_root && !my_matrix) {
+    my_matrix =  supernodal_frontal_create(v,sn_size,
+					   snL->sn_up_size[sn],
+					   snL->sn_struct[sn]);
+    if (!my_matrix) {
+      *fail = TRUE;
+      return NULL;
+    }
+  }
+  
+  /* factor our own front; its update block is returned to the parent */
+  if(!is_root) {
+    int rc;
+    rc = spawn multifrontal_supernodal_front_factor(sn,
+						    v,sn_size,
+						    A,
+						    my_matrix,
+#if 1
+						    bitmaps[Self],
+#else
+						    bitmap,
+#endif
+						    snL);
+    sync;
+    if (rc) { 
+      /* nonpositive pivot */
+      *fail = TRUE;
+      supernodal_frontal_free(my_matrix);
+      return NULL;
+    }
+  }
+  return my_matrix;
+}
+
+/*
+ * Supernodal multifrontal LL^T factorization of A.
+ * Forwards to ccs_factor_llt_mf_maxdepth with max_depth 0 and returns
+ * whatever that routine returns.
+ */
+cilk
+void* 
+taucs_dtl(ccs_factor_llt_mf)(taucs_ccs_matrix* A)
+{
+  void* factor;
+
+  factor = spawn taucs_dtl(ccs_factor_llt_mf_maxdepth)(A,0);
+  sync;
+
+  return factor;
+}
+
+/*
+ * Allocate the scratch bitmaps (one array of A->n+1 ints per entry of
+ * Cilk_active_size), run the recursive multifrontal factorization from
+ * the root, and release the bitmaps again.
+ *
+ * n_sn:    index passed as the root supernode.
+ * is_root: not used; the recursion is always started with TRUE.
+ * fail:    set to TRUE if the bitmaps cannot be allocated or if the
+ *          factorization fails. snL is never released here: the callers
+ *          release it themselves when they see *fail.
+ */
+cilk
+static void
+recursive_multifrontal_supernodal_factor_llt_caller(int n_sn,     /* this supernode */
+						    int is_root,  /* is v the root? */
+						    taucs_ccs_matrix* A,
+						    supernodal_factor_matrix* snL,
+						    int* fail)
+{
+  int** maps;
+  int   i,j;
+  supernodal_frontal_matrix* always_null;
+
+  maps = (int**)taucs_malloc(Cilk_active_size*sizeof(int*));
+  if (!maps) {
+    /* report the failure instead of freeing snL behind the caller's back */
+    *fail = TRUE;
+    return;
+  }
+
+  for (i=0; i < Cilk_active_size; i++) {
+    maps[i] = (int*)taucs_malloc((A->n+1)*sizeof(int));
+    if (!maps[i]) {
+      /* undo the allocations made so far */
+      for (j=0; j < i ; j++)
+	taucs_free(maps[j]);
+      taucs_free(maps);
+      *fail = TRUE;
+      return;
+    }
+  }
+
+  /* the root call returns no frontal matrix */
+  always_null = spawn recursive_multifrontal_supernodal_factor_llt(n_sn,
+								   TRUE, 
+								   maps,
+								   A,snL,fail);
+  sync;
+
+  for(i=0;i<Cilk_active_size;i++)
+    taucs_free(maps[i]);
+  taucs_free(maps);
+}
+
+/*
+ * Supernodal multifrontal LL^T factorization of A: symbolic analysis
+ * followed by the numeric factorization.
+ *
+ * max_depth is passed through to taucs_ccs_symbolic_elimination. Row
+ * indices are sorted during the symbolic phase only for complex cores.
+ * The wall-clock and cpu time of both phases are reported through
+ * taucs_printf.
+ *
+ * Returns the factor (a supernodal_factor_matrix as void*), or NULL if
+ * the factor cannot be created, the symbolic phase returns -1, or the
+ * numeric phase sets the failure flag.
+ */
+cilk
+void* 
+taucs_dtl(ccs_factor_llt_mf_maxdepth)(taucs_ccs_matrix* A,int max_depth)
+{
+  supernodal_factor_matrix* L;
+#if 1
+#else
+  int* map;
+#endif
+  int fail;
+  double wtime, ctime;
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  L = multifrontal_supernodal_create();
+  if (!L) return NULL;
+
+#ifdef TAUCS_CORE_COMPLEX
+  fail = taucs_ccs_symbolic_elimination(A,L,
+					TRUE /* sort, to avoid complex conjugation */,
+					max_depth);
+#else
+  fail = taucs_ccs_symbolic_elimination(A,L,
+					FALSE /* don't sort row indices */          ,
+					max_depth);
+#endif
+  if (fail == -1) {
+    taucs_supernodal_factor_free(L);
+    return NULL;
+  }
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  /* the single shared map is disabled; the caller routine below */
+  /* allocates its own bitmaps                                   */
+#if 1
+#else
+  map = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  if (!map) {
+    taucs_supernodal_factor_free(L);
+    return NULL;
+  }
+#endif
+
+  /* restart the timers for the numeric phase */
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  fail = FALSE;
+  spawn recursive_multifrontal_supernodal_factor_llt_caller((L->n_sn),  
+							    TRUE, 
+							    A,L,&fail);
+  sync;
+  
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSupernodal Multifrontal LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+#if 1
+#else
+  taucs_free(map);
+#endif
+
+  if (fail) {
+    taucs_supernodal_factor_free(L);
+    return NULL;
+  }
+
+  return (void*) L;
+}
+
+/*************************************************************/
+/* symbolic-numeric routines                                 */
+/*************************************************************/
+
+/*
+ * Symbolic phase only: forwards to ccs_factor_llt_symbolic_maxdepth with
+ * max_depth 0 and returns its result.
+ */
+void* 
+taucs_dtl(ccs_factor_llt_symbolic)(taucs_ccs_matrix* A)
+{
+  return taucs_dtl(ccs_factor_llt_symbolic_maxdepth)(A,0);
+}
+
+/*
+ * Symbolic phase of the supernodal LL^T factorization of A.
+ *
+ * Creates an empty supernodal factor and fills in its structure with
+ * taucs_ccs_symbolic_elimination; max_depth is passed through unchanged.
+ * Row indices are sorted only for complex cores. The elapsed wall-clock
+ * and cpu time are reported through taucs_printf.
+ *
+ * Returns the factor, or NULL if it cannot be created or the symbolic
+ * elimination returns -1.
+ */
+void* 
+taucs_dtl(ccs_factor_llt_symbolic_maxdepth)(taucs_ccs_matrix* A, int max_depth)
+{
+  supernodal_factor_matrix* factor;
+  int    status;
+  int    sort_rowind;
+  double wall_start, cpu_start;
+  double wall_used,  cpu_used;
+
+  wall_start = taucs_wtime();
+  cpu_start  = taucs_ctime();
+
+  factor = multifrontal_supernodal_create();
+  if (factor == NULL) return NULL;
+
+#ifdef TAUCS_CORE_COMPLEX
+  sort_rowind = TRUE;  /* sort, to avoid complex conjugation */
+#else
+  sort_rowind = FALSE; /* don't sort row indices */
+#endif
+
+  status = taucs_ccs_symbolic_elimination(A,factor,sort_rowind,max_depth);
+  if (status == -1) {
+    taucs_supernodal_factor_free(factor);
+    return NULL;
+  }
+
+  wall_used = taucs_wtime() - wall_start;
+  cpu_used  = taucs_ctime() - cpu_start;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+               wall_used,cpu_used);
+
+  return factor;
+}
+
+/*
+ * Numeric phase of the multifrontal LL^T factorization.
+ *
+ * vL is a factor produced by the symbolic phase.  The recursive
+ * numeric factorization is spawned starting at index L->n_sn and
+ * waited for; the elapsed wall-clock and CPU times are reported
+ * through taucs_printf.
+ *
+ * Returns 0 on success.  If the numeric phase sets its failure flag,
+ * taucs_supernodal_factor_free_numeric is called on the factor and
+ * -1 is returned.
+ *
+ * The former (A->n+1)-int "map" scratch buffer was allocated without
+ * a NULL check, never read or written, and then freed, so it has
+ * been removed.
+ */
+cilk
+int 
+taucs_dtl(ccs_factor_llt_numeric)(taucs_ccs_matrix* A,void* vL)
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  int fail;
+  double wtime, ctime;
+
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  fail = FALSE;
+  spawn recursive_multifrontal_supernodal_factor_llt_caller((L->n_sn),  
+							    TRUE, 
+							    A,L,&fail);
+  sync;
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSupernodal Multifrontal LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  if (fail) {
+    taucs_supernodal_factor_free_numeric(L);
+    return -1;
+  }
+
+  return 0;
+}
+
+/*************************************************************/
+/* left-looking factor routines                              */
+/*************************************************************/
+
+/*
+ * Subtract from supernode J the update contributed by supernode K,
+ * then, only if K contributed, recurse into K's children.
+ *
+ * bitmap is prepared by the caller: a nonzero entry for a row index
+ * is the 1-based position of that row inside J's diagonal block or
+ * inside J's update block; a zero entry means the row is skipped.
+ * dense_update_matrix is scratch space addressed with leading
+ * dimension sn_up_size[J].  A is not read here; it is only forwarded
+ * to the recursive calls.
+ */
+static void
+recursive_leftlooking_supernodal_update(int J,int K,
+					int bitmap[],
+					taucs_datatype* dense_update_matrix,
+					taucs_ccs_matrix* A,
+					supernodal_factor_matrix* L)
+{
+  int  r,c,pos;
+  int  child;
+  int* K_rows            = L->sn_struct[K];
+  int  sn_size_father    = (L->sn_size)[J];
+  int  sn_up_size_father = (L->sn_up_size)[J];
+  int  sn_size_child     = (L->sn_size)[K];
+  int  sn_up_size_child  = (L->sn_up_size)[K];
+  int  first_row = 0;  /* position in K's structure of the first matching row */
+  int  row_count = 0;  /* number of K's update rows that match columns of J   */
+  int  PK,M,N,LDA,LDB,LDC;
+  taucs_datatype* K_panel;
+  taucs_datatype* target;
+
+  /* which rows of K's update part are column indices of sn J? */
+  for (pos=sn_size_child; pos<sn_up_size_child; pos++) {
+    if (!bitmap[K_rows[pos]]) continue;
+    if (K_rows[pos] > L->sn_struct[J][sn_size_father-1]) continue;
+    if (row_count == 0) first_row = pos;
+    row_count++;
+  }
+
+  /* K does not update J: stop here, K's children are not visited */
+  if (row_count == 0) return;
+
+  LDA = LDB = (L->up_blocks_ld)[K];
+  M   = sn_up_size_child - first_row; /* +-1 ? */
+  LDC = sn_up_size_father;
+  N   = row_count;
+  PK  = L->sn_size[K];
+  K_panel = &(L->up_blocks[K][first_row-sn_size_child]);
+
+  /* HERK+GEMM (fix by Elad): a single GEMM would also compute the
+     upper triangle of the trapezoidal matrix, which is junk.  HERK
+     fills the lower triangle of the leading N-by-N part ... */
+  taucs_herk ("Lower",
+	      "No Conjugate",
+	      &N,&PK,
+	      &taucs_one_real_const,
+	      K_panel,&LDA,
+	      &taucs_zero_real_const,
+	      dense_update_matrix,&LDC);
+
+  /* ... and GEMM fills the remaining M-N rows below it */
+  if (M-N > 0) {
+    int newM = M - N;
+
+    taucs_gemm ("No Conjugate",
+		"Conjugate",
+		&newM,&N,&PK,
+		&taucs_one_const,
+		K_panel+N,&LDA,
+		K_panel,&LDB,
+		&taucs_zero_const,
+		dense_update_matrix+N,&LDC);
+  }
+
+  /* scatter the lower triangle of the leading part into J's diagonal block */
+  for (c=0; c<row_count; c++) {
+    int dst_col = bitmap[K_rows[first_row+c]]-1;
+
+    for (r=c; r<row_count; r++) {
+      target  = &(L->sn_blocks[J][dst_col*sn_size_father
+				   + (bitmap[K_rows[first_row+r]]-1)]);
+      *target = taucs_sub( *target , dense_update_matrix[c*LDC+r] );
+    }
+  }
+
+  /* scatter the remaining rows into J's update block */
+  for (c=0; c<row_count; c++) {
+    int dst_col = bitmap[K_rows[first_row+c]]-1;
+
+    for (r=row_count; r<M; r++) {
+      target  = &(L->up_blocks[J][dst_col*(L->up_blocks_ld)[J]
+				   + (bitmap[K_rows[r+first_row]]-1)]);
+      *target = taucs_sub( *target , dense_update_matrix[c*LDC+r] );
+    }
+  }
+
+  /* descendants of K may update J as well */
+  for (child = L->first_child[K]; child != -1; child = L->next_child[child]) {
+    recursive_leftlooking_supernodal_update(J,child,
+					    bitmap,dense_update_matrix,
+					    A,L);
+  }
+}
+
+/*
+ * Assemble and factor the front of supernode sn (left-looking).
+ *
+ * bitmap is overwritten so that, for every row index in the
+ * supernode's structure, it holds that row's position within the
+ * structure.  The entries of A in the supernode's columns are then
+ * added into the diagonal block (positions below sn_size) or into
+ * the update block (the remaining positions).  The diagonal block is
+ * factored with taucs_potrf and the update block is completed with
+ * taucs_trsm.
+ *
+ * Returns 0 on success and -1 when taucs_potrf reports a nonzero
+ * INFO; a positive INFO is reported as a nonpositive pivot.
+ */
+static int
+leftlooking_supernodal_front_factor(int sn,
+				    int* bitmap,
+				    taucs_ccs_matrix* A,
+				    supernodal_factor_matrix* L)
+{
+  int ip,jp;
+  int*    ind;
+  taucs_datatype* re;
+  int INFO = 0; /* must be defined even when sn_size == 0 skips potrf */
+
+  int sn_size = (L->sn_size)[sn];
+  int up_size = (L->sn_up_size)[sn] - (L->sn_size)[sn];
+
+  /* creating transform for real indices */
+  for(ip=0;ip<(L->sn_up_size)[sn];ip++) bitmap[(L->sn_struct)[sn][ip]] = ip;
+
+  for(jp=0;jp<sn_size;jp++) {
+    ind = &(A->rowind[A->colptr[ (L->sn_struct)[sn][jp] ]]);
+    re  = &(A->taucs_values[A->colptr[ (L->sn_struct)[sn][jp] ]]); 
+    for(ip=0;
+	ip < A->colptr[ (L->sn_struct)[sn][jp] + 1 ] 
+           - A->colptr[ (L->sn_struct)[sn][jp] ];
+	ip++) {
+      if (bitmap[ind[ip]] < sn_size)
+	(L->sn_blocks)[sn][ (L->sn_blocks_ld)[sn]*jp + bitmap[ind[ip]]] =
+	  taucs_add( (L->sn_blocks)[sn][ (L->sn_blocks_ld)[sn]*jp + bitmap[ind[ip]]] , re[ip] );
+      else
+	(L->up_blocks)[sn][ (L->up_blocks_ld)[sn]*jp + bitmap[ind[ip]] - sn_size] =
+	taucs_add( (L->up_blocks)[sn][ (L->up_blocks_ld)[sn]*jp + bitmap[ind[ip]] - sn_size] , re[ip] );
+    }
+  }
+  
+  /* we use the BLAS through the Fortran interface */
+
+  /* solving of lower triangular system for L */
+  if (sn_size)
+    taucs_potrf ("LOWER",
+		 &sn_size,
+		 (L->sn_blocks)[sn],&((L->sn_blocks_ld)[sn]),
+		 &INFO);
+
+  if (INFO) {
+    taucs_printf("\t\tLL^T Factorization: Matrix is not positive definite.\n");
+    /* Only a positive INFO identifies a pivot.  It is 1-based within
+       this supernode, so translate it through the supernode's own
+       structure to get the column index. */
+    if (INFO > 0)
+      taucs_printf("\t\t                    nonpositive pivot in column %d\n",
+		   (L->sn_struct)[sn][INFO-1]);
+    return -1;
+  }
+
+  /* getting completion for found columns of L */
+  if (up_size && sn_size)
+    taucs_trsm ("Right",
+		"Lower",
+		"Conjugate",
+		"No unit diagonal",
+		&up_size,&sn_size,
+		&taucs_one_const,
+		(L->sn_blocks)[sn],&((L->sn_blocks_ld)[sn]),
+		(L->up_blocks)[sn],&((L->up_blocks_ld)[sn]));
+
+  return 0;
+}
+
+/*
+ * Left-looking numeric factorization of the subtree rooted at sn.
+ *
+ * For a non-root supernode the diagonal and update blocks are
+ * allocated zero-filled.  Each child is factored recursively and its
+ * subtree's updates are then applied to sn through
+ * recursive_leftlooking_supernodal_update; finally the front of sn
+ * is assembled and factored.  When is_root is true only the children
+ * are processed.
+ *
+ * bitmap is expected to be all zero on entry and is restored to zero
+ * for the rows of sn after every child; indmap is scratch space for
+ * the front factorization.
+ *
+ * Returns 0 on success, -1 on allocation failure or when a front
+ * cannot be factored.  Blocks already attached to L are left for the
+ * caller to free.
+ */
+static int
+recursive_leftlooking_supernodal_factor_llt(int sn,       /* this supernode */
+					    int is_root,  /* is v the root? */
+					    int* bitmap,
+					    int* indmap,
+					    taucs_ccs_matrix* A,
+					    supernodal_factor_matrix* L)
+{
+  int  child;
+  int  k;
+  int  n_cols = 0;  /* (L->sn_size)[sn],    valid only when !is_root */
+  int  n_rows = 0;  /* (L->sn_up_size)[sn], valid only when !is_root */
+  taucs_datatype* dense_update_matrix = NULL;
+
+  if (!is_root) {
+    n_cols = (L->sn_size)[sn];
+    n_rows = (L->sn_up_size)[sn];
+
+    (L->sn_blocks)[sn] = (L->up_blocks)[sn] = NULL;
+
+    if (n_cols) {
+      (L->sn_blocks)[sn] =
+	(taucs_datatype*)taucs_calloc(n_cols*n_cols,sizeof(taucs_datatype));
+      if (!((L->sn_blocks)[sn])) return -1; /* the caller will free L */
+    }
+    (L->sn_blocks_ld)[sn] = n_cols;
+
+    if ((n_rows - n_cols) && n_cols) {
+      (L->up_blocks)[sn] =
+	(taucs_datatype*)taucs_calloc((n_rows-n_cols)*n_cols,sizeof(taucs_datatype));
+      if (!((L->up_blocks)[sn])) return -1; /* the caller will free L */
+    }
+    (L->up_blocks_ld)[sn] = n_rows - n_cols;
+  }
+
+  for (child = L->first_child[sn]; child != -1; child = L->next_child[child]) {
+    if (recursive_leftlooking_supernodal_factor_llt(child,FALSE,
+						    bitmap,indmap,
+						    A,L) == -1) {
+      taucs_free(dense_update_matrix);
+      return -1;
+    }
+
+    /* the root has no front of its own to update */
+    if (is_root) continue;
+
+    /* the scratch matrix is allocated once, on the first child */
+    if (!dense_update_matrix) {
+      dense_update_matrix =
+	(taucs_datatype*) taucs_calloc(n_rows*n_cols,sizeof(taucs_datatype));
+      if (!dense_update_matrix) return -1; /* caller will free L */
+    }
+
+    /* prepare the bitmap (moved out of the recursive update
+       procedure 20/1/2003, Sivan and Elad): 1-based position within
+       the diagonal block, then within the update block */
+    for (k=0; k<n_cols; k++)
+      bitmap[L->sn_struct[sn][k]] = k+1;
+    for (k=n_cols; k<n_rows; k++)
+      bitmap[L->sn_struct[sn][k]] = k - n_cols + 1;
+
+    recursive_leftlooking_supernodal_update(sn,child,
+					    bitmap,dense_update_matrix,
+					    A,L);
+
+    /* clear the bitmap again, in the same two passes as before */
+    for (k=0; k<n_cols; k++)
+      bitmap[L->sn_struct[sn][k]] = 0;
+    for (k=0; k<n_rows; k++)
+      bitmap[L->sn_struct[sn][k]] = 0;
+  }
+
+  taucs_free(dense_update_matrix);
+
+  if (!is_root
+      && leftlooking_supernodal_front_factor(sn,indmap,A,L)) {
+    return -1; /* nonpositive pivot */
+  }
+
+  return 0;
+}
+
+
+/*
+ * Left-looking LL^T factorization of A: forwards to the _maxdepth
+ * variant with a max_depth argument of 0.
+ */
+void* 
+taucs_dtl(ccs_factor_llt_ll)(taucs_ccs_matrix* A)
+{
+  const int max_depth = 0;
+
+  return taucs_dtl(ccs_factor_llt_ll_maxdepth)(A,max_depth);
+}
+
+/*
+ * Left-looking supernodal LL^T factorization of A; max_depth is
+ * handed unchanged to the symbolic elimination.
+ *
+ * The symbolic elimination (with sorted row indices) runs first,
+ * then two integer work arrays of A->n+1 entries are allocated (one
+ * of them zero-filled) and the recursive numeric factorization is
+ * run starting at index L->n_sn.  Both phases are timed and reported
+ * through taucs_printf.
+ *
+ * Returns the factor as an opaque pointer, or NULL when the factor
+ * cannot be created, the symbolic elimination returns -1, a work
+ * array cannot be allocated, or the numeric phase returns -1.
+ */
+void* 
+taucs_dtl(ccs_factor_llt_ll_maxdepth)(taucs_ccs_matrix* A,int max_depth)
+{
+  supernodal_factor_matrix* L;
+  int* indmap;  /* scratch for the front factorization       */
+  int* bitmap;  /* zero-filled; the update code relies on it */
+  double wtime, ctime;
+  int fail;
+  int rc;
+
+  /* ---- symbolic phase ---- */
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  L = multifrontal_supernodal_create();
+  if (!L) return NULL;
+
+  fail = taucs_ccs_symbolic_elimination(A,L,
+					TRUE /* sort row indices */,
+					max_depth);
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSymbolic Analysis            = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  indmap = (int*)taucs_malloc((A->n+1)*sizeof(int));
+  bitmap = (int*)taucs_calloc((A->n+1),sizeof(int));
+
+  if (fail == -1 || !indmap || !bitmap) {
+    taucs_supernodal_factor_free(L);
+    taucs_free(bitmap);
+    taucs_free(indmap);
+    return NULL;
+  }
+
+  /* ---- numeric phase ---- */
+  wtime = taucs_wtime();
+  ctime = taucs_ctime();
+
+  rc = recursive_leftlooking_supernodal_factor_llt((L->n_sn),
+						   TRUE,
+						   bitmap,
+						   indmap,
+						   A,L);
+  if (rc == -1) {
+    taucs_supernodal_factor_free(L);
+    taucs_free(indmap);
+    taucs_free(bitmap);
+    return NULL;
+  }
+
+  wtime = taucs_wtime()-wtime;
+  ctime = taucs_ctime()-ctime;
+  taucs_printf("\t\tSupernodal Left-Looking LL^T = % 10.3f seconds (%.3f cpu)\n",
+	       wtime,ctime);
+
+  taucs_free(indmap);
+  taucs_free(bitmap);
+
+  return (void*) L;
+}
+
+
+/*************************************************************/
+/* supernodal solve routines                                 */
+/*************************************************************/
+
+/*
+ * Forward-substitution step (solve with L) for supernode sn and its
+ * subtree; children are processed before the node itself.
+ *
+ * For a non-root node the entries of b belonging to the supernode's
+ * own columns are gathered into a dense vector, solved against the
+ * diagonal block with taucs_trsm and stored in x.  The product of
+ * the update block with that solution (taucs_gemm) is subtracted
+ * from the remaining rows of b.  t is scratch space that holds the
+ * two dense vectors back to back.  The root only recurses.
+ */
+static void 
+recursive_supernodal_solve_l(int sn,       /* this supernode */
+			     int is_root,  /* is v the root? */
+			     int* first_child, int* next_child,
+			     int** sn_struct, int* sn_sizes, int* sn_up_sizes,
+			     int* sn_blocks_ld,taucs_datatype* sn_blocks[],
+			     int* up_blocks_ld,taucs_datatype* up_blocks[],
+			     taucs_datatype x[], taucs_datatype b[],
+			     taucs_datatype t[])
+{
+  int child;
+  int sn_size;  /* number of rows/columns in the supernode    */
+  int up_size;  /* number of rows that this supernode updates */
+  int ione = 1;
+  int k;
+  int* rows;
+  double flops;
+  taucs_datatype* xdense;
+  taucs_datatype* bdense;
+
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    recursive_supernodal_solve_l(child,
+				 FALSE,
+				 first_child,next_child,
+				 sn_struct,sn_sizes,sn_up_sizes,
+				 sn_blocks_ld,sn_blocks,
+				 up_blocks_ld,up_blocks,
+				 x,b,t);
+  }
+
+  if (is_root) return;
+
+  sn_size = sn_sizes[sn];
+  up_size = sn_up_sizes[sn] - sn_sizes[sn];
+
+  flops = ((double)sn_size)*((double)sn_size)
+    + 2.0*((double)sn_size)*((double)up_size);
+
+  /* NOTE(review): below the cutoff nothing is solved for this
+     supernode; presumably BLAS_FLOPS_CUTOFF is defined so that this
+     never happens -- confirm against its definition. */
+  if (!(flops > BLAS_FLOPS_CUTOFF)) return;
+
+  rows   = sn_struct[sn];
+  xdense = t;
+  bdense = t + sn_size;
+
+  for (k=0; k<sn_size; k++)
+    xdense[k] = b[ rows[k] ];
+  for (k=0; k<up_size; k++)
+    bdense[k] = taucs_zero;
+
+  taucs_trsm ("Left",
+	      "Lower",
+	      "No Conjugate",
+	      "No unit diagonal",
+	      &sn_size,&ione,
+	      &taucs_one_const,
+	      sn_blocks[sn],&(sn_blocks_ld[sn]),
+	      xdense       ,&sn_size);
+
+  if (up_size > 0 && sn_size > 0) {
+    taucs_gemm ("No Conjugate","No Conjugate",
+		&up_size, &ione, &sn_size,
+		&taucs_one_const,
+		up_blocks[sn],&(up_blocks_ld[sn]),
+		xdense       ,&sn_size,
+		&taucs_zero_const,
+		bdense       ,&up_size);
+  }
+
+  for (k=0; k<sn_size; k++)
+    x[ rows[k] ] = xdense[k];
+  for (k=0; k<up_size; k++)
+    b[ rows[sn_size + k] ] = taucs_sub( b[ rows[sn_size + k] ] , bdense[k] );
+}
+
+/*
+ * Back-substitution step (solve with the conjugate transpose of L)
+ * for supernode sn and its subtree; the node itself is processed
+ * before its children.
+ *
+ * For a non-root node the entries of b belonging to the supernode's
+ * own columns and the already computed entries of x for its update
+ * rows are gathered into dense vectors.  The update block's
+ * contribution is subtracted with taucs_gemm, the diagonal block is
+ * solved with taucs_trsm, and the result is stored in x.  t is
+ * scratch space that holds the two dense vectors back to back.
+ */
+static void 
+recursive_supernodal_solve_lt(int sn,       /* this supernode */
+ 			      int is_root,  /* is v the root? */
+			      int* first_child, int* next_child,
+			      int** sn_struct, int* sn_sizes, int* sn_up_sizes,
+			      int* sn_blocks_ld,taucs_datatype* sn_blocks[],
+			      int* up_blocks_ld,taucs_datatype* up_blocks[],
+			      taucs_datatype x[], taucs_datatype b[],
+			      taucs_datatype t[])
+{
+  int child;
+  int sn_size;  /* number of rows/columns in the supernode    */
+  int up_size;  /* number of rows that this supernode updates */
+  int ione = 1;
+  int k;
+  int* rows;
+  double flops;
+  taucs_datatype* xdense;
+  taucs_datatype* bdense;
+
+  if (!is_root) {
+    sn_size = sn_sizes[sn];
+    up_size = sn_up_sizes[sn]-sn_sizes[sn];
+
+    flops = ((double)sn_size)*((double)sn_size)
+      + 2.0*((double)sn_size)*((double)up_size);
+
+    /* NOTE(review): below the cutoff nothing is solved for this
+       supernode; presumably BLAS_FLOPS_CUTOFF is defined so that
+       this never happens -- confirm against its definition. */
+    if (flops > BLAS_FLOPS_CUTOFF) {
+      rows   = sn_struct[sn];
+      bdense = t;
+      xdense = t + sn_size;
+
+      for (k=0; k<sn_size; k++)
+	bdense[k] = b[ rows[k] ];
+      for (k=0; k<up_size; k++)
+	xdense[k] = x[ rows[sn_size+k] ];
+
+      if (up_size > 0 && sn_size > 0) {
+	taucs_gemm ("Conjugate","No Conjugate",
+		    &sn_size, &ione, &up_size,
+		    &taucs_minusone_const,
+		    up_blocks[sn],&(up_blocks_ld[sn]),
+		    xdense       ,&up_size,
+		    &taucs_one_const,
+		    bdense       ,&sn_size);
+      }
+
+      taucs_trsm ("Left",
+		  "Lower",
+		  "Conjugate",
+		  "No unit diagonal",
+		  &sn_size,&ione,
+		  &taucs_one_const,
+		  sn_blocks[sn],&(sn_blocks_ld[sn]),
+		  bdense       ,&sn_size);
+
+      for (k=0; k<sn_size; k++)
+	x[ rows[k] ] = bdense[k];
+    }
+  }
+
+  for (child = first_child[sn]; child != -1; child = next_child[child]) {
+    recursive_supernodal_solve_lt(child,
+				  FALSE,
+				  first_child,next_child,
+				  sn_struct,sn_sizes,sn_up_sizes,
+				  sn_blocks_ld,sn_blocks,
+				  up_blocks_ld,up_blocks,
+				  x,b,t);
+  }
+}
+
+
+/*
+ * Solve with a supernodal LL^T factor: vL is the factor, vb the
+ * right-hand side and vx receives the solution (both are arrays of
+ * L->n taucs_datatype values).
+ *
+ * b is first copied into x.  The forward pass then uses x as its
+ * right-hand side (modifying it) and writes into a temporary y; the
+ * backward pass uses y as its right-hand side and writes the final
+ * result into x.  b itself is only read.
+ *
+ * Returns 0 on success, -1 if the two work vectors cannot be
+ * allocated.
+ */
+int 
+taucs_dtl(supernodal_solve_llt)(void* vL, void* vx, void* vb)
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  taucs_datatype* x = (taucs_datatype*) vx;
+  taucs_datatype* b = (taucs_datatype*) vb;
+  taucs_datatype* y;        /* result of the forward pass */
+  taucs_datatype* scratch;  /* temporary vector           */
+  int k;
+
+  y       = taucs_malloc((L->n) * sizeof(taucs_datatype));
+  scratch = taucs_malloc((L->n) * sizeof(taucs_datatype));
+  if (!y || !scratch) {
+    taucs_free(y);
+    taucs_free(scratch);
+    taucs_printf("multifrontal_supernodal_solve_llt: out of memory\n");
+    return -1;
+  }
+
+  for (k=0; k<L->n; k++)
+    x[k] = b[k];
+
+  /* forward pass: right-hand side x, result y */
+  recursive_supernodal_solve_l (L->n_sn,
+				TRUE,  /* this is the root */
+				L->first_child, L->next_child,
+				L->sn_struct,L->sn_size,L->sn_up_size,
+				L->sn_blocks_ld, L->sn_blocks,
+				L->up_blocks_ld, L->up_blocks,
+				y, x, scratch);
+
+  /* backward pass: right-hand side y, result x */
+  recursive_supernodal_solve_lt(L->n_sn,
+				TRUE,  /* this is the root */
+				L->first_child, L->next_child,
+				L->sn_struct,L->sn_size,L->sn_up_size,
+				L->sn_blocks_ld, L->sn_blocks,
+				L->up_blocks_ld, L->up_blocks,
+				x, y, scratch);
+
+  taucs_free(y);
+  taucs_free(scratch);
+
+  return 0;
+}
+#endif /*#ifndef TAUCS_CORE_GENERAL*/
+
+/*************************************************************/
+/* generic interfaces to user-callable routines              */
+/*************************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Type-generic multifrontal LL^T factorization with a max_depth
+ * argument.  The data-type bits of A->flags select which of the
+ * type-specific routines compiled into this build is spawned.
+ *
+ * Returns that routine's result, or NULL if no compiled-in data type
+ * matches A->flags.
+ */
+cilk 
+void* 
+taucs_ccs_factor_llt_mf_maxdepth(taucs_ccs_matrix* A,int max_depth)
+{
+  void* p     = NULL;
+  int   flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    p = spawn taucs_dccs_factor_llt_mf_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    p = spawn taucs_sccs_factor_llt_mf_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    p = spawn taucs_zccs_factor_llt_mf_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    p = spawn taucs_cccs_factor_llt_mf_maxdepth(A,max_depth);
+#endif
+
+  sync;
+  return p;
+}
+
+/*
+ * Type-generic left-looking LL^T factorization with a max_depth
+ * argument.  The data-type bits of A->flags select which of the
+ * type-specific routines compiled into this build is called.
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns NULL.
+ */
+void* 
+taucs_ccs_factor_llt_ll_maxdepth(taucs_ccs_matrix* A,int max_depth)
+{
+  int flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_llt_ll_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_llt_ll_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_llt_ll_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_llt_ll_maxdepth(A,max_depth);
+#endif
+
+  assert(0);
+  return NULL;
+}
+
+/*
+ * Type-generic symbolic LL^T analysis with a max_depth argument.
+ * The data-type bits of A->flags select which of the type-specific
+ * routines compiled into this build is called.
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns NULL.
+ */
+void* taucs_ccs_factor_llt_symbolic_maxdepth(taucs_ccs_matrix* A,int max_depth)
+{
+  int flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_llt_symbolic_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_llt_symbolic_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_llt_symbolic_maxdepth(A,max_depth);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_llt_symbolic_maxdepth(A,max_depth);
+#endif
+
+  assert(0);
+  return NULL;
+}
+
+/*
+ * Type-generic multifrontal LL^T factorization.  The data-type bits
+ * of A->flags select which of the type-specific routines compiled
+ * into this build is spawned.
+ *
+ * Returns that routine's result, or NULL if no compiled-in data type
+ * matches A->flags.
+ */
+cilk
+void* 
+taucs_ccs_factor_llt_mf(taucs_ccs_matrix* A)
+{
+  void* p     = NULL;
+  int   flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    p = spawn taucs_dccs_factor_llt_mf(A);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    p = spawn taucs_sccs_factor_llt_mf(A);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    p = spawn taucs_zccs_factor_llt_mf(A);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    p = spawn taucs_cccs_factor_llt_mf(A);
+#endif
+
+  sync;
+  return p;
+}
+
+/*
+ * Type-generic left-looking LL^T factorization.  The data-type bits
+ * of A->flags select which of the type-specific routines compiled
+ * into this build is called.
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns NULL.
+ */
+void* taucs_ccs_factor_llt_ll(taucs_ccs_matrix* A)
+{
+  int flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_llt_ll(A);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_llt_ll(A);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_llt_ll(A);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_llt_ll(A);
+#endif
+
+  assert(0);
+  return NULL;
+}
+
+/*
+ * Type-generic symbolic LL^T analysis.  The data-type bits of
+ * A->flags select which of the type-specific routines compiled into
+ * this build is called.
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns NULL.
+ */
+void* taucs_ccs_factor_llt_symbolic(taucs_ccs_matrix* A)
+{
+  int flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    return taucs_dccs_factor_llt_symbolic(A);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    return taucs_sccs_factor_llt_symbolic(A);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    return taucs_zccs_factor_llt_symbolic(A);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    return taucs_cccs_factor_llt_symbolic(A);
+#endif
+
+  assert(0);
+  return NULL;
+}
+
+/*
+ * Type-generic numeric LL^T factorization into an existing factor L.
+ * The data-type bits of A->flags select which of the type-specific
+ * routines compiled into this build is spawned.
+ *
+ * Returns that routine's result, or TAUCS_ERROR if no compiled-in
+ * data type matches A->flags.
+ */
+cilk
+int taucs_ccs_factor_llt_numeric(taucs_ccs_matrix* A, void* L)
+{
+  int rc    = TAUCS_ERROR;
+  int flags = A->flags;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (flags & TAUCS_DOUBLE)
+    rc = spawn taucs_dccs_factor_llt_numeric(A,L);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (flags & TAUCS_SINGLE)
+    rc = spawn taucs_sccs_factor_llt_numeric(A,L);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (flags & TAUCS_DCOMPLEX)
+    rc = spawn taucs_zccs_factor_llt_numeric(A,L);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (flags & TAUCS_SCOMPLEX)
+    rc = spawn taucs_cccs_factor_llt_numeric(A,L);
+#endif
+
+  sync;
+  return rc;
+}
+
+
+/*
+ * Type-generic solve with a supernodal LL^T factor.  The data-type
+ * bits stored in the factor's flags select which of the
+ * type-specific solve routines compiled into this build is called
+ * with (L,x,b).
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns -1.
+ */
+int taucs_supernodal_solve_llt(void* L, void* x, void* b)
+{
+  supernodal_factor_matrix* factor = (supernodal_factor_matrix*) L;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (factor->flags & TAUCS_DOUBLE)
+    return taucs_dsupernodal_solve_llt(L,x,b);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (factor->flags & TAUCS_SINGLE)
+    return taucs_ssupernodal_solve_llt(L,x,b);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_DCOMPLEX)
+    return taucs_zsupernodal_solve_llt(L,x,b);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_SCOMPLEX)
+    return taucs_csupernodal_solve_llt(L,x,b);
+#endif
+
+  assert(0);
+  return -1;
+}
+
+/*
+ * Type-generic release of a supernodal factor.  The data-type bits
+ * stored in the factor's flags select which of the type-specific
+ * free routines compiled into this build is called.
+ *
+ * A NULL argument is accepted and ignored, so error paths can call
+ * this unconditionally; previously NULL was dereferenced to read
+ * the flags.  If no compiled-in data type matches a non-NULL
+ * factor, control reaches assert(0).
+ */
+void taucs_supernodal_factor_free(void* L)
+{
+  supernodal_factor_matrix* factor = (supernodal_factor_matrix*) L;
+
+  if (!factor) return; /* nothing to release */
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (factor->flags & TAUCS_DOUBLE) {
+    taucs_dsupernodal_factor_free(L);
+    return;
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (factor->flags & TAUCS_SINGLE) {
+    taucs_ssupernodal_factor_free(L);
+    return;
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_DCOMPLEX) {
+    taucs_zsupernodal_factor_free(L);
+    return;
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_SCOMPLEX) {
+    taucs_csupernodal_factor_free(L);
+    return;
+  }
+#endif
+  
+  assert(0);
+}
+
+/*
+ * Type-generic variant of the factor release that forwards to the
+ * type-specific *_factor_free_numeric routine.  The data-type bits
+ * stored in the factor's flags select which routine compiled into
+ * this build is called.
+ *
+ * A NULL argument is accepted and ignored; previously NULL was
+ * dereferenced to read the flags.  If no compiled-in data type
+ * matches a non-NULL factor, control reaches assert(0).
+ */
+void taucs_supernodal_factor_free_numeric(void* L)
+{
+  supernodal_factor_matrix* factor = (supernodal_factor_matrix*) L;
+
+  if (!factor) return; /* nothing to release */
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (factor->flags & TAUCS_DOUBLE) {
+    taucs_dsupernodal_factor_free_numeric(L);
+    return;
+  }
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (factor->flags & TAUCS_SINGLE) {
+    taucs_ssupernodal_factor_free_numeric(L);
+    return;
+  }
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_DCOMPLEX) {
+    taucs_zsupernodal_factor_free_numeric(L);
+    return;
+  }
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_SCOMPLEX) {
+    taucs_csupernodal_factor_free_numeric(L);
+    return;
+  }
+#endif
+  
+  assert(0);
+}
+
+/*
+ * Type-generic conversion of a supernodal factor to a
+ * taucs_ccs_matrix.  The data-type bits stored in the factor's
+ * flags select which of the type-specific conversion routines
+ * compiled into this build is called.
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns NULL.
+ */
+taucs_ccs_matrix* 
+taucs_supernodal_factor_to_ccs(void* L)
+{
+  supernodal_factor_matrix* factor = (supernodal_factor_matrix*) L;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (factor->flags & TAUCS_DOUBLE)
+    return taucs_dsupernodal_factor_to_ccs(L);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (factor->flags & TAUCS_SINGLE)
+    return taucs_ssupernodal_factor_to_ccs(L);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_DCOMPLEX)
+    return taucs_zsupernodal_factor_to_ccs(L);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_SCOMPLEX)
+    return taucs_csupernodal_factor_to_ccs(L);
+#endif
+
+  assert(0);
+  return NULL;
+}
+
+/*
+ * Type-generic get_diag accessor for a supernodal factor.  The
+ * data-type bits stored in the factor's flags select which of the
+ * type-specific get_diag routines compiled into this build is
+ * called; its result is returned as an opaque pointer.
+ *
+ * If no compiled-in data type matches, control reaches assert(0)
+ * and then returns NULL.
+ */
+void* 
+taucs_supernodal_factor_get_diag(void* L)
+{
+  supernodal_factor_matrix* factor = (supernodal_factor_matrix*) L;
+
+#ifdef TAUCS_DOUBLE_IN_BUILD
+  if (factor->flags & TAUCS_DOUBLE)
+    return taucs_dsupernodal_factor_get_diag(L);
+#endif
+
+#ifdef TAUCS_SINGLE_IN_BUILD
+  if (factor->flags & TAUCS_SINGLE)
+    return taucs_ssupernodal_factor_get_diag(L);
+#endif
+
+#ifdef TAUCS_DCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_DCOMPLEX)
+    return taucs_zsupernodal_factor_get_diag(L);
+#endif
+
+#ifdef TAUCS_SCOMPLEX_IN_BUILD
+  if (factor->flags & TAUCS_SCOMPLEX)
+    return taucs_csupernodal_factor_get_diag(L);
+#endif
+
+  assert(0);
+  return NULL;
+}
+
+int
+taucs_ccs_etree(taucs_ccs_matrix* A,
+		int* parent,
+		int* l_colcount,
+		int* l_rowcount,
+		int* l_nnz)
+{
+  int* prev_p;
+  /*int* prev_nbr;omer*/
+  int* level;
+  /*int* first_descendant;omer*/
+  int* l_cc;
+  int* l_rc;
+  int* wt;
+
+  int  n = A->n;
+  int  pprime;/*p,q,u omer*/
+  int  ju;
+  int* postorder;
+  int* ipostorder;
+  int  *first_child,*next_child;
+
+  int i,j,k,ip,jp,kp;
+  int nnz,jnnz;
+  int* uf;
+  int* rowptr;
+  int* colind;
+  int* rowcount;
+  int* realroot;
+
+  /* we need the row structures for the lower triangle */
+
+  nnz = (A->colptr)[n];
+  
+  uf       = taucs_malloc(n     * sizeof(int));
+  rowcount = taucs_malloc((n+1) * sizeof(int));
+  rowptr   = taucs_malloc((n+1) * sizeof(int));
+  colind   = taucs_malloc(nnz   * sizeof(int));
+
+  if (!uf || !rowcount || !rowptr || !colind) {
+    taucs_free(uf);
+    taucs_free(rowcount);
+    taucs_free(rowptr);
+    taucs_free(colind);
+    return -1;
+  }
+
+  for (i=0; i <=n; i++) rowcount[i] = 0;
+  for (j=0; j < n; j++) {
+    jnnz = (A->colptr)[j+1] - (A->colptr)[j];
+    for (ip=0; ip<jnnz; ip++) {
+      i = (A->rowind)[(A->colptr)[j] + ip];
+      if (j < i) rowcount[i]++;
+    }
+  }
+
+  ip = 0;
+  for (i=0; i <= n; i++) {
+    int next_ip = ip + rowcount[i];
+    rowcount[i] = ip;
+    rowptr  [i] = ip;
+    ip = next_ip;
+  }
+
+  for (j=0; j < n; j++) {
+    jnnz = (A->colptr)[j+1] - (A->colptr)[j];
+    for (ip=0; ip<jnnz; ip++) {
+      i = (A->rowind)[(A->colptr)[j] + ip];
+      if (i==j) continue;
+      assert( rowcount[i] < rowptr[i+1] );
+      colind[ rowcount[i] ] = j;
+      rowcount[i]++;
+    }
+  }
+
+  /* now compute the etree */
+
+  {
+    int u,t,vroot;
+    realroot = rowcount; /* reuse space */
+
+    for (i=0; i<n; i++) {
+      uf_makeset(uf,i);
+      realroot[i] = i;
+      parent[i] = n;
+      vroot = i;
+      for (kp=rowptr[i]; kp<rowptr[i+1]; kp++) {
+	k=colind[kp];
+	u = uf_find(uf,k);
+	t = realroot[u];
+	if (parent[t] == n && t != i) {
+	  parent[t] = i;
+	  vroot = uf_union(uf,vroot,u);
+	  realroot[vroot] = i;
+	}
+      }
+    }
+  }
+
+  taucs_free(colind);
+  taucs_free(rowptr);
+  taucs_free(rowcount);
+
+  /* now only uf remains allocated */
+  
+  /* compute column counts */
+
+  if (l_colcount || l_rowcount || l_nnz) {
+    int* l_nz;
+    int  tmp;
+    int  u,p,q;
+
+    first_child = taucs_malloc((n+1) * sizeof(int));
+    next_child  = taucs_malloc((n+1) * sizeof(int));
+    postorder   = taucs_malloc(n     * sizeof(int));
+    ipostorder  = taucs_malloc(n     * sizeof(int));
+    wt          = taucs_malloc(n     * sizeof(int));
+    level       = taucs_malloc(n     * sizeof(int));
+    prev_p      = taucs_malloc(n     * sizeof(int));
+
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+    prev_nbr         = taucs_malloc(n     * sizeof(int));
+    first_descendant = taucs_malloc(n     * sizeof(int));
+#endif
+
+    /* we allocate scratch vectors to avoid conditionals */
+    /* in the inner loop.                                */
+
+    if (l_colcount) l_cc = l_colcount;
+    else            l_cc = (int*) taucs_malloc(n*sizeof(int));
+    if (l_rowcount) l_rc = l_rowcount;
+    else            l_rc = (int*) taucs_malloc(n*sizeof(int));
+    if (l_nnz)      l_nz = l_nnz;
+    else            l_nz = &tmp;
+
+
+    if (!first_child || !next_child || !postorder
+	|| !ipostorder || !wt || !level || !prev_p
+	|| (!l_colcount && !l_cc) 
+	|| (!l_rowcount && !l_rc) 
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+	|| !prev_nbr || !first_descendant
+#endif
+	) {
+      taucs_free(uf);
+
+      if (!l_colcount) taucs_free(l_cc);
+      if (!l_rowcount) taucs_free(l_rc);
+
+      taucs_free(postorder);
+      taucs_free(ipostorder);
+      taucs_free(wt);
+      taucs_free(level);
+      taucs_free(prev_p);
+      
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+      taucs_free(prev_nbr);
+      taucs_free(first_descendant);
+#endif
+      return -1;
+    }
+
+    /*for (j=0; j<n; j++) printf("parent[%d] = %d\n",j,parent[j]);*/
+
+    /* compute the postorder */
+    
+    for (j=0; j<=n; j++) first_child[j] = -1;
+    for (j=n-1; j>=0; j--) {
+      next_child[j] = first_child[parent[j]];
+      first_child[parent[j]] = j;
+    }
+    
+    {
+      int next = 0;
+      recursive_postorder(n,first_child,next_child,
+			  postorder,
+			  ipostorder,&next);
+    }
+    
+    /* sort by postorder of etree */
+    /* compute level, fst_desc */
+
+    tree_level(n,TRUE,first_child,next_child,
+	       level,-1);
+    
+    for (u=0; u < n; u++) prev_p  [u] = -1;
+    for (u=0; u < n; u++) l_rc    [u] =  1;
+    for (u=0; u < n; u++) ordered_uf_makeset(uf,u);
+    for (u=0; u < n; u++) {
+      if (first_child[u] == -1)
+	wt[u] = 1; /* leaves     */
+      else
+	wt[u] =  0; /* nonleaves */
+    }
+
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+    for (u=0; u < n; u++) prev_nbr[u] = -1;
+
+    tree_first_descendant(n,TRUE,
+			  first_child,next_child,ipostorder,
+			  first_descendant);
+#endif
+
+    taucs_free(first_child);
+    taucs_free(next_child);
+
+    for (p=0; p<n; p++) {
+      jp = postorder[p];
+      if (parent[jp] != n) wt[parent[jp]] --;
+      for (ip = (A->colptr)[jp]; ip < (A->colptr)[jp+1]; ip++) {
+	ju = (A->rowind)[ip];
+	u  = ipostorder[ju];
+	if (ju==jp) continue; /* we only want proper neighbors */
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+	if (first_descendant[jp] > prev_nbr[u]) {
+#else
+	if (1) {
+#endif
+	  wt[jp] ++;
+	  pprime = prev_p[ju];
+	  if (pprime == -1) 
+	    l_rc[ju] += level[jp] - level[ju];
+	  else {
+	    q = ordered_uf_find(uf,pprime);
+	    l_rc[ju] += level[jp] - level[q];
+	    wt[q] --;
+	  }
+	  prev_p[ju] = jp;
+	}
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+	prev_nbr[u] = p;
+#endif
+      }
+      if (parent[jp] != n) {
+	if (!(ipostorder[parent[jp]] > ipostorder[jp])) {
+	  printf("jp %d parent %d (ipo_j %d ipo_parent %d\n",
+		 jp,parent[jp],ipostorder[jp],ipostorder[parent[jp]]);
+	}
+	assert(ipostorder[parent[jp]] > ipostorder[jp]);
+	ordered_uf_union(uf,jp,parent[jp]);
+      }
+    }
+
+    *l_nz = 0;
+    for (u=0; u<n; u++) {
+      l_cc[u] = wt[u];
+      *l_nz += wt[u];
+    }
+    for (u=0; u<n; u++) {
+      if (parent[u] != n) {
+	l_cc[parent[u]] += l_cc[u];
+	*l_nz += l_cc[u];
+      }
+    }
+
+    /* free scratch vectors                              */
+
+    if (!l_colcount) taucs_free(l_cc);
+    if (!l_rowcount) taucs_free(l_rc);
+
+    /* free other data structures */
+
+    taucs_free(postorder);
+    taucs_free(ipostorder);
+    taucs_free(wt);
+    taucs_free(level);
+    taucs_free(prev_p);
+    
+#ifdef GILBERT_NG_PEYTON_ANALYSIS_SUP
+    taucs_free(prev_nbr);
+    taucs_free(first_descendant);
+#endif
+  }
+
+  taucs_free(uf);
+
+  return 0;
+}
+
+/*
+ * taucs_ccs_etree_liu
+ *
+ * Computes the elimination tree of A and, on request, the nonzero
+ * counts of the factor by walking the tree from every off-diagonal
+ * entry up to the first node already marked for the current row.
+ *
+ *   A           matrix in compressed-column form.  The row structure is
+ *               sized from entries with row > column, and the fill loop
+ *               asserts if any other off-diagonal entry shows up, so the
+ *               input is expected to hold the lower triangle only.
+ *   parent      output, length n; parent[j] is the etree parent of
+ *               column j, or n when j is a root.
+ *   l_colcount  optional output, length n; per-column entry counts
+ *               (each count starts at 1 for the diagonal).
+ *   l_rowcount  optional output, length n; per-row entry counts
+ *               (each count starts at 1 for the diagonal).
+ *   l_nnz       optional output; total entry count (starts at n).
+ *
+ * Returns 0 on success and -1 when a work array cannot be allocated.
+ */
+int 
+taucs_ccs_etree_liu(taucs_ccs_matrix* A,
+                    int* parent,
+                    int* l_colcount,
+                    int* l_rowcount,
+                    int* l_nnz)
+{
+  int n = A->n;
+  int i,j,k,ip,kp;
+  int nnz,jnnz;
+
+  int* uf;
+  int* rowptr;
+  int* colind;
+
+  int* rowcount;
+  int* marker;
+  int* realroot;
+
+  int* l_cc;
+  int* l_rc;
+
+  /* we need the row structures for the lower triangle */
+
+  nnz = (A->colptr)[n];
+  
+  uf       = taucs_malloc(n     * sizeof(int));
+  rowcount = taucs_malloc((n+1) * sizeof(int));
+  rowptr   = taucs_malloc((n+1) * sizeof(int));
+  colind   = taucs_malloc(nnz   * sizeof(int));
+
+  /* a zero-byte request may return NULL without being an error, so */
+  /* NULL only counts as a failure for arrays that hold something    */
+  if ((n > 0 && !uf) || !rowcount || !rowptr || (nnz > 0 && !colind)) {
+    taucs_free(uf);
+    taucs_free(rowcount);
+    taucs_free(rowptr);
+    taucs_free(colind);
+    return -1;
+  }
+
+  /* pass 1: count the strictly-lower entries of every row */
+
+  for (i=0; i <=n; i++) rowcount[i] = 0;
+
+  for (j=0; j < n; j++) {
+    jnnz = (A->colptr)[j+1] - (A->colptr)[j];
+
+    for (ip=0; ip<jnnz; ip++) {
+      i = (A->rowind)[(A->colptr)[j] + ip];
+      if (j < i) rowcount[i]++;
+    }
+  }
+
+  /* prefix sum: rowptr[i] is where row i starts; rowcount[i] becomes */
+  /* the insertion cursor for that row                                */
+
+  ip = 0;
+  for (i=0; i <= n; i++) {
+    int next_ip = ip + rowcount[i];
+    rowcount[i] = ip;
+    rowptr  [i] = ip;
+    ip = next_ip;
+  }
+
+  /* pass 2: scatter the column indices into the row lists */
+
+  for (j=0; j < n; j++) {
+    jnnz = (A->colptr)[j+1] - (A->colptr)[j];
+
+    for (ip=0; ip<jnnz; ip++) {
+      i = (A->rowind)[(A->colptr)[j] + ip];
+      if (i==j) continue;
+      assert( rowcount[i] < rowptr[i+1] );
+      colind[ rowcount[i] ] = j;
+      rowcount[i]++;
+    }
+  }
+
+  /* now compute the etree */
+
+  {
+    int u,t,vroot;
+    realroot = rowcount; /* reuse space */
+
+    for (i=0; i<n; i++) {
+      uf_makeset(uf,i);
+      realroot[i] = i;
+      parent[i] = n;
+      vroot = i;
+      for (kp=rowptr[i]; kp<rowptr[i+1]; kp++) {
+        k=colind[kp];
+        u = uf_find(uf,k);
+        t = realroot[u];
+        if (parent[t] == n && t != i) {
+          parent[t] = i;
+          vroot = uf_union(uf,vroot,u);
+          realroot[vroot] = i;
+        }
+      }
+    }
+  }
+
+  /* compute column counts */
+
+  if (l_colcount || l_rowcount || l_nnz) {
+    int* l_nz;
+    int  tmp;
+
+    /* we allocate scratch vectors to avoid conditionals */
+    /* in the inner loop.                                */
+
+    if (l_colcount) l_cc = l_colcount;
+    else            l_cc = (int*) taucs_malloc(n*sizeof(int));
+    if (l_rowcount) l_rc = l_rowcount;
+    else            l_rc = (int*) taucs_malloc(n*sizeof(int));
+    if (l_nnz)      l_nz = l_nnz;
+    else            l_nz = &tmp;
+
+    if ((!l_colcount && n > 0 && !l_cc)
+        || (!l_rowcount && n > 0 && !l_rc)) {
+      if (!l_colcount) taucs_free(l_cc);
+      if (!l_rowcount) taucs_free(l_rc);
+      taucs_free(colind);
+      taucs_free(rowptr);
+      taucs_free(rowcount);
+      taucs_free(uf);
+      return -1;
+    }
+
+    marker = rowcount; /* we reuse the space */
+    
+    for (j=0; j < n; j++) l_cc[j] = 1;
+    *l_nz = n;
+    
+    for (i=0; i<n; i++) marker[i] = n; /* clear the array */
+    
+    for (i=0; i<n; i++) {
+      l_rc[i] = 1;
+      marker[ i ] = i;
+      for (kp=rowptr[i]; kp<rowptr[i+1]; kp++) {
+        k=colind[kp];
+        j=k;
+        /* climb towards the root until a node already counted for */
+        /* row i is reached                                        */
+        while (marker[j] != i) {
+          l_cc[j]++;
+          l_rc[i]++;
+          (*l_nz)++;
+          marker[j] = i;
+          j = parent[j];
+        }
+      }
+    }
+
+    /* free scratch vectors                              */
+
+    if (!l_colcount) taucs_free(l_cc);
+    if (!l_rowcount) taucs_free(l_rc);
+  }
+
+  taucs_free(colind);
+  taucs_free(rowptr);
+  taucs_free(rowcount);
+  taucs_free(uf);
+
+  return 0;
+}
+
+
+/*
+ * Releases every work array of taucs_ccs_symbolic_elimination together
+ * with the partially built fields of L, and resets those fields to NULL
+ * so a later release of L does not free them a second time.
+ */
+static void
+symbolic_elimination_abort(supernodal_factor_matrix* L,
+                           int* parent,
+                           int* rowind,
+                           int* next_child,
+                           int* first_child,
+                           int* map,
+                           int* column_to_sn_map)
+{
+  taucs_free(parent);
+  taucs_free(rowind);
+  taucs_free(next_child);
+  taucs_free(first_child);
+  taucs_free(map);
+  taucs_free(column_to_sn_map);
+  taucs_free(L->next_child);
+  taucs_free(L->first_child);
+  taucs_free(L->sn_up_size);
+  taucs_free(L->sn_size);
+  taucs_free(L->sn_struct);
+  L->sn_struct = NULL;
+  L->sn_size = L->sn_up_size = L->first_child = L->next_child = NULL;
+}
+
+/*
+ * Logs the nonzero, flop and byte estimates for the supernodal structure
+ * currently stored in L.  `format` is the taucs_printf format string; it
+ * receives the three estimates in that order.  The byte total is kept in
+ * a double because an int overflows once the factor exceeds 2GB.
+ */
+static void
+symbolic_elimination_report(supernodal_factor_matrix* L, char* format)
+{
+  double nnz   = 0.0;
+  double flops = 0.0;
+  double bytes;
+  int sn,i,colnnz;
+
+  bytes = 
+    1.0*sizeof(char)                  /* uplo             */
+    + 2.0*sizeof(int)                 /* n, n_sn          */
+    + 3.0*(L->n_sn)*sizeof(int)       /* etree            */
+    + 4.0*(L->n_sn)*sizeof(int)       /* block sizes, lda */
+    + 1.0*(L->n_sn)*sizeof(int*)      /* row/col indices  */
+    + 3.0*(L->n_sn)*sizeof(taucs_datatype*) /* actual blocks    */
+    ;
+
+  for (sn=0; sn<(L->n_sn); sn++) {
+    bytes += (double) (L->sn_up_size)[sn] * sizeof(int);
+    bytes += (double) (L->sn_size)[sn] * (double) (L->sn_up_size)[sn]
+             * sizeof(taucs_datatype);
+
+    for (i=0, colnnz = (L->sn_up_size)[sn]; 
+         i<(L->sn_size)[sn]; 
+         i++, colnnz--) {
+      /* multiply-adds in the update part count as 2 flops each */
+      flops += 1.0 + ((double)(colnnz)) * ((double)(colnnz));
+      nnz   += (double) (colnnz);
+    }
+  }
+
+  taucs_printf(format, nnz, flops, (float) bytes);
+}
+
+/*
+ * taucs_ccs_symbolic_elimination
+ *
+ * Builds the supernodal structure of the factor of A inside *vL
+ * (a supernodal_factor_matrix):
+ *   1. computes the elimination tree with taucs_ccs_etree,
+ *   2. measures its depth and gives up when max_depth is nonzero and
+ *      the depth exceeds it,
+ *   3. computes an inverse postorder of the tree,
+ *   4. runs recursive_symbolic_elimination and then
+ *      recursive_amalgamate_supernodes, logging size estimates after
+ *      each of the two,
+ *   5. allocates the (still empty) per-supernode block tables.
+ *
+ * do_order is forwarded unchanged to both recursive routines.
+ *
+ * Returns 0 on success and -1 on failure.  On every failure before
+ * step 5 the fields of L set up here are freed and reset to NULL; when
+ * step 5 fails the caller is expected to free L.
+ */
+int
+taucs_ccs_symbolic_elimination(taucs_ccs_matrix* A,
+                               void* vL,
+                               int do_order,
+                               int max_depth
+                               )
+{
+  supernodal_factor_matrix* L = (supernodal_factor_matrix*) vL;
+  int* first_child;
+  int* next_child;
+  int j;
+  int* column_to_sn_map;
+  int* map;
+  int* rowind;
+  int* parent;
+  int* ipostorder;
+
+  int depth;
+
+  L->n           = A->n;
+  /* use calloc so we can deallocate unallocated entries */
+  L->sn_struct   = (int**)taucs_calloc((A->n  ),sizeof(int*)); 
+  L->sn_size     = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  L->sn_up_size  = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  L->first_child = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  L->next_child  = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  
+  column_to_sn_map = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  map              = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  first_child      = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  next_child       = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  parent           = (int*) taucs_malloc((A->n+1)*sizeof(int));
+  rowind           = (int*) taucs_malloc((A->n  )*sizeof(int));
+
+  if (!(L->sn_struct) || !(L->sn_size) || !(L->sn_up_size) ||
+      !(L->first_child) || !(L->next_child) || !column_to_sn_map
+      || !map || !first_child || !next_child || !rowind || !parent) {
+    symbolic_elimination_abort(L,parent,rowind,next_child,first_child,
+                               map,column_to_sn_map);
+    return -1;
+  }
+
+  if (taucs_ccs_etree(A,parent,NULL,NULL,NULL) == -1) {
+    symbolic_elimination_abort(L,parent,rowind,next_child,first_child,
+                               map,column_to_sn_map);
+    return -1;
+  }
+
+  /* disabled cross-check of the two etree/count implementations */
+  if (0) {
+    double wtime;
+    int *cc1,*cc2,*rc1,*rc2;
+    int *p1;
+    int nnz1,nnz2;
+
+    cc1=(int*)taucs_malloc((A->n)*sizeof(int));
+    cc2=(int*)taucs_malloc((A->n)*sizeof(int));
+    rc1=(int*)taucs_malloc((A->n)*sizeof(int));
+    rc2=(int*)taucs_malloc((A->n)*sizeof(int));
+    p1 =(int*)taucs_malloc((A->n)*sizeof(int));
+
+    wtime = taucs_wtime();
+    taucs_ccs_etree_liu(A,parent,cc1,rc1,&nnz1);
+    wtime = taucs_wtime() - wtime;
+    printf("\t\t\tLiu Analysis = %.3f seconds\n",wtime);
+
+    wtime = taucs_wtime();
+    taucs_ccs_etree(A,p1,cc2,rc2,&nnz2);
+    wtime = taucs_wtime() - wtime;
+    printf("\t\t\tGNP Analysis = %.3f seconds\n",wtime);
+
+    for (j=0; j<(A->n); j++) assert(parent[j]==p1[j]);
+    for (j=0; j<(A->n); j++) {
+      if (cc1[j]!=cc2[j]) printf("j=%d cc1=%d cc2=%d\n",j,cc1[j],cc2[j]);
+      assert(cc1[j]==cc2[j]);
+    }
+
+    for (j=0; j<(A->n); j++) {
+      if (rc1[j]!=rc2[j]) printf("j=%d rc1=%d rc2=%d\n",j,rc1[j],rc2[j]);
+      assert(rc1[j]==rc2[j]);
+    }
+
+    if (nnz1!=nnz2) printf("nnz1=%d nnz2=%d\n",nnz1,nnz2);
+    
+    taucs_free(cc1); taucs_free(cc2); taucs_free(rc1); taucs_free(rc2);
+    taucs_free(p1);
+  }
+
+  /* child lists of the etree; index n collects the roots */
+
+  for (j=0; j <= (A->n); j++) first_child[j] = -1;
+  for (j = (A->n)-1; j >= 0; j--) {
+    int p = parent[j];
+    next_child[j] = first_child[p];
+    first_child[p] = j;
+  }
+
+  /* let's compute the depth of the etree, to bail out if it is too deep */
+  /* the whole thing will work better if we compute supernodal etrees    */
+
+  {
+    int next_depth_count;
+    int this_depth_count;
+    int child,i;
+
+    int* this_depth = rowind; /* we alias rowind */
+    int* next_depth = map;    /* and map         */
+    int* tmp;
+
+    /* level-by-level sweep starting from the virtual root n */
+    this_depth[0] = A->n;
+    this_depth_count = 1;
+    next_depth_count = 0;
+    depth = -1;
+
+    while (this_depth_count) {
+      for (i=0; i<this_depth_count; i++) {
+        child = first_child[ this_depth[i] ];
+        while (child != -1) {
+          next_depth[ next_depth_count ] = child;
+          next_depth_count++;
+          child = next_child[ child ];
+        }
+      }
+      
+      tmp = this_depth;
+      this_depth = next_depth;
+      next_depth = tmp;
+
+      this_depth_count = next_depth_count;
+      next_depth_count = 0;
+      depth++;
+    }
+  }
+
+  taucs_printf("\t\tElimination tree depth is %d\n",depth);
+
+  if (max_depth && depth > max_depth) {
+    taucs_printf("taucs_ccs_symbolic_elimination: etree depth %d, maximum allowed is %d\n",
+                 depth, max_depth);
+    symbolic_elimination_abort(L,parent,rowind,next_child,first_child,
+                               map,column_to_sn_map);
+    return -1;
+  }
+
+  /* parent[] is no longer read below, so its storage is reused for */
+  /* the inverse postorder                                           */
+  ipostorder = parent;
+  { 
+    int next = 0;
+    recursive_postorder(A->n,first_child,next_child,
+                        NULL,
+                        ipostorder,&next);
+  }
+
+  L->n_sn = 0;
+  for (j=0; j < (A->n); j++) map[j] = -1;
+  for (j=0; j <= (A->n); j++) (L->first_child)[j] = (L->next_child)[j] = -1;
+  
+  if (recursive_symbolic_elimination(A->n,
+                                     A,
+                                     first_child,next_child,
+                                     &(L->n_sn),
+                                     L->sn_size,L->sn_up_size,L->sn_struct,
+                                     L->first_child,L->next_child,
+                                     rowind,
+                                     column_to_sn_map,
+                                     map,
+                                     do_order,ipostorder
+                                     )
+      == -1) {
+    /* sn_struct was calloc'ed, so untouched slots are NULL here */
+    for (j=0; j < (A->n); j++) taucs_free((L->sn_struct)[j]);
+
+    symbolic_elimination_abort(L,parent,rowind,next_child,first_child,
+                               map,column_to_sn_map);
+    return -1;
+  }
+
+  symbolic_elimination_report(L,
+    "\t\tSymbolic Analysis of LL^T: %.2e nonzeros, %.2e flops, %.2e bytes in L\n");
+
+  for (j=0; j < (A->n); j++) map[j] = -1;
+  (void) recursive_amalgamate_supernodes((L->n_sn) - 1,
+                                         &(L->n_sn),
+                                         L->sn_size,L->sn_up_size,L->sn_struct,
+                                         L->first_child,L->next_child,
+                                         rowind,
+                                         column_to_sn_map,
+                                         map,
+                                         do_order,ipostorder
+                                         );
+
+  symbolic_elimination_report(L,
+    "\t\tRelaxed  Analysis of LL^T: %.2e nonzeros, %.2e flops, %.2e bytes in L\n");
+
+  taucs_free(parent);
+  taucs_free(rowind);
+  taucs_free(map);
+  taucs_free(column_to_sn_map);
+  taucs_free(next_child);
+  taucs_free(first_child);
+
+  L->sn_blocks_ld  = taucs_malloc((L->n_sn) * sizeof(int));
+  L->sn_blocks     = taucs_calloc((L->n_sn), sizeof(taucs_datatype*)); /* so we can free before allocation */
+  
+  L->up_blocks_ld  = taucs_malloc((L->n_sn) * sizeof(int));
+  L->up_blocks     = taucs_calloc((L->n_sn), sizeof(taucs_datatype*));
+
+  if (!(L->sn_blocks_ld)
+      || !(L->sn_blocks)
+      || !(L->up_blocks_ld)
+      || !(L->up_blocks))
+    return -1; /* the caller will free L */
+
+  return 0;
+}
+#endif
+
+
+/*************************************************************/
+/* end of file                                               */
+/*************************************************************/
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/contrib/taucs/src/taucs_superlu.c b/contrib/taucs/src/taucs_superlu.c
new file mode 100644
index 0000000000000000000000000000000000000000..14f033ae1c658065863905509bbd94f785a4bf0b
--- /dev/null
+++ b/contrib/taucs/src/taucs_superlu.c
@@ -0,0 +1,236 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+/*** superlu includes ***/
+
+#include "dsp_defs.h"
+#include "util.h"
+
+/* Factorization handle: allocated and returned by            */
+/* taucs_ccs_factor_superlu, consumed by taucs_ccs_solve_superlu */
+typedef struct {
+  /* triangular factors filled in by dgstrf */
+  SuperMatrix L,U;
+  /* column permutation handed to sp_preorder, dgstrf and dgstrs */
+  int*        perm_c;
+  /* row permutation handed to dgstrf and dgstrs */
+  int*        perm_r;
+} taucs_superlu_factor;
+
+/*********************************************************/
+/* calling superlu                                       */
+/*********************************************************/
+
+/*
+ * taucs_ccs_factor_superlu
+ *
+ * Factors a symmetric matrix stored as its lower triangle with SuperLU.
+ * The triangle is first expanded into a full compressed-column matrix
+ * with sorted row indices, then handed to sp_preorder and dgstrf with
+ * identity permutations and a zero pivot threshold.
+ *
+ * The expansion buffers are sized as 2*nnz(A) - n, i.e. they assume
+ * every column stores its diagonal entry; this is checked by an assert.
+ *
+ * Returns a taucs_superlu_factor* to pass to taucs_ccs_solve_superlu,
+ * or NULL when memory cannot be allocated.  The dgstrf status is only
+ * logged, not returned.
+ */
+void* 
+taucs_ccs_factor_superlu(taucs_ccs_matrix* A)
+{
+  double* a;
+  int*    asub;
+  int*    xa;
+  SuperMatrix sA,sAC;
+  int     n,m,nnz,info;
+  int  i,j,ip;
+  int* etree;
+  taucs_superlu_factor* F;
+
+  double wtime_prepare;
+  double wtime_factor;
+
+  wtime_prepare = taucs_wtime();
+
+  assert(A->flags & TAUCS_SYMMETRIC);
+  assert(A->flags & TAUCS_LOWER);
+
+  n = m = A->n;
+  nnz = 2 * (A->colptr)[n] - n;
+
+  a    = (double*) taucs_malloc(nnz * sizeof(double));
+  asub = (int*)    taucs_malloc(nnz * sizeof(int));
+  xa   = (int*)    taucs_malloc((n+1)*sizeof(int));
+  etree  = (int*) taucs_malloc(n*sizeof(int));
+ 
+  F = (taucs_superlu_factor*) taucs_malloc(sizeof(taucs_superlu_factor));
+  if (F) {
+    (F->perm_r) = (int*) taucs_malloc(n*sizeof(int));
+    (F->perm_c) = (int*) taucs_malloc(n*sizeof(int));
+  }
+
+  if (!a || !asub || !xa || !etree
+      || !F || !(F->perm_r) || !(F->perm_c)) {
+    taucs_printf("\t\ttaucs_ccs_factor_superlu: out of memory\n");
+    if (F) {
+      taucs_free(F->perm_r);
+      taucs_free(F->perm_c);
+    }
+    taucs_free(F);
+    taucs_free(etree);
+    taucs_free(xa);
+    taucs_free(asub);
+    taucs_free(a);
+    return NULL;
+  }
+
+  /* perm_c doubles as scratch space while the matrix is expanded: */
+  /* first as per-column counts, then as per-column write cursors  */
+
+  for (i=0; i<n; i++) (F->perm_c)[i] = 0;
+
+  for (j=0; j<n; j++) {
+    for (ip = (A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      if (i==j) 
+        (F->perm_c)[j]++;
+      else {
+        (F->perm_c)[i]++;
+        (F->perm_c)[j]++;
+      }
+    }
+  }
+  
+  xa[0] = 0;
+  for (j=1; j<=n; j++) xa[j] = xa[j-1] + (F->perm_c)[j-1];
+  for (j=0; j< n; j++) (F->perm_c)[j] = xa[j];
+
+  /* the buffers were sized from nnz, so the counts must agree */
+  assert(nnz == xa[n]);
+
+  /* scatter every stored entry and its mirror image */
+
+  for (j=0; j<n; j++) {
+    for (ip = (A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+      i = (A->rowind)[ip];
+      if (i==j) {
+        asub[(F->perm_c)[j]] = i;
+        a   [(F->perm_c)[j]] = (A->values)[ip];
+        (F->perm_c)[j]++;
+      } else {
+        asub[(F->perm_c)[j]] = i;
+        a   [(F->perm_c)[j]] = (A->values)[ip];
+        (F->perm_c)[j]++;
+
+        asub[(F->perm_c)[i]] = j;
+        a   [(F->perm_c)[i]] = (A->values)[ip];
+        (F->perm_c)[i]++;
+      }
+    }
+  }
+
+  /* insertion sort of each column by row index */
+
+  for (j=0; j<n; j++) {
+    for (ip=xa[j]+1; ip<xa[j+1]; ip++) {
+      int    key  = asub[ip];
+      double keyv = a   [ip];
+      int kp = ip-1;
+      while (kp >= xa[j] && asub[kp] > key) {
+        asub[kp+1] = asub[kp];
+        a   [kp+1] = a   [kp];
+        kp--;
+      }
+      asub[kp+1] = key;
+      a   [kp+1] = keyv;
+    }
+  }
+
+#if 0
+  printf("xa=[ ");
+  for (j=0; j<=n; j++) printf("%d ",xa[j]);
+  printf("]\n");
+
+  printf("asub=[ ");
+  for (j=0; j<nnz; j++) printf("%d ",asub[j]);
+  printf("]\n");
+
+  printf("a=[ ");
+  for (j=0; j<nnz; j++) printf("%lf ",a[j]);
+  printf("]\n");
+#endif
+
+  /* every write cursor must have reached the end of its column */
+  for (j=0; j< n; j++) assert((F->perm_c)[j] == xa[j+1]);
+
+  wtime_prepare = taucs_wtime() - wtime_prepare;
+  taucs_printf("\t\tSuperLU preparation time = % 10.3f seconds\n",wtime_prepare);
+
+  wtime_factor = taucs_wtime();
+
+  dCreate_CompCol_Matrix(&sA,m,n,nnz,a,asub,xa,NC,_D,GE);
+
+  for (i=0; i<n; i++) (F->perm_r)[i] = (F->perm_c)[i] = i;
+
+#if 0
+  printf("perm_c=[ ");
+  for (j=0; j<n; j++) printf("%d ",(F->perm_c)[j]);
+  printf("]\n");
+#endif
+
+  sp_preorder("N", &sA, (F->perm_c), etree, &sAC);
+
+#if 0
+  printf("perm_c=[ ");
+  for (j=0; j<n; j++) printf("%d ",(F->perm_c)[j]);
+  printf("]\n");
+#endif
+
+  for (i=0; i<n; i++) (F->perm_r)[i] = (F->perm_c)[i];
+
+  StatInit(100,8); /* panel size, relax */
+
+  dgstrf("N", /* not a refactorization */
+         &sAC,
+         0.0, /* don't pivot! */
+         0.0, /* drop tolerance, not implemented */
+         8,   /* relax */
+         100, /* panel size */
+         etree,
+         NULL, /* work */
+         0,    /* lwork, no memory preallocated */
+         (F->perm_r), 
+         (F->perm_c),
+         &(F->L),
+         &(F->U),
+         &info);
+
+  taucs_printf("\t\ttaucs_ccs_factor_superlu info=%d\n",info);
+
+  {
+    mem_usage_t usage;
+    extern SuperLUStat_t SuperLUStat;
+
+    dQuerySpace(&(F->L), &(F->U), 100,&usage);
+    taucs_printf("\t\ttaucs_ccs_factor_superlu %.2e bytes for L+U\n",usage.for_lu);
+    taucs_printf("\t\ttaucs_ccs_factor_superlu %.2e bytes total\n",usage.total_needed);
+    taucs_printf("\t\ttaucs_ccs_factor_superlu %.2e expansions\n",(float) usage.expansions);
+
+    taucs_printf("\t\ttaucs_ccs_factor_superlu %.2e flops\n",(float) (SuperLUStat.ops)[FACT]);
+  }
+
+  StatFree();
+
+  /* Only F (L, U and the permutations) is returned; the expanded   */
+  /* copy of A, its SuperLU wrappers and the etree are released.    */
+  /* NOTE(review): relies on L and U not aliasing a/asub/xa, as in  */
+  /* the stock SuperLU drivers -- confirm for the bundled version.  */
+  Destroy_CompCol_Permuted(&sAC);
+  Destroy_SuperMatrix_Store(&sA);
+  taucs_free(a);
+  taucs_free(asub);
+  taucs_free(xa);
+  taucs_free(etree);
+
+#if 0
+  printf("perm_r=[ ");
+  for (j=0; j<n; j++) printf("%d ",(F->perm_r)[j]);
+  printf("]\n");
+#endif
+
+  wtime_factor = taucs_wtime() - wtime_factor;
+  taucs_printf("\t\tSuperLU factor time = % 10.3f seconds\n",wtime_factor);
+
+  return F;
+}
+
+/*
+ * taucs_ccs_solve_superlu
+ *
+ * Solves the factored system for one right-hand side.
+ *
+ *   vF  handle returned by taucs_ccs_factor_superlu
+ *   x   output vector; its length is the column count of the L factor
+ *   b   right-hand side, same length; it is copied, not modified
+ *
+ * Returns the dgstrs status (also logged), or -1 when the temporary
+ * vector cannot be allocated.
+ */
+int 
+taucs_ccs_solve_superlu(void* vF, double* x, double* b)
+{
+  taucs_superlu_factor* F = (taucs_superlu_factor*) vF;
+  int n,i;
+  SuperMatrix B;
+  int info;
+  double* t;
+
+  n = (F->L).ncol;
+
+  /* dgstrs is given a copy of b; the copy is read back into x below */
+  t = (double*) taucs_malloc(n*sizeof(double));
+  if (!t) {
+    taucs_printf("\t\ttaucs_ccs_solve_superlu: out of memory\n");
+    return -1;
+  }
+  for (i=0; i<n; i++) t[i] = b[i];
+  dCreate_Dense_Matrix(&B,n,1,t,n,DN,_D,GE);
+
+  /* NOTE(review): "T" selects the transposed system in dgstrs; the  */
+  /* factored matrix is asserted symmetric in the factor routine, so */
+  /* this is presumably equivalent to "N" -- confirm.                */
+  dgstrs("T",
+         &(F->L),&(F->U),F->perm_r,F->perm_c,
+         &B,
+         &info
+         );
+
+  for (i=0; i<n; i++) x[i] = t[i];
+
+  /* release the dense wrapper and the temporary it points to */
+  Destroy_SuperMatrix_Store(&B);
+  taucs_free(t);
+
+  taucs_printf("\t\ttaucs_ccs_solve_superlu info=%d\n",info);
+  
+  return info;
+}
diff --git a/contrib/taucs/src/taucs_timer.c b/contrib/taucs/src/taucs_timer.c
new file mode 100644
index 0000000000000000000000000000000000000000..f0b0606410942758808fc153f8fff3d267801377
--- /dev/null
+++ b/contrib/taucs/src/taucs_timer.c
@@ -0,0 +1,408 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include "taucs.h"
+
+#ifndef TAUCS_CONFIG_TIMING
+/* timing support is compiled out: the wall clock always reads zero */
+double taucs_wtime()
+{
+  return 0.0;
+}
+
+/* timing support is compiled out: the CPU clock always reads zero */
+double taucs_ctime()
+{
+  return 0.0;
+}
+#else
+
+#ifdef OSTYPE_win32
+#define TAUCS_TIMER
+
+#include <windows.h>
+
+/*
+ * Wall-clock time in seconds (Windows).
+ *
+ * The value is derived from the system FILETIME, so it keeps growing
+ * across midnight; summing only the hour/minute/second fields of a
+ * SYSTEMTIME made intervals that span midnight come out negative.
+ * Only differences between two calls are meaningful.
+ */
+double taucs_wtime() {
+  FILETIME       now;
+  ULARGE_INTEGER ticks;
+
+  GetSystemTimeAsFileTime(&now);
+
+  ticks.LowPart  = now.dwLowDateTime;
+  ticks.HighPart = now.dwHighDateTime;
+
+  /* FILETIME counts 100-nanosecond intervals; the signed cast matches */
+  /* the conversion used by taucs_ctime in this file                   */
+  return (double) (signed __int64) ticks.QuadPart / 10000000.0;
+}
+/*
+ * CPU time (kernel + user) consumed by the current process, in
+ * seconds (Windows).  Returns 0.0 when GetProcessTimes fails.
+ */
+double taucs_ctime() { 
+  FILETIME       t_created,t_exited,t_kernel,t_user;
+  ULARGE_INTEGER kernel_ticks,user_ticks;
+  HANDLE         process = GetCurrentProcess();
+  double         kernel_seconds;
+  double         user_seconds;
+
+  if (!GetProcessTimes(process,
+                       &t_created,
+                       &t_exited,
+                       &t_kernel,
+                       &t_user))
+    return 0.0;
+
+  kernel_ticks.LowPart  = t_kernel.dwLowDateTime;
+  kernel_ticks.HighPart = t_kernel.dwHighDateTime;
+  user_ticks.LowPart    = t_user.dwLowDateTime;
+  user_ticks.HighPart   = t_user.dwHighDateTime;
+
+  /* both counters are in 100-nanosecond units */
+  kernel_seconds = (double) (signed __int64) kernel_ticks.QuadPart / 10000000.0;
+  user_seconds   = (double) (signed __int64) user_ticks.QuadPart   / 10000000.0;
+
+  CloseHandle( process );
+
+  return kernel_seconds + user_seconds;
+}
+#endif /* win32 */
+
+/*#include "taucs.h"*/
+
+/* Return time in nanoseconds */
+ 
+#if 0
+#ifdef OSTYPE_linux_not_reliable
+#define TAUCS_TIMER
+
+#include <stdio.h>                                                 
+#include <unistd.h>
+#include <sys/types.h>                                                 
+#include <sys/timeb.h>                                                 
+
+
+/* p5tsc.h -- functions to use Pentium cycle counter for timing of events.
+   Christian Kurmann <kurmann@inf.ethz.ch>
+   based on Brad Karp, David Mazieres's p5cnt package from Harvard. */
+
+typedef unsigned long uint32;
+
+/* Cycle Counter */
+
+/*
+ *  Write <hi>:<lo> into MSR number <msr>.
+ *
+ *  This whole region is disabled by the enclosing "#if 0".
+ *  NOTE(review): the active asm string puts all instructions on one
+ *  line with no separators (the multi-line original is kept in the
+ *  comment inside the call), so it would probably not assemble if the
+ *  region were re-enabled.
+ */
+
+__inline__ void
+wrmsr (const uint32 msr, uint32 hi, uint32 lo)
+{
+  __asm __volatile (
+		    /*
+		      "movl %0, %%ecx         # MSR to be written
+		      movl %1, %%edx          # High order 32 bits
+		      movl %2, %%eax          # Low order 32 bits
+		      .byte 0xf; .byte 0x30   # WRMSR instruction"
+		    */
+        "movl %0, %%ecx movl %1, %%edx movl %2, %%eax .byte 0xf; .byte 0x30 # WRMSR instr"
+    : : "g" (msr), "g" (hi), "g" (lo) : "eax", "ecx", "edx");
+}
+
+/* macro for clearing tsc */
+#define cltsc wrmsr((uint32) 0x10, (uint32) (0), (uint32) (0))
+
+/*
+ *  Read 64 bit time stamp counter.  Put the high 32 bits in
+ *  <*hi>, and the lower 32 bits in <*lo>.
+ *
+ *  Disabled by the enclosing "#if 0".
+ *  NOTE(review): as in wrmsr, the active asm string has no separators
+ *  between instructions; the multi-line original is in the comment.
+ */
+__inline__ void
+rdtsc (uint32 *hi, uint32 *lo)
+{
+  __asm __volatile (
+		    /*
+		      ".byte 0xf; .byte 0x31  # RDTSC instruction
+		      movl %%edx, %0          # High order 32 bits
+		      movl %%eax, %1          # Low order 32 bits"
+		    */
+      ".byte 0xf; .byte 0x31 movl %%edx, %0 movl %%eax, %1 # RDTSC instruction"
+    : "=g" (*hi), "=g" (*lo) :: "eax", "edx");
+}
+
+/* Performance Monitor Counters */
+
+/* Loads <ecx> into the ECX register ("select counter" per the asm   */
+/* comment).  Disabled by the enclosing "#if 0".                      */
+__inline__ void
+spmc (uint32 ecx)
+{
+  __asm __volatile (
+        "movl %0, %%ecx         # select counter "
+    : : "g" (ecx) : "ecx");
+}
+
+/*
+ *  Read 64 bit Performance Monitor Counter.  Put the high 32 bits in
+ *  <*hi>, and the lower 32 bits in <*lo>.
+ *
+ *  Disabled by the enclosing "#if 0".
+ *  NOTE(review): the active asm string has no separators between
+ *  instructions; the multi-line original is in the comment.
+ */
+__inline__ void
+rdpmc (uint32 *hi, uint32 *lo)
+{
+  __asm __volatile (
+		    /*
+		      ".byte 0xf; .byte 0x33  # RDPMC instruction
+		      movl %%edx, %0          # High order 32 bits
+		      movl %%eax, %1          # Low order 32 bits"
+		    */
+    ".byte 0xf; .byte 0x33 movl %%edx, %0 movl %%eax, %1 # RDPMC instruction"
+    : "=g" (*hi), "=g" (*lo) :: "eax", "edx");
+}
+
+/*
+ * Difference t1 - t0 of two 64-bit counters given as (hi,lo) halves.
+ * Yields the low-half difference when the high halves cancel (after
+ * the borrow), and 0 otherwise.
+ */
+int subtract64(uint32 hi0, uint32 lo0, uint32 hi1, uint32 lo1 )
+{
+  uint32 high_delta = hi1 - hi0;
+  uint32 low_delta  = lo1 - lo0;
+
+  /* borrow from the high half when the low half wrapped */
+  if (lo1 < lo0) high_delta -= 1;
+
+  if (high_delta > 0) return 0;
+  return low_delta;
+}
+
+/*
+ * Time-stamp-counter based timer (disabled by the enclosing "#if 0").
+ *
+ * The first call calibrates: it reads the counter and ftime() before
+ * and after a one-second sleep, derives a tick rate from the two, logs
+ * it, and records a fresh base reading.  Every call then reads the
+ * counter again and scales the reading by the calibrated factor.
+ *
+ * NOTE(review): the scale factor is computed as ticks/dt, yet the
+ * result multiplies a tick count by it; the units look inconsistent --
+ * confirm before reviving this code.
+ */
+double timer()
+{
+  uint32        hi, lo;
+  static uint32 hi0, lo0;
+  static uint32 hi1, lo1;
+    uint32 hir, lor;
+  static double loticks_per_ns;
+  static double hiticks_per_ns;
+  static int    first_time    = 1;
+
+  if (first_time) {
+    struct timeb T;
+    static time_t start_time, time_diff;
+    static time_t start_mill, mill_diff;
+    int    rc;
+    double dt;
+    /*uint32 hi0, lo0, hi1, lo1;*/
+    double ticks;
+
+    first_time = 0;
+
+    /* first sample: wall clock and counter */
+    rc = ftime( &T );
+    rdtsc(&hi0,&lo0);
+    start_time = T.time;
+    start_mill = T.millitm;
+
+    sleep(1);
+
+    /* second sample, one sleep later */
+    rc = ftime( &T );
+    rdtsc(&hi1,&lo1);
+    time_diff = T.time - start_time;
+    mill_diff = T.millitm - start_mill; 
+
+    /* elapsed wall time in nanoseconds */
+    dt = (1e9) * ((double) time_diff) + (1e6) * ((double) mill_diff);
+
+    /* elapsed ticks, with a borrow when the low half wrapped */
+    hir = (hi1 - hi0);
+    lor = (lo1 - lo0);
+    if (lo1 < lo0) hir -= 1;
+    ticks = (double) lor + (double) hir * 4294967296.0;
+
+    loticks_per_ns = ticks/dt;
+    hiticks_per_ns = ticks/(dt / 4294967296.0);
+
+    /* loticks_per_ns is recomputed here without the borrow, */
+    /* overwriting the value assigned just above             */
+    hir = (hi1 - hi0);
+    loticks_per_ns = (4294967296.0 * (double) hir + (double) lo1 - (double) lo0) / dt;
+
+    log_printf ("timer: hi0 %u lo0 %u hi1 %u lo1 %u\n",hi0,lo0,hi1,lo1);
+    log_printf ("timer: lo/ns %lg hi/ns %lg\n",loticks_per_ns,hiticks_per_ns);
+
+    /* base reading for all later calls */
+    rdtsc(&hi0,&lo0);
+  }
+
+  rdtsc(&hi1,&lo1);
+
+  /* only the high half is taken relative to the base reading */
+  hir = (hi1 - hi0);
+  return ( loticks_per_ns * ( 4294967296.0 * (double) hir + (double) lo1 ) );
+
+  /*
+  rdtsc(&hi,&lo);
+  return ( ((double) hi * hiticks_per_ns) + ((double) lo * loticks_per_ns) );
+  */
+}
+
+#endif /* OSTYPE_linux_not_reliable */
+#endif /* if 0 */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifndef OSTYPE_win32
+#define TAUCS_TIMER
+
+#include <stdio.h>                                                 
+#include <unistd.h>
+
+/*
+  #ifndef OSTYPE_solaris
+  #include <sys/time.h>
+  #include <sys/resource.h>
+  #endif
+*/
+#ifdef OSTYPE_solaris
+#define _XPG4_2
+#endif
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>                                                 
+#include <sys/timeb.h>                                                 
+
+/*
+ * Wall-clock time in seconds since the Epoch (non-Windows builds).
+ *
+ * Uses gettimeofday(), declared by the <sys/time.h> include above,
+ * instead of the obsolete ftime(); the result keeps the same meaning
+ * but carries microsecond rather than millisecond resolution.
+ * Returns 0.0 if the clock cannot be read.
+ */
+double taucs_wtime()
+{
+  struct timeval now;
+
+  if (gettimeofday(&now, NULL) != 0)
+    return 0.0;
+
+  return ((double) now.tv_sec) + (1e-6) * ((double) now.tv_usec);
+}
+
+/*
+ * CPU time (user + system) consumed by the calling process, in
+ * seconds, as reported by getrusage(RUSAGE_SELF).
+ */
+double taucs_ctime()
+{
+  struct rusage usage;
+  double whole_seconds;
+  double microseconds;
+  
+  getrusage(RUSAGE_SELF,&usage);
+
+  whole_seconds = (double) (usage.ru_utime.tv_sec  + usage.ru_stime.tv_sec );
+  microseconds  = (double) (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec);
+
+  return whole_seconds + microseconds * 1.0e-6;
+}
+
+#endif /* not win32 */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifndef TAUCS_TIMER
+#define TAUCS_TIMER
+/* fallback when no platform timer was selected above: wall clock reads zero */
+double taucs_wtime()
+{
+  return 0.0;
+}
+
+/* fallback when no platform timer was selected above: CPU clock reads zero */
+double taucs_ctime()
+{
+  return 0.0;
+}
+
+#endif
+
+#if 0
+/*
+ * Logs the CPU and wall-clock time elapsed since the previous call,
+ * each line prefixed with <s>.  The first call only records the
+ * starting point and logs "starting timer".
+ */
+void cpu_time_from_last(char s[])
+{
+  static int    started = 0;
+  static time_t prev_sec;
+  static time_t prev_msec;
+  static double prev_cpu;
+
+  struct timeb  now;
+  struct rusage usage;
+  double        cpu_now;
+  double        wall;
+
+  (void) ftime( &now );
+
+  getrusage(RUSAGE_SELF,&usage);
+  cpu_now = (usage.ru_utime).tv_sec+(usage.ru_stime).tv_sec+
+    ((usage.ru_utime).tv_usec+(usage.ru_stime).tv_usec)/1000000.0;
+
+  if (!started) {
+    started   = 1;
+    prev_sec  = now.time;
+    prev_msec = now.millitm;
+    prev_cpu  = cpu_now;
+
+    taucs_printf("starting timer\n");
+    return;
+  }
+
+  wall = ((double) (now.time - prev_sec))
+       + ((double) (now.millitm - prev_msec))/1000.;
+
+  taucs_printf("%s - CPU Time from last time : %lf\n", s, cpu_now-prev_cpu);
+  taucs_printf("%s - Wall Clock Time from last time : %lf\n",s,wall);
+
+  /* the next report is measured from this call */
+  prev_sec  = now.time;
+  prev_msec = now.millitm;
+  prev_cpu  = cpu_now;
+}
+
+/*
+ * Logs the CPU and wall-clock time elapsed since the first call, each
+ * line prefixed with <s>.  The first call only records the starting
+ * point and logs "starting timer".
+ */
+void cpu_time_from_start(char s[])
+{
+  static int    started = 0;
+  static time_t base_sec;
+  static time_t base_msec;
+  static double base_cpu;
+
+  struct timeb  now;
+  struct rusage usage;
+  double        cpu_now;
+  double        wall;
+  
+  (void) ftime( &now );
+
+  getrusage(RUSAGE_SELF,&usage);
+  cpu_now = (usage.ru_utime).tv_sec+(usage.ru_stime).tv_sec+
+    ((usage.ru_utime).tv_usec+(usage.ru_stime).tv_usec)/1000000.0;
+
+  if (!started) {
+    started   = 1;
+    base_sec  = now.time;
+    base_msec = now.millitm;
+    base_cpu  = cpu_now;
+
+    taucs_printf("starting timer\n");
+    return;
+  }
+
+  wall = ((double) (now.time - base_sec))
+       + ((double) (now.millitm - base_msec))/1000.;
+
+  taucs_printf("%s - CPU Time from beginning : %lf\n",s,cpu_now-base_cpu);
+  taucs_printf("%s - Wall Clock Time from beginning : %lf\n",s,wall);
+}
+#endif /* if 0 */
+
+#endif /* TAUCS_CONFIG_TIMING */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
diff --git a/contrib/taucs/src/taucs_vaidya.c b/contrib/taucs/src/taucs_vaidya.c
new file mode 100644
index 0000000000000000000000000000000000000000..0aceb0c58a1b33908061118439ce7dea43b285b8
--- /dev/null
+++ b/contrib/taucs/src/taucs_vaidya.c
@@ -0,0 +1,4116 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Doron Chen and Sivan Toledo                   */
+/* File  : taucs_vaidya.c                                */
+/* Description: constructs Vaidya's preconditioners      */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#include "taucs.h"
+
+/*long int random(void); omer*/
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_DOUBLE
+
+typedef unsigned char byte;
+/*typedef int byte;*/
+
+#define Do(i,n) for ((i)=0;(i)<(n);(i)++)
+
+typedef struct {
+  int n;
+  int nent;
+  int max_size;
+  int *ivec1;
+  int *ivec2;
+  double *dvec;
+} sym_matrix;
+
+typedef struct {
+  int    i;
+  int    j;
+  double v;
+} wedge; /* weighted edge */
+
+typedef struct {
+  int n;
+  int nent;
+  int max_size;
+  wedge* edges;
+} graph;
+
+/************** UNION FIND ***********/
+
+static char *label = NULL;
+static int *p      = NULL;
+static int *rank   = NULL;
+
+/* Allocate the global union-find arrays for `size` singleton sets: every
+   element starts as its own parent with rank 0 and label 0.  Returns 0, or
+   -1 (with the globals reset to NULL) when an allocation fails. */
+static int unionfind_init(int size)
+{
+  int i;
+  p = (int *)taucs_malloc(size * sizeof(int));
+  rank = (int *)taucs_malloc(size * sizeof(int));
+  label = (char *)taucs_malloc(size * sizeof(char));
+  if (!p || !rank || !label) {
+    /* do not leave the file-scope pointers dangling after the frees */
+    taucs_free(p);     p     = NULL;
+    taucs_free(rank);  rank  = NULL;
+    taucs_free(label); label = NULL;
+    return -1;
+  }
+  Do(i,size) {
+    p[i] = i;
+    rank[i] = 0;
+    label[i] = 0;
+  }
+  return 0;
+}
+
+/* Free the union-find arrays; pointers are cleared so a second call is harmless. */
+static void unionfind_free (void)
+{
+  taucs_free(p);     p     = NULL;
+  taucs_free(rank);  rank  = NULL;
+  taucs_free(label); label = NULL;
+}
+
+/* Union by rank: link the trees of a and b, whose roots are x and y.  The root
+   that becomes a child is labelled label[a]^label[b]^l.  Returns the root of
+   the united tree. */
+static int Union(int a,int b,int x,int y,int l)
+{
+  int child, root;
+  if (rank[x] > rank[y]) {
+    root  = x;
+    child = y;
+  } else {
+    root  = y;
+    child = x;
+    if (rank[x] == rank[y]) /* equal ranks: y's tree grows one level */
+      rank[y]++;
+  }
+  p[child] = root;
+  label[child] = label[a] ^ label[b] ^ l;
+  return root;
+}
+
+static
+int find_set(int x) /* returns the root of x's tree and compresses the path to it */
+{
+  int tmp;
+  /* recurse first: afterwards p[x] is the root or a direct child of it, with label[p[x]] updated */
+  if (x != p[x])
+    {
+      tmp = find_set(p[x]);
+      label[x] ^= label[p[x]]; /* fold the old parent's label into x's */
+      p[x] = tmp;              /* x now points straight at the root */
+    }
+  return(p[x]);
+}
+
+/*********** HEAP OPERATIONS ************/
+
+typedef struct hea {
+  int heap_size;
+  int alloc_size;
+  int *edges;
+  double *key;
+} heap;
+
+#define INF 100000000.0
+
+#define Parent(i) ((((i)+1)/2) - 1)
+#define Left(i) ((((i)+1)*2) - 1)
+#define Right(i) ((((i)+1)*2 + 1) - 1)
+
+/* Swap slots a and b of the heap: both the edge index and its key move, so
+   each key stays paired with its edge.  A is passed by value, but its arrays
+   are shared with the caller, so the swap is visible outside. */
+static
+void exchange(heap A,int a,int b)
+{
+  const int    edge_a = A.edges[a];
+  const double key_a  = A.key[a];
+
+  A.edges[a] = A.edges[b];
+  A.key[a]   = A.key[b];
+
+  A.edges[b] = edge_a;
+  A.key[b]   = key_a;
+}
+
+/* Sift the entry at index i down a min-heap ordered by key: while one of its
+   children (inside the first A.heap_size slots) has a smaller key, swap with
+   the smallest child and continue from that child's slot. */
+static
+void Heapify(heap A,int i)
+{
+  for (;;) {
+    const int l = Left(i);
+    const int r = Right(i);
+    int smallest = i;
+
+    if ((l < A.heap_size) && (A.key[l] < A.key[smallest]))
+      smallest = l;
+    if ((r < A.heap_size) && (A.key[r] < A.key[smallest]))
+      smallest = r;
+    if (smallest == i)
+      return;
+
+    exchange(A,i,smallest);
+    i = smallest;
+  }
+}
+
+#if 0
+static
+int build_heap(int size,heap *h,graph *A)
+{
+  int i,k=0;
+
+  h->heap_size = 0;
+  h->edges = (int *)taucs_malloc(size * sizeof(int));
+  h->key = (double *)taucs_malloc(size * sizeof(double));
+  if (!(h->edges) || !(h->key)) {
+    taucs_free(h->edges);
+    taucs_free(h->key);
+    return -1;
+  }
+
+  Do(i,size)
+    if ((A->edges)[i].i != (A->edges)[i].j)
+      {
+	h->edges[k] = i;
+	h->key[k] = fabs((A->edges)[i].v);
+	k++;
+      }
+  
+  h->heap_size = k;
+  size = k;
+
+  /*
+  for(i=(size/2)-1;i>=0;i--)
+    Heapify(*h,i);
+  */
+
+  return 0;
+}
+#endif
+
+/* Release both arrays owned by h (the struct itself is passed by value). */
+static void free_heap(heap h)
+{
+  taucs_free(h.key);
+  taucs_free(h.edges);
+}
+
+
+/* Hoare partition of h.key[p..r] (each edge index moves with its key).
+   Requires p < r.  Returns q with p <= q < r such that every key in [p..q]
+   is <= every key in [q+1..r].
+
+   Pivot choice: slot p for ranges shorter than 8, one random slot for ranges
+   shorter than 16, otherwise the median of three random slots.  The pivot is
+   always taken from [p..r-1]:
+     - the original used slot 0 for short ranges, which lies outside [p..r]
+       whenever p > 0 and lets the scans below leave [p..r];
+     - it could also draw slot r; if that slot holds the strict maximum the
+       function returns r and quick_sort(h,p,q) recurses on the same range
+       forever.
+   NOTE(review): the thresholds 8 and 16 were listed in an order that made
+   the single-random-slot branch unreachable; the order used here is the
+   presumed intent -- confirm. */
+static int partition(heap h, int p, int r)
+{
+  int pivot;
+  double x;
+  int i,j;
+  int span = r - p; /* number of admissible pivot slots: p..r-1 */
+
+  if (span < 8) pivot = p;
+  /* Sivan: changed random() to rand() to remove warning (random is not ansi C) */
+  else if (span < 16) pivot = p + (rand() % span);
+  else {
+    int c[3]; /* candidates */
+    int t;
+    c[0] = p + (rand() % span);
+    c[1] = p + (rand() % span);
+    c[2] = p + (rand() % span);
+
+    /* three compare-exchanges order the candidates by key; c[1] is the median */
+    if (h.key[c[1]] < h.key[c[0]]) {t=c[0]; c[0]=c[1]; c[1]=t;}
+    if (h.key[c[2]] < h.key[c[1]]) {t=c[1]; c[1]=c[2]; c[2]=t;}
+    if (h.key[c[1]] < h.key[c[0]]) {t=c[0]; c[0]=c[1]; c[1]=t;}
+
+    pivot = c[1];
+  }
+
+  x = h.key[pivot];
+  i = p-1;
+  j = r+1;
+
+  while (1) {
+    /* the pivot, and later the swapped entries, act as sentinels that keep
+       both scans inside [p..r] */
+    for (j--; h.key[j] > x; j--);
+    for (i++; h.key[i] < x; i++);
+
+    if (i < j)
+      exchange(h,i,j);
+    else 
+      return j;
+  }
+}
+
+#if 0
+static void insertion_sort(heap h, int p, int r)
+{
+  int i,j;
+
+  for (j=p+1; j<r; j++) {
+    double key  = h.key[j];
+    int    edge = h.edges[j];
+
+    for (i=j-1; i>=p && h.key[i] > key; i--) {
+      h.key[i+1] = h.key[i];
+      h.edges[i+1] = h.edges[i];
+    }
+
+    h.key[i+1] = key;
+    h.edges[i+1] = edge;
+  }
+}
+#endif /* 0, we don't need insertion sort, heap sort */
+
+/* Min-heap sift-down for the sub-array A.key[p..r]: i is an index relative to
+   p, and the heap occupies the r-p+1 slots starting at p. */
+static
+void heapify_offset(heap A,int p,int r,int i)
+{
+  const int size = r-p+1;
+
+  for (;;) {
+    const int L = Left(i);
+    const int R = Right(i);
+    int smallest = i;
+
+    if ((L < size) && (A.key[p+L] < A.key[p+smallest]))
+      smallest = L;
+    if ((R < size) && (A.key[p+R] < A.key[p+smallest]))
+      smallest = R;
+    if (smallest == i)
+      return;
+
+    exchange(A,p+i,p+smallest);
+    i = smallest;
+  }
+}
+
+
+/* Heap-sort h.key[p..r] (with edges) into DESCENDING key order: a min-heap is
+   built in place and its minimum is repeatedly moved behind the live heap.
+   NOTE(review): partition() orders ascending -- confirm the intended order. */
+static void heapsort_sort(heap h, int p, int r)
+{
+  int size = r-p+1;
+  int i;
+  for(i=(size/2)-1;i>=0;i--)
+    heapify_offset(h,p,r,i);
+  for(i=size-1;i>=1;i--) {
+    exchange(h,p,p+i);           /* was (0,i): ignored the offset p */
+    heapify_offset(h,p,p+i-1,0); /* re-sift the new root of the shrunk heap */
+  }
+}
+
+/* Sort h.key[p..r]: ranges spanning fewer than 100 slots are handed to
+   heapsort_sort, larger ones are split with partition() and sorted in parts. */
+static void quick_sort(heap h, int p, int r)
+{
+  while (p < r) {
+    int q;
+
+    if (r - p < 100) { heapsort_sort(h,p,r); return; }
+    q = partition(h,p,r);
+    quick_sort(h,p,q);
+    p = q+1; /* carry on with the right part instead of recursing */
+  }
+}
+
+#if 0
+static
+int heap_sort(int size,heap *h,graph *A)
+{
+  int i;
+  /*double *dvec;*/
+
+  if (build_heap(size,h,A) == -1) 
+    return -1;
+  size = h->heap_size;
+
+#define noQSORT
+#ifdef QSORT
+  quick_sort(*h,0,size-1);
+#else
+
+  for(i=(size/2)-1;i>=0;i--)
+    Heapify(*h,i);
+
+
+  for(i=size-1;i>=1;i--)
+    {
+      exchange(*h,0,i);
+      h->heap_size--;
+      Heapify(*h,0);
+    }
+#endif
+
+  return(size); /* cannot be -1, so -1 is an error */
+}
+#endif /* 0, no heap_sort */
+
+/* Load every off-diagonal edge of G (i != j) into h, keyed by |v|, and sort
+   the entries.  With QSORT undefined (the default) a min-heap sort is used,
+   which leaves the keys in descending order.  Returns the number of entries
+   stored; h must have room for them (see pqueue_create). */
+static int pqueue_fill(heap* h, graph* G)
+{
+  int i,size;
+  size=0;
+  Do(i,G->nent) {
+    if ((G->edges)[i].i != (G->edges)[i].j) {
+      /* slot `size` is written next, so it must be strictly inside the arrays */
+      assert(size < h->alloc_size);
+      h->edges[size] = i;
+      h->key[size] = fabs((G->edges)[i].v);
+      size++;
+    }
+  }
+  h->heap_size = size;
+
+#define noQSORT
+#ifdef QSORT
+  quick_sort(*h,0,size-1);
+#else
+  for(i=(size/2)-1;i>=0;i--)
+    Heapify(*h,i);
+  for(i=size-1;i>=1;i--) {
+    exchange(*h,0,i);
+    h->heap_size--; /* Heapify must not look at the already-sorted tail */
+    Heapify(*h,0);
+  }
+#endif
+
+  h->heap_size = size; /* restore the count consumed by the sort loop */
+  return size;
+}
+
+/* Allocate an empty priority queue with room for `size` entries.  Returns 0,
+   or -1 on allocation failure, in which case h holds no stale pointers. */
+static int pqueue_create(heap* h, int size)
+{
+  h->heap_size  = 0;
+  h->alloc_size = size;
+  h->edges = (int *)taucs_malloc(size * sizeof(int));
+  h->key = (double *)taucs_malloc(size * sizeof(double));
+  if (h->edges && h->key) return 0;
+  taucs_free(h->edges); h->edges = NULL;
+  taucs_free(h->key);   h->key   = NULL;
+  return -1;
+}
+
+/***************************************************/
+#ifdef GRAPHSORT
+
+#define Parent(i) ((((i)+1)/2) - 1)
+#define Left(i) ((((i)+1)*2) - 1)
+#define Right(i) ((((i)+1)*2 + 1) - 1)
+
+static
+void new_exchange(wedge* e,int i,int j) /* swap the edges e[i] and e[j] as whole structs */
+{
+  wedge t;
+  t = e[i];
+  e[i] = e[j];
+  e[j] = t;
+  /* Older variant, kept for reference (heap A with parallel edges[] / key[] arrays):
+  int tmp1;
+  double tmp2;
+
+  tmp1 = A.edges[a];
+  A.edges[a] = A.edges[b];
+  A.edges[b] = tmp1;
+
+  tmp2 = A.key[a];
+  A.key[a] = A.key[b];
+  A.key[b] = tmp2;
+  */
+}
+
+/* Sift e[i] down a min-heap of n edges ordered by |v|: while a child has a
+   smaller |v|, swap with the smallest child and continue from that child's
+   slot. */
+static
+void heapify(wedge* e,int n,int i)
+{
+  for (;;) {
+    const int l = Left(i);
+    const int r = Right(i);
+    int smallest = i;
+
+    if ((l < n) && (fabs(e[l].v) < fabs(e[smallest].v)))
+      smallest = l;
+    if ((r < n) && (fabs(e[r].v) < fabs(e[smallest].v)))
+      smallest = r;
+    if (smallest == i)
+      return;
+
+    new_exchange(e,i,smallest);
+    i = smallest;
+  }
+}
+
+/* Heap-sort e[0..n-1] by |v| using the min-heap heapify(), which leaves the
+   entries in descending order.  Always returns 0. */
+static int new_heap_sort(wedge* e, int n)
+{
+  int i;
+  for (i=(n/2)-1; i>=0; i--)
+    heapify(e,n,i);
+  for(i=n-1; i>=1; i--) {
+    new_exchange(e,0,i);
+    n--;
+    heapify(e,n,0);
+  }
+  return 0; /* the function is declared int but used to fall off the end */
+}
+
+
+/* qsort()-style comparator that orders weighted edges by ascending |v|.
+   Returns -1, 0 or 1.  If either |v| is NaN both comparisons are false and
+   the result is 0. */
+int wedge_compare(const void* ve1, const void* ve2)
+{
+  wedge* lhs = (wedge*) ve1;
+  wedge* rhs = (wedge*) ve2;
+  const double lhs_key = fabs(lhs->v);
+  const double rhs_key = fabs(rhs->v);
+
+  if (lhs_key < rhs_key)
+    return -1;
+  else if (lhs_key > rhs_key)
+    return 1;
+  else
+    return 0;
+}
+
+
+
+/* Insertion sort of e[0..n-1] into ascending order of |v|. */
+static void insertion_sort(wedge* e, int n)
+{
+  int i,j;
+
+  for (j=1; j<n; j++) {
+    double key = fabs(e[j].v);
+    wedge  ej = e[j];
+    /* shift larger entries right; the comparison must use the weight v
+       (the original compared the column index e[i].j against the key) */
+    for (i=j-1; i>=0 && fabs(e[i].v) > key; i--) {
+      e[i+1].i = e[i].i;
+      e[i+1].j = e[i].j;
+      e[i+1].v = e[i].v;
+    }
+    e[i+1].i = ej.i;
+    e[i+1].j = ej.j;
+    e[i+1].v = ej.v;
+  }
+}
+
+/* Hoare partition of e[0..n-1] by |v|; requires n >= 2.  Returns q with
+   0 <= q < n-1 such that every |v| in e[0..q] is <= every |v| in
+   e[q+1..n-1].
+
+   The pivot is drawn from e[0..n-2] only.  With the former rand() % n the
+   last slot could be chosen; if it held the strict maximum the function
+   returned n-1 and quick_sort(e,q+1) recursed on the whole range forever. */
+static int partition(wedge* e, int n)
+{
+  int pivot = (n > 1) ? (rand() % (n-1)) : 0;
+  double x = fabs(e[pivot].v);
+  int i,j;
+  i = -1;
+  j = n;
+
+  while (1) {
+    do {
+      j--;
+    } while ( fabs(e[j].v) > x );
+
+    do {
+      i++;
+    } while ( fabs(e[i].v) < x );
+
+    if (i < j) {
+      /* swap e[i] and e[j] field by field */
+      int ti,tj; double tv;
+      ti = e[i].i;
+      tj = e[i].j;
+      tv = e[i].v;
+      e[i].i = e[j].i;
+      e[i].j = e[j].j;
+      e[i].v = e[j].v;
+      e[j].i = ti;
+      e[j].j = tj;
+      e[j].v = tv;
+    } else return j;
+  }
+}
+
+/* Sort e[0..n-1] by |v|: fewer than 32 entries go to insertion_sort,
+   otherwise the range is partitioned and both parts are sorted recursively.
+   Declared void explicitly; it was implicit int with a bare `return;`. */
+static void quick_sort(wedge* e, int n)
+{
+  int q;
+  if (n <= 1) return;
+  if (n < 32) { insertion_sort(e,n); return; }
+  q = partition(e,n);
+  quick_sort(e    ,q+1);
+  quick_sort(e+q+1,n-q-1);
+}
+
+/* Sort the edges of G in place by |v| using the hand-written quick_sort.
+   A variant based on the C library qsort() with wedge_compare as the
+   comparator used to sit here, commented out:
+     qsort(G->edges, G->nent, sizeof(wedge), wedge_compare);
+   Always returns 0. */
+static int
+graph_sort(graph* G) 
+{
+  quick_sort(G->edges,G->nent);
+  return 0; /* declared int; previously fell off the end without a value */
+}
+
+#endif /* GRAPHSORT */
+/************ VAIDYA'S PRECONDITIONERS *************/
+
+#define swap(a,b) {int TMP; TMP = a; a = b; b = TMP;}
+#define EPSILON 0.00000001
+
+/************ GRAPHS *************/
+
+/* Allocate a graph with room for `size` weighted edges.  Only max_size is
+   initialised; n and nent are left for the caller to set.  Returns NULL if
+   either allocation fails (the graph struct is released in that case). */
+static graph* construct_graph(int size)
+{
+  graph *g = (graph *)taucs_malloc(sizeof(graph));
+  if (g == NULL)
+    return NULL;
+
+  g->edges = (wedge*) taucs_malloc(size*sizeof(wedge));
+  if (g->edges == NULL) {
+    taucs_free(g);
+    return NULL;
+  }
+
+  g->max_size = size;
+  return g;
+}
+
+/* Free a graph and its edge array; a NULL argument is ignored. */
+static void free_graph(graph *a)
+{
+  if (a == NULL)
+    return;
+
+  taucs_free(a->edges);
+  taucs_free(a);
+}
+
+/* Free a CCS matrix: rowind, colptr, values.d and the struct; NULL is ignored. */
+void free_ccs_matrix(taucs_ccs_matrix *a)
+{
+  if (a == NULL)
+    return;
+  taucs_free(a->rowind);
+  taucs_free(a->colptr);
+  taucs_free(a->values.d/*taucs_values*/);
+  taucs_free(a);
+}
+
+/* Allocate an n-by-n CCS matrix with room for nent double entries, flagged
+   symmetric / lower / double.  colptr, rowind and values.d are allocated
+   but not filled in.  Returns NULL when any allocation fails. */
+static taucs_ccs_matrix* construct_ccs_matrix(int nent,int n)
+{
+  taucs_ccs_matrix *mat;
+  mat = (taucs_ccs_matrix *)taucs_malloc(sizeof(taucs_ccs_matrix));
+  if (mat == NULL)
+    return NULL;
+  mat->colptr = (int *)taucs_malloc((n+1)*sizeof(int));
+  mat->rowind = (int *)taucs_malloc(nent*sizeof(int));
+  mat->values.d/*taucs_values*/ = (double *)taucs_malloc(nent*sizeof(double));
+  if (mat->colptr == NULL || mat->rowind == NULL || mat->values.d == NULL) {
+    taucs_free(mat->colptr);
+    taucs_free(mat->rowind);
+    taucs_free(mat->values.d/*taucs_values*/);
+    taucs_free(mat);
+    return NULL;
+  }
+  mat->flags = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  mat->m = n;
+  mat->n = n;
+  return mat;
+}
+
+#if 0
+static
+graph *ccs_matrix_to_graph(taucs_ccs_matrix *in)
+{
+  graph *out;
+  int nent,n;
+  int j,ip;
+
+  nent = in->colptr[in->n];
+  out = construct_graph(nent);
+  if (!out) return NULL;
+
+  n = in->n;
+
+  out->n = n;
+  out->nent = nent;
+  
+  for(j=0;j<n;j++) {
+    for(ip=in->colptr[j];ip<in->colptr[j+1];ip++) {
+      (out->edges)[ip].i = (in->rowind)[ip];
+      (out->edges)[ip].j = j;
+      (out->edges)[ip].v = (in->values.d/*taucs_values*/)[ip];
+    }
+  }
+  
+  return(out);
+}
+#endif /* 0, we don't need this routine */
+
+/* Grow the edge array of `a` to new_size entries (asserted to be larger than
+   the current capacity).  All max_size old slots are copied, then the old
+   array is released.  Returns 0 on success; on allocation failure returns -1
+   and leaves the graph untouched. */
+static
+int graph_resize(graph *a,int new_size)
+{
+  wedge* bigger;
+
+  assert(new_size > a->max_size);
+
+  bigger = (wedge*) taucs_malloc(new_size*sizeof(wedge));
+  if (bigger == NULL)
+    return -1;
+
+  memcpy(bigger,a->edges,a->max_size*sizeof(wedge));
+  taucs_free(a->edges);
+
+  a->max_size = new_size;
+  a->edges    = bigger;
+  return 0;
+}
+
+/************ LINKED LISTS *************/
+
+typedef struct edg {
+  int entry_no;
+  struct edg *next;
+} edge;
+
+typedef struct linke {
+  edge **point;
+  edge *array;
+} linked;
+
+typedef struct thre {
+  int group_1;
+  int group_2;
+  int a;
+  int b;
+  double c;
+  byte already_connected;
+  byte completed_to_basis;
+  struct thre *next;
+} three;
+
+typedef struct si {
+  int group_1;
+  int group_2;
+  int a[2];
+  int b[2];
+  double c[2];
+  byte cross[2];
+  byte no_edges; /* number of edges connecting group_1 and group_2 (0,1 or 2) */
+  struct si *next;
+} six;
+
+/* Check that the symmetric matrix stored as the edge list A is diagonally
+   dominant: for every row, the diagonal value minus the absolute values of
+   all off-diagonal entries touching that row must not fall below -EPSILON.
+   When force_diagonal_dominance is nonzero, the diagonals of failing rows
+   are adjusted in place instead of being reported.
+   Returns 0 on success, -1 if out of memory, -2 if A is not diagonally
+   dominant and no perturbation was requested. */
+int taucs_check_diag_dominant_matrix(graph *A, int force_diagonal_dominance)
+{
+  int i;
+  double *sum;
+  int n;
+  int diagonally_dominant, all_nonpositive;
+  int first_bad = -1; /* first row whose sum is below -EPSILON */
+  n = A->n;
+  sum = (double *)taucs_calloc(n,sizeof(double));
+  if (!sum) return -1;
+
+  Do(i,A->nent) {
+    wedge *e = &(A->edges)[i];
+    if (e->i != e->j) {
+      sum[e->i] -= fabs(e->v);
+      sum[e->j] -= fabs(e->v);
+    } else {
+      sum[e->i] += e->v;
+      if (e->v < 0) /* reported only; the check carries on */
+        taucs_printf("ERROR! This matrix is not diagonally dominant. It has negative diagonals.\n");
+    }
+  }
+
+  diagonally_dominant = 1; /* until proven otherwise */
+  all_nonpositive = 1;
+  Do(i,n) {
+    if (sum[i] < -EPSILON) {
+      diagonally_dominant = 0;
+      if (first_bad < 0) first_bad = i;
+    }
+    if (sum[i] > EPSILON)  all_nonpositive     = 0;
+  }
+
+  if ((force_diagonal_dominance)&&(diagonally_dominant == 0)) {
+    int first_time = 1;
+    for(i=0;i<A->nent;i++) {
+      wedge *e = &(A->edges)[i];
+      if (e->i == e->j && sum[e->i] <= EPSILON) {
+        if (first_time) {
+          first_time=0;
+          taucs_printf("\t\tAMWB warning: perturbing to force diagonal dominance\n");
+        }
+        e->v -= sum[e->i];
+        if (all_nonpositive && e->i == 0) {
+          taucs_printf("taucs warning: perturbing to ensure strict diagonal dominance\n");
+          e->v += 0.1; /* arbitrary perturbation */
+        }
+      }
+    }
+  } else if (diagonally_dominant == 0) {
+    /* after the scan above i == n, so sum[i] would read one past the array;
+       report the first offending row instead */
+    taucs_printf("ERROR! This matrix is not diagonally dominant. sum[%d] = %lf\n",first_bad,sum[first_bad]);
+    taucs_free(sum);
+    return -2;
+  }
+
+  taucs_free(sum);
+  return 0;
+}
+
+#if 0
+static
+double *analyze_graph(graph *A)
+{
+  int i;
+  int t1,t2;
+  int n;
+  double *diag,t3;
+  
+  n = A->n;
+  diag = (double *)taucs_calloc(n,sizeof(double));
+  if (!diag) return NULL;
+
+  Do(i,A->nent)
+    {
+      t1=(A->edges)[i].i;
+      t2=(A->edges)[i].j;
+      t3=(A->edges)[i].v;
+
+      if (t1 == t2)
+	diag[t1] += fabs(t3);
+      else
+	{
+	  diag[t1] -= fabs(t3);
+	  diag[t2] -= fabs(t3);
+	}
+    }
+  return(diag);
+}
+#endif /* 0, we don't need this routine */
+
+/*********************************************************/
+/* ccs diagnostics, row sums, and conversion to a graph  */
+/*********************************************************/
+
+#define TAUCS_SYM_NOT_SYMLOWER     1
+#define TAUCS_SYM_POS_OFFDIAGONALS 2
+#define TAUCS_SYM_NEG_DIAGONALS    4
+#define TAUCS_SYM_NOT_DIAGDOMINANT 8
+
+/* Convert a symmetric, lower-triangular CCS matrix into an edge-list graph
+   (one edge per stored entry, in storage order).  diag[r] receives |a_rr|
+   minus the absolute value of every off-diagonal entry in row/column r, and
+   *diagnostics a bitwise OR of the TAUCS_SYM_* flags.  With
+   force_diagonal_dominance set, diagonal edges of rows with diag[r] < 0 are
+   increased and diag[r] is reset to 0.  Returns NULL if the matrix is not
+   flagged symmetric and lower, or if the graph cannot be allocated. */
+static
+graph *ccs_matrix_to_graph_plus(taucs_ccs_matrix *in,
+                                int*   diagnostics,
+                                double diag[],
+                                int    force_diagonal_dominance)
+{
+  graph *out;
+  int nent,n;
+  int i,j,k,ip;
+  double v;
+  int negative_on_diagonal;
+  int positive_off_diagonal;
+  int not_diagonally_dominant;
+
+  *diagnostics = 0;
+
+  if (!(in->flags & TAUCS_SYMMETRIC) || !(in->flags & TAUCS_LOWER)) {
+    *diagnostics = TAUCS_SYM_NOT_SYMLOWER;
+    return NULL;
+  }
+
+  nent = in->colptr[in->n];
+  out = construct_graph(nent);
+  if (!out) return NULL;
+
+  n = in->n;
+  out->n = n;
+  out->nent = nent;
+
+  for (i=0; i<n; i++) diag[i] = 0.0;
+
+  negative_on_diagonal  = 0;
+  positive_off_diagonal = 0;
+
+  for(j=0;j<n;j++) {
+    for(ip=in->colptr[j];ip<in->colptr[j+1];ip++) {
+      i = (in->rowind)[ip];
+      v = (in->values.d/*taucs_values*/)[ip];
+      (out->edges)[ip].i = i;
+      (out->edges)[ip].j = j;
+      (out->edges)[ip].v = v;
+
+      if (i == j) {
+        negative_on_diagonal |= (v < 0.0);
+        diag[i] += fabs(v);
+      } else {
+        positive_off_diagonal |= (v > 0.0);
+        diag[i] -= fabs(v);
+        diag[j] -= fabs(v);
+      }
+    }
+  }
+
+  if (force_diagonal_dominance) {
+    int strict_diagdominance = 0;
+    int first_time = 1;
+
+    for (i=0; i<n; i++)
+      strict_diagdominance |= (diag[i] > 0.0);
+
+    for(k=0;k<out->nent;k++) {
+      i = (out->edges)[k].i;
+      j = (out->edges)[k].j;
+      if (i == j && diag[i] < 0.0) {
+        if (first_time) {
+          first_time=0;
+          taucs_printf("taucs warning: perturbing to force diagonal dominance\n");
+        }
+        (out->edges)[k].v -= diag[i];
+        diag[i] = 0.0;
+        if (strict_diagdominance == 0 && i == 0) {
+          taucs_printf("taucs warning: perturbing to ensure strict diagonal dominance\n");
+          (out->edges)[k].v += 1e-8; /* arbitrary perturbation */
+        }
+      }
+    }
+
+    not_diagonally_dominant = 0;
+  } else {
+    not_diagonally_dominant = 0;
+    /* scan every row: the loop used to start at "i=i", i.e. at whatever row
+       index the conversion loop left behind, and skipped the rows before it */
+    for (i=0; i<n; i++)
+      not_diagonally_dominant |= (diag[i] < -1e-12); /* arbitrary threshold */
+  }
+
+  *diagnostics = 0;
+  if (not_diagonally_dominant) *diagnostics |= TAUCS_SYM_NOT_DIAGDOMINANT;
+  if (negative_on_diagonal   ) *diagnostics |= TAUCS_SYM_NEG_DIAGONALS;
+  if (positive_off_diagonal  ) *diagnostics |= TAUCS_SYM_POS_OFFDIAGONALS;
+
+  return(out);
+}
+
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+
+/* Free a linked-list structure: its node pool, its head array, then the
+   descriptor itself.  `a` is dereferenced without a NULL check. */
+static void free_linked_list(linked* a)
+{
+  taucs_free(a->array);
+  taucs_free(a->point);
+  taucs_free(a);
+}
+
+/* Free every node of a `six` list; iterative, so long lists cannot exhaust the stack. */
+static
+void free_linked_list_2(six *a)
+{
+  while (a != NULL) {
+    six *next = a->next; /* read before the node is freed */
+    taucs_free(a);
+    a = next;
+  }
+}
+
+/* Build per-vertex adjacency lists of the off-diagonal edges of A.
+   out->point[v] heads a singly linked list of nodes whose entry_no indexes
+   A->edges; every edge (i,j), i != j, appears once in i's list and once in
+   j's.  Nodes come from the pool out->array (two per edge).  *min and *max
+   receive the smallest and largest value of -v over those edges (INF / -INF
+   when there are none).  Returns NULL on allocation failure. */
+static
+linked* create_linked_list(graph *A,int n,int Anent,double *min,double *max)
+{
+  linked* out;
+  edge *pool;
+  int k;
+  int used = 0;
+  *min = INF;
+  *max = -INF;
+
+  out = (linked*) taucs_malloc(sizeof(linked));
+  if (!out) return NULL;
+  out->point = (edge **)taucs_calloc(n,sizeof(edge *));
+  out->array = (edge *)taucs_calloc(2*Anent,sizeof(edge));
+  if (!(out->point) || !(out->array)) {
+    taucs_free(out->point);
+    taucs_free(out->array);
+    taucs_free(out);
+    return NULL;
+  }
+  pool = out->array;
+
+  for (k = 0; k < Anent; k++) {
+    const wedge *e = &(A->edges)[k];
+    const double w = -(e->v);
+    edge *node;
+
+    if (e->i == e->j)
+      continue; /* diagonal entries carry no edge */
+    if (w < *min) *min = w;
+    if (w > *max) *max = w;
+
+    node = &pool[used++];      /* push onto i's list */
+    node->entry_no = k;
+    node->next = out->point[e->i];
+    out->point[e->i] = node;
+    node = &pool[used++];      /* push onto j's list */
+    node->entry_no = k;
+    node->next = out->point[e->j];
+    out->point[e->j] = node;
+  }
+  return(out);
+}
+
+/* Build adjacency lists indexed by section of `partition`, using only the
+   edges of A whose endpoints lie in different sections of `partition` but in
+   the same section of `new_partition`.  Each such edge is pushed onto the
+   lists of both endpoint sections, so trees can be built inside a section of
+   new_partition with contracted sections of partition as vertices.  *min and
+   *max get the extreme values of -v (INF / -INF if none).  NULL on failure. */
+static
+linked* create_linked_list_cluster(graph *A,int n,int Anent,double *min,double *max,int *partition,int *new_partition)
+{
+  linked* out = NULL;
+  edge *pool;
+  int k;
+  int used = 0;
+  *min = INF;
+  *max = -INF;
+
+  out = (linked*) taucs_malloc(sizeof(linked));
+  if (!out) return NULL;
+  out->point = (edge **)taucs_calloc(n,sizeof(edge *));
+  out->array = (edge *)taucs_calloc(2*Anent,sizeof(edge));
+  if (!(out->point) || !(out->array)) {
+    taucs_free(out->point);
+    taucs_free(out->array);
+    taucs_free(out);
+    return NULL;
+  }
+  pool = out->array;
+
+  for (k = 0; k < Anent; k++) {
+    const wedge *e = &(A->edges)[k];
+    const int si = partition[e->i];   /* section of each endpoint */
+    const int sj = partition[e->j];
+    const double w = -(e->v);
+    edge *node;
+
+    if (si == sj)
+      continue; /* edge internal to one section */
+    if (new_partition[e->i] != new_partition[e->j])
+      continue; /* endpoints lie in different new sections */
+    if (w < *min) *min = w;
+    if (w > *max) *max = w;
+
+    node = &pool[used++];             /* push onto section si's list */
+    node->entry_no = k;
+    node->next = out->point[si];
+    out->point[si] = node;
+
+    node = &pool[used++];             /* push onto section sj's list */
+    node->entry_no = k;
+    node->next = out->point[sj];
+    out->point[sj] = node;
+  }
+  return(out);
+}
+
+
+/* Convert an edge list into a lower-triangular symmetric CCS matrix using a
+   counting sort by column.  Each edge (i,j,v) is stored in column min(i,j)
+   at row max(i,j); within a column, entries keep the order of A->edges (row
+   indices are not sorted).  Returns NULL on allocation failure. */
+static
+taucs_ccs_matrix *graph_to_ccs_matrix(graph *A)
+{
+  const int n    = A->n;
+  const int nent = A->nent;
+  taucs_ccs_matrix *out;
+  int *next; /* per column: entry count first, then next free slot */
+  int k, col, row;
+  next = (int *)taucs_malloc(n*sizeof(int));
+  if (!next) return NULL;
+  out=construct_ccs_matrix(nent,n);
+  if (!out) {
+    taucs_free(next);
+    return NULL;
+  }
+  out->flags = TAUCS_SYMMETRIC | TAUCS_LOWER | TAUCS_DOUBLE;
+  /* pass 1: count the entries of every column */
+  for (k = 0; k < n; k++)
+    next[k] = 0;
+  for (k = 0; k < nent; k++)
+    next[min((A->edges)[k].i,(A->edges)[k].j)]++;
+
+  /* prefix sums give the column pointers; next[] restarts at column heads */
+  out->colptr[0] = 0;
+  for (k = 0; k < n; k++) {
+    out->colptr[k+1] = out->colptr[k] + next[k];
+    next[k] = out->colptr[k];
+  }
+
+  /* pass 2: scatter the edges into their columns */
+  for (k = 0; k < nent; k++) {
+    col = min((A->edges)[k].i , (A->edges)[k].j);
+    row = max((A->edges)[k].i , (A->edges)[k].j);
+    out->rowind[next[col]] = row;
+    out->values.d/*taucs_values*/[next[col]] = (A->edges)[k].v;
+    next[col]++;
+  }
+  taucs_free(next);
+  return(out);
+}
+
+/* Post-order walk that stores in sizes[u] the number of vertices in the
+   subtree rooted at u, for ver and all its descendants.  The tree is given
+   as first-child / next-sibling arrays with -1 as terminator.  Returns
+   sizes[ver]. */
+static
+int compute_sub_tree_sizes(int ver,int *first_child,int *next_child,int *sizes)
+{
+  int total = 1; /* ver itself */
+  int child = first_child[ver];
+
+  if (child == -1) {
+    sizes[ver] = 1; /* leaf */
+    return 1;
+  }
+
+  while (child != -1) {
+    total += compute_sub_tree_sizes(child,first_child,next_child,sizes);
+    child = next_child[child];
+  }
+  sizes[ver] = total;
+  return total;
+}
+
+/* Put ver and every vertex below it into group gr: groups[u] = gr for each
+   vertex u of the subtree.  Pre-order walk over the first-child /
+   next-sibling arrays; -1 terminates a sibling list. */
+static
+void assign_group(int ver,int gr,int *first_child,int *next_child,int *groups)
+{
+  int child;
+
+  groups[ver] = gr;
+
+  child = first_child[ver];
+  while (child != -1) {
+    assign_group(child,gr,first_child,next_child,groups);
+    child = next_child[child];
+  }
+}
+
+/* Turn the parent array pi[0..n-1] (-1 marks a root) into first-child /
+   next-sibling arrays, returned through *fc and *nc; -1 terminates every
+   list and children appear in increasing vertex order.  Returns 0, or -1 on
+   allocation failure (in which case *fc and *nc are not written).
+   Appending walks the sibling list, so a vertex with d children costs
+   O(d^2) here. */
+static
+int create_children_arrays(int *pi,int n,int **fc,int **nc)
+{
+  int *first_child = (int *)taucs_malloc(n*sizeof(int));
+  int *next_child  = (int *)taucs_malloc(n*sizeof(int));
+  int v, parent, tail;
+
+  if (!first_child || !next_child) {
+    taucs_free(first_child);
+    taucs_free(next_child);
+    return -1;
+  }
+
+  for (v = 0; v < n; v++) {
+    first_child[v] = -1;
+    next_child[v]  = -1;
+  }
+
+  for (v = 0; v < n; v++) {
+    parent = pi[v];
+    if (parent == -1)
+      continue; /* roots have no sibling list to join */
+
+    if (first_child[parent] == -1) {
+      first_child[parent] = v;
+      continue;
+    }
+
+    /* append v after the current last child of parent */
+    tail = first_child[parent];
+    while (next_child[tail] != -1)
+      tail = next_child[tail];
+    next_child[tail] = v;
+  }
+
+  *fc = first_child;
+  *nc = next_child;
+  return 0; /* success */
+}
+
+/* Unlink the subtree rooted at `child` from the child list of `father`.
+   Only the sibling chain is edited: next_child[child] keeps its old value
+   (so a caller iterating over the siblings can still step past `child`)
+   and pi is not touched.  `child` must be one of father's children,
+   otherwise the search below runs off the end of the sibling list.
+   The assert only checks that father has at least one child. */
+static
+void disconnect(int father,int child,
+                int *first_child,int *next_child,int *pi)
+{
+  int prev;
+
+  assert(first_child[father] != -1);
+
+  if (first_child[father] == child) {
+    /* child heads the list: the list now starts at its successor */
+    first_child[father] = next_child[child];
+    return;
+  }
+
+  /* find the sibling right before child and bypass child */
+  prev = first_child[father];
+  while (next_child[prev] != child)
+    prev = next_child[prev];
+
+  next_child[prev] = next_child[child];
+}
+
+static
+void divide_to_groups(int r,int *first_child,int *next_child,int *pi,int *curr_group,
+		      int *groups,int *sub_tree_sizes,int root,double subgraphs,
+		      int n)
+     /* divides the vertices into different groups (divides the tree into subtrees) */
+{
+  int v;
+  double low;
+  /* low: subtree size from which a child is split off as its own group */
+  low = max(1,((double)n/subgraphs));
+
+  if(first_child[r] != -1)
+    {
+      v=first_child[r];
+      sub_tree_sizes[r] = 1; /* recomputed below: r plus the children it keeps */
+      while(v != -1)
+	{
+	  if (sub_tree_sizes[v] > low) /* too big: split v's own subtree first */
+	    divide_to_groups(v,first_child,next_child,pi,curr_group,groups,
+			     sub_tree_sizes,root,subgraphs,n);
+	  /* sub_tree_sizes[v] may have shrunk in the call above */
+	  if (sub_tree_sizes[v] >= low)
+	    {
+	      assign_group(v,*curr_group,first_child,next_child,groups);
+	      disconnect(r,v,first_child,next_child,pi);
+	      (*curr_group)++;
+	    }
+	  else
+	    sub_tree_sizes[r] += sub_tree_sizes[v];
+	  v = next_child[v]; /* still valid: disconnect leaves next_child[v] alone */
+	}
+      
+    }
+  
+}
+
+
+static
+void DFS_visit(graph *precond,int r,byte *color,linked l,int *pi,int *visited)
+{
+  edge *p; /* note: shadows the file-scope union-find array p inside this function */
+  int r1;
+  /* mark r as reached and count it */
+  color[r] = 1;
+  (*visited)++;
+
+  p = l.point[r];
+  while (p != NULL)
+    {
+      /* i + j - r is the other endpoint of the edge, provided r is one of its two endpoints */
+      /* (presumably l was built from precond -- confirm; Sivan flagged this line as strange) */
+      r1 = (precond->edges)[ p->entry_no ].i + (precond->edges)[ p->entry_no ].j - r;
+      if (color[r1]==0)
+	{
+	  DFS_visit(precond,r1,color,l,pi,visited);
+	  pi[r1] = r; /* r is the vertex from which r1 was first reached */
+	}
+      p = p->next;
+    }
+}
+
+/* Fill perm[0..k-1] with a pseudo-random permutation of 0..k-1 (Fisher-Yates
+   shuffle driven by rand(); rand() is not reseeded here). */
+static void make_perm(int perm[],int k)
+{
+  int pos, pick, held;
+
+  for (pos = 0; pos < k; pos++)
+    perm[pos] = pos;
+
+  for (pos = 0; pos < k; pos++) {
+    pick = pos + rand()%(k-pos);
+    held = perm[pick];
+    perm[pick] = perm[pos];
+    perm[pos] = held;
+  }
+}
+
+#define USE_HEAPSORT
+
+static
+taucs_ccs_matrix*
+amwb_preconditioner_create(graph *mtxA, double* diag,
+			   int rnd,
+			   double subgraphs)
+     /*
+amwb_preconditioner_create(taucs_ccs_matrix *taucs_ccs_mtxA, 
+			   int rnd,
+			   double subgraphs)
+     */
+{
+  taucs_ccs_matrix *out;
+
+  /*  graph *mtxA,*mtxA_tmp;*/
+  graph *mtxA_tmp;
+  graph *precond;
+
+  int i;
+  /*int j,tmp,entry;*/
+  /*
+  int *ivec1, *ivec2 ;
+  int *ivec1_p, *ivec2_p ;
+  double *dvec; 
+  double *dvec_p; 
+  */
+
+  int size;
+  int *pi;
+  /*edge **array;*/
+  /*edge *p;*/
+#ifdef USE_HEAPSORT
+  heap Ah;
+  heap Bh;
+  heap h;
+#endif
+  int Bent;
+  /*int row,col;*/
+  double weight;
+  int *first_child,*next_child;
+  int *groups,*sub_tree_sizes;
+  int curr_group;
+  int n,Anent;
+  int precond_maxnent,chunk;
+  /*double *diag;*/
+  byte *closed_cycle,closed_cycle_x,closed_cycle_y;
+  byte *color;
+  byte *already_added;
+  int count=0;
+  int edge_sign; /*  byte edge_sign;*/
+  int u,v,x,y,un_root,r;
+  linked* l;
+  int *perm;
+  int visited,*roots,rcount;
+  int basis_Bent=0,step2_Bent=0;
+  three *complete_subgraph;
+  six **pairs;
+  /*char bool;*/
+  /* FILE *graph_file0,*graph_file1,*graph_file2,*graph_file3,*group_file; */
+
+  double wtime;
+  double wtime_sort  = 0.0;
+  double wtime_global_basis;
+  double wtime_treepartition;
+  double wtime_component_bases;
+  double wtime_pair_bases;
+  double wtime_total;
+  double dummy;
+
+  wtime_total = taucs_wtime();
+
+  /* graph_file0 = fopen("graphfile0.txt","w"); */
+  /* graph_file1 = fopen("graphfile1.txt","w"); */
+  /* graph_file2 = fopen("graphfile2.txt","w"); */
+  /* graph_file3 = fopen("graphfile3.txt","w"); */
+  /* group_file  = fopen("groupfile.txt" ,"w"); */
+
+
+  /********************************************************/
+  /*                                                      */
+  /********************************************************/
+
+  /********************************************************/
+  /* convert matrix to a graph                            */
+  /********************************************************/
+
+  /*** ALLOCATED: NONE ***/
+
+  /*
+  wtime = taucs_wtime();
+  mtxA = ccs_matrix_to_graph(taucs_ccs_mtxA);
+  if (!mtxA) {
+    return NULL;
+  }
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB matrix-to-graph = %.3f seconds\n",wtime);
+  */
+
+  /********************************************************/
+  /* check that the matrix is diagonally dominant         */
+  /********************************************************/
+
+  /*** ALLOCATED: mtxA ***/
+  
+#if 0
+  wtime = taucs_wtime();
+  i = taucs_check_diag_dominant_matrix(mtxA,1 /* force diagonal dominance */);
+  if (i == -1) {
+    free_graph(mtxA);
+    return NULL;
+  }
+  if (i == -2) {
+    free_graph(mtxA);
+    return taucs_ccs_mtxA; /* not diagonally dominant */
+  }
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB check-diag-dominance = %.3f seconds\n",wtime);
+#endif
+
+  n = mtxA->n;
+
+  /********************************************************/
+  /* generate random permutation and permute vertices     */
+  /********************************************************/
+
+  wtime = taucs_wtime();
+  perm = (int *)taucs_malloc(mtxA->nent*sizeof(int));
+  if (!perm) {
+    free_graph(mtxA);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA,perm ***/
+
+  make_perm(perm,mtxA->nent);
+
+  mtxA_tmp = construct_graph(mtxA->max_size);
+  if (!mtxA_tmp) {
+    free_graph(mtxA);
+    taucs_free(perm);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA,perm,mtxA_tmp ***/
+
+  mtxA_tmp->nent = mtxA->nent;
+  mtxA_tmp->n = mtxA->n;
+  Do(i,mtxA->nent)
+    {
+      /*
+      mtxA_tmp->ivec1[i] = mtxA->ivec1[perm[i]];
+      mtxA_tmp->ivec2[i] = mtxA->ivec2[perm[i]];
+      mtxA_tmp->dvec[i]  = mtxA->dvec[perm[i]];
+      */
+
+      mtxA_tmp->edges[i].i = mtxA->edges[perm[i]].i;
+      mtxA_tmp->edges[i].j = mtxA->edges[perm[i]].j;
+      mtxA_tmp->edges[i].v = mtxA->edges[perm[i]].v;
+    }
+
+  taucs_free(perm);
+  free_graph(mtxA);
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB random permute = %.3f seconds\n",wtime);
+
+  /********************************************************/
+  /* compute and remember row weights                     */
+  /********************************************************/
+
+  wtime = taucs_wtime();
+
+  /*** ALLOCATED: mtxA_tmp ***/
+
+  /*
+  diag = analyze_graph(mtxA_tmp);
+  if (!diag) {
+    free_graph(mtxA_tmp);
+    return NULL;
+  }
+  */
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB row weights = %.3f seconds\n",wtime);
+
+  /********************************************************/
+  /* allocate vectors                                     */
+  /********************************************************/
+
+  /*** ALLOCATED: mtxA_tmp,diag ***/
+
+  Anent = mtxA_tmp->nent;
+
+  already_added = (byte *)taucs_calloc(Anent,sizeof(byte));
+  pi            = (int *) taucs_malloc(n*sizeof(int));
+  if (!already_added || !pi) {
+    free_graph(mtxA_tmp);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi ***/
+
+  Do(i,n)
+    pi[i] = -1;
+
+  /********************************************************/
+  /* construct empty preconditioner                       */
+  /********************************************************/
+
+  precond_maxnent = 3*n;
+
+  taucs_printf("allocating space for %d entries in precond\n",precond_maxnent);fflush(stdout);
+  
+  precond = construct_graph(precond_maxnent);
+  if (!precond) {
+    free_graph(mtxA_tmp);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond ***/
+
+  precond->n=mtxA_tmp->n;
+
+  /*
+  ivec1_p = precond->ivec1 ;
+  ivec2_p = precond->ivec2 ;
+  dvec_p = precond->dvec ;
+  */
+	 
+  Bent = 0;
+
+  /********************************************************/
+  /* allocate vectors                                     */
+  /********************************************************/
+
+  wtime_global_basis = taucs_wtime();
+
+  closed_cycle = (byte *)taucs_calloc(n,sizeof(byte));
+  if (!closed_cycle) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,closed_cycle ***/
+
+  /* Variation on Kruskal - Introduction to Algorithms page 505 */
+
+  /********************************************************/
+  /* initialize union-find                                */
+  /********************************************************/
+
+  if (unionfind_init(n) == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(closed_cycle);
+    return NULL;
+  }
+
+  /********************************************************/
+  /* sort edges of matrix                                 */
+  /********************************************************/
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,closed_cycle,UF ***/
+
+  wtime = taucs_wtime();
+#ifdef USE_HEAPSORT
+
+
+  /*
+  size = heap_sort(Anent,&h,mtxA_tmp);
+  if (size == -1) {
+  */
+  if (pqueue_create(&Ah,Anent) == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(closed_cycle);
+    unionfind_free();
+    return NULL;
+  }
+  size = pqueue_fill(&Ah,mtxA_tmp);
+#else
+  assert(Anent == mtxA_tmp->nent);
+  size = mtxA_tmp->nent;
+  graph_sort(mtxA_tmp);
+#endif
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB sort(%d) = %.3f seconds\n",Anent,wtime);
+  wtime_sort += wtime;
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,closed_cycle,UF,heap ***/
+
+  /********************************************************/
+  /* build a basis for the entire graph                   */
+  /********************************************************/
+
+  Do(i,size)
+    {
+      if (count == n)
+	break;
+
+#ifdef USE_HEAPSORT
+      u = mtxA_tmp->edges[Ah.edges[i]].i;
+      v = mtxA_tmp->edges[Ah.edges[i]].j;
+      weight = mtxA_tmp->edges[Ah.edges[i]].v;
+#else
+      u      = mtxA_tmp->edges[i].i;
+      v      = mtxA_tmp->edges[i].j;
+      weight = mtxA_tmp->edges[i].v;
+      if (u==v) continue;
+#endif
+      
+      edge_sign = (weight>0);
+
+      x = find_set(u);
+      y = find_set(v);
+
+      if (x!=y)
+	{
+	  /* printf("different trees\n"); */
+	  if (!((closed_cycle[x])&&(closed_cycle[y])))
+	    {
+	      count++;
+	      /* printf("(%d,%d) - %lf\n",u,v,weight); */
+	      /*
+	      ivec1_p[Bent] = u; 
+	      ivec2_p[Bent] = v; 
+	      dvec_p[Bent] = weight; 
+	      */
+	      precond->edges[Bent].i = u; 
+	      precond->edges[Bent].j = v; 
+	      precond->edges[Bent].v = weight; 
+
+	      Bent++; 
+
+	      diag[u] += fabs(weight);
+	      diag[v]+=fabs(weight);
+
+#ifdef USE_HEAPSORT
+	      already_added[Ah.edges[i]] = 1;
+#else
+	      already_added[i] = 1;
+#endif
+	      un_root = Union(u,v,x,y,edge_sign);
+	      closed_cycle[un_root] = closed_cycle[x] | closed_cycle[y];
+	    }
+	  /* else
+	    {
+	      printf("cannot add (%d,%d) - %lf - both trees already have a cycle\n",u,v,weight); 
+	    }
+	  */
+	}
+      else
+	{
+	  /* printf("same tree\n"); */
+	  if ((edge_sign != (label[u]^label[v])) && (closed_cycle[x]==0))
+	    {
+	      count++;
+	      /* printf("(%d,%d) - %lf\n",u,v,weight); */
+	      /*
+	      ivec1_p[Bent] = u; 
+	      ivec2_p[Bent] = v; 
+	      dvec_p[Bent] = weight; 
+	      */
+	      precond->edges[Bent].i = u; 
+	      precond->edges[Bent].j = v; 
+	      precond->edges[Bent].v = weight; 
+
+	      Bent++; 
+
+	      diag[u] += fabs(weight);
+	      diag[v]+=fabs(weight);
+
+#ifdef USE_HEAPSORT
+	      already_added[Ah.edges[i]] = 1;
+#else
+	      already_added[i] = 1;
+#endif
+	      closed_cycle[x] = 1;
+	    }
+	  /* else
+	    {
+	      if (closed_cycle[x]==1)
+		printf("cannot add (%d,%d) - %lf - tree already contains cycle\n",u,v,weight); 
+	      else
+		printf("cannot add (%d,%d) - %lf - it closes a positive cycle\n",u,v,weight); 
+	    }
+	  */
+	}
+    }
+
+  /********************************************************/
+  /* the preconditioner is now a max-weight-basis         */
+  /********************************************************/
+
+  wtime_global_basis = taucs_wtime() - wtime_global_basis;
+  taucs_printf("\t\tAMWB global basis = %.3f seconds\n",wtime_global_basis);
+  
+  taucs_free(closed_cycle);
+#ifdef USE_HEAPSORT
+  /*free_heap(h);*/
+#endif
+  unionfind_free();
+
+  precond->nent = Bent;
+  basis_Bent = Bent;
+
+  /********************************************************/
+  /* break into subgraphs                                 */
+  /********************************************************/
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond ***/
+
+  l = create_linked_list(precond,n,Bent,&dummy,&dummy);
+  if (!l) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+
+  color = (byte *)taucs_calloc(n,sizeof(byte));
+  roots = (int *)taucs_malloc(n*sizeof(int));
+  if (!color || !roots) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    free_linked_list(l);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked,color,roots ***/
+
+  wtime_treepartition = taucs_wtime();
+
+  visited = 0;
+  rcount = 0;
+  /*
+  while(visited<n)
+    {
+      r = rand()%n;
+      while (color[r])
+	r = rand()%n;
+      roots[rcount++] = r;
+      pi[r] = -1;
+      DFS_visit(precond,r,color,*l,pi,&visited);
+    }
+  */
+
+  for (r=0; r<n; r++) {
+    if (color[r] != 0) continue;
+    roots[rcount++] = r;
+    pi[r] = -1;
+    DFS_visit(precond,r,color,*l,pi,&visited);
+  }
+
+  taucs_free(color);
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked,roots ***/
+
+  /* pi now contains the parent array of the tree */
+
+  if (create_children_arrays(pi,n,&first_child,&next_child) == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(roots);
+    free_linked_list(l);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked,roots,FC,NC ***/
+
+  /* the first_child/next_child arrays enable us to find the children
+     of any vertex in the tree */
+  
+  groups = (int *)taucs_malloc(n*sizeof(int));
+  sub_tree_sizes = (int *)taucs_malloc(n*sizeof(int));
+  if(!groups || !sub_tree_sizes) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(roots);
+    taucs_free(groups);
+    taucs_free(sub_tree_sizes);
+    taucs_free(first_child);
+    taucs_free(next_child);
+    free_linked_list(l);
+    return NULL;
+  }
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked,roots,FC,NC ***/
+  /*** ALLOCATED: groups,sub_tree_sizes ***/
+
+  Do(i,n)
+    groups[i] = -1;
+  
+  curr_group = 0;
+  
+  Do(i,rcount)
+    {
+      r = roots[i];
+      compute_sub_tree_sizes(r,first_child,next_child,sub_tree_sizes);
+      /* now for every vertex v in the tree, sub_tree_sizes[v] is the size
+	 of the subtree whose root is v */
+      
+      divide_to_groups(r,first_child,next_child,pi,&curr_group,groups,
+		       sub_tree_sizes,r,subgraphs,n);
+      if ((sub_tree_sizes[r]<((double)n/subgraphs))&&(curr_group>0))
+	curr_group--;
+      assign_group(r,curr_group,first_child,next_child,groups);
+      curr_group++;
+    }
+
+  taucs_printf("actual number of subgraphs = %ld\n",curr_group);fflush(stdout);
+  /* now the tree is divided into linked groups */
+
+  chunk = max(min((curr_group*(curr_group-1)/2)/10,5000),10000);
+
+  taucs_free(roots);
+  taucs_free(first_child);
+  taucs_free(next_child);
+  taucs_free(sub_tree_sizes);
+
+  wtime_treepartition = taucs_wtime() - wtime_treepartition;
+  taucs_printf("\t\tAMWB treepartition = %.3f seconds\n",wtime_treepartition);
+
+
+
+
+  wtime_component_bases = taucs_wtime();
+
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups ***/
+
+  precond->nent = Bent;
+
+  /********************************************************/
+  /* complete each subgraph into a basis                  */
+  /********************************************************/
+
+  complete_subgraph = (three *)taucs_calloc(curr_group,sizeof(three));
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups ***/
+  if (!complete_subgraph) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(groups);
+    free_linked_list(l);
+    return NULL;
+  }
+  
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups,complete_subgraph ***/
+  
+  /* For each subgraph, complete_subgraph will store the edge needed
+     to complete the subgraph into a basis (if such an edge exists) */
+  
+  /* Variation on Kruskal - Introduction to Algorithms page 505 */
+  if (unionfind_init(n) == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    free_linked_list(l);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(groups);
+    taucs_free(complete_subgraph);
+    return NULL;
+  }
+  
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups,complete_subgraph,UF ***/
+  
+  closed_cycle = (byte *)taucs_calloc(n,sizeof(byte));
+  if (!closed_cycle) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    free_linked_list(l);
+    unionfind_free();
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(groups);
+    taucs_free(complete_subgraph);
+    return NULL;
+  }
+  
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups,complete_subgraph,UF,closed_cycle ***/
+  
+  wtime = taucs_wtime();
+#ifdef USE_HEAPSORT
+  /*
+    size = heap_sort(Bent,&h,precond);
+    if (size == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    free_linked_list(l);
+    unionfind_free();
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(groups);
+    taucs_free(complete_subgraph);
+    taucs_free(closed_cycle);
+    return NULL;
+    }
+  */
+  if (pqueue_create(&Bh,Anent) == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(closed_cycle);
+    unionfind_free();
+    return NULL;
+  }
+  size = pqueue_fill(&Bh,precond);
+#else
+  assert(Bent == precond->nent);
+  size = precond->nent;
+  graph_sort(precond);
+#endif
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB sort 2(%d) = %.3f seconds\n",Bent,wtime);
+  wtime_sort += wtime;
+  
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups,complete_subgraph,UF,closed_cycle,heap ***/
+
+  Do(i,size)
+    {
+#ifdef USE_HEAPSORT
+      u = precond->edges[Bh.edges[i]].i;
+      v = precond->edges[Bh.edges[i]].j;
+#else
+      u = precond->edges[i].i;
+      v = precond->edges[i].j;
+      if (u==v) continue;
+#endif
+      
+      
+      if (groups[u] == groups[v])
+	{
+#ifdef USE_HEAPSORT
+	  weight = precond->edges[Bh.edges[i]].v;
+#else
+	  weight = precond->edges[i].v;
+#endif
+	  
+	  edge_sign = (weight>0);
+	  
+	  x = find_set(u);
+	  y = find_set(v);
+	  
+	  if (x!=y)
+	    {
+	      if (!((closed_cycle[x])&&(closed_cycle[y])))
+		{
+		  un_root = Union(u,v,x,y,edge_sign);
+		  closed_cycle[un_root] = closed_cycle[x] | closed_cycle[y];
+		}
+	      else
+		assert(0); /* this is a subgraph of a basis */
+	    }
+	  else
+	    {
+	      if ((edge_sign != (label[u]^label[v])) && (closed_cycle[x]==0))
+		closed_cycle[x] = 1;
+	    }
+	  
+	}
+    }
+  
+  wtime = taucs_wtime();
+#ifdef USE_HEAPSORT
+  /*
+    free_heap(h);
+    size = heap_sort(Anent,&h,mtxA_tmp);
+    if (size == -1) {
+    free_graph(mtxA_tmp);
+    free_graph(precond);
+    free_linked_list(l);
+    unionfind_free();
+    taucs_free(diag);
+    taucs_free(already_added);
+    taucs_free(pi);
+    taucs_free(groups);
+    taucs_free(complete_subgraph);
+    taucs_free(closed_cycle);
+    return NULL;
+    }
+  */
+  /*size = pqueue_fill(&Ah,mtxA_tmp);*/
+  size = Ah.heap_size;
+#else
+  assert(Anent == mtxA_tmp->nent);
+  size = mtxA_tmp->nent;
+  graph_sort(mtxA_tmp);
+#endif
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB sort(%d) = %.3f seconds\n",Anent,wtime);
+  wtime_sort += wtime;
+  
+  Do(i,size)
+    {
+#ifdef USE_HEAPSORT
+      u = mtxA_tmp->edges[Ah.edges[i]].i;
+      v = mtxA_tmp->edges[Ah.edges[i]].j;
+#else
+      u      = mtxA_tmp->edges[i].i;
+      v      = mtxA_tmp->edges[i].j;
+      if (u==v) continue;
+#endif
+      
+      if (groups[u] == groups[v])
+	{
+#ifdef USE_HEAPSORT
+	  weight = mtxA_tmp->edges[Ah.edges[i]].v;
+#else
+	  weight = mtxA_tmp->edges[i].v;
+#endif
+	  
+	  edge_sign = (weight>0);
+	  
+	  x = find_set(u);
+	  y = find_set(v);
+	  
+	  if (x!=y)
+	    {
+	      if (!((closed_cycle[x])&&(closed_cycle[y])))
+		{
+		  /*
+		    ivec1_p[Bent] = u; 
+		    ivec2_p[Bent] = v; 
+		    dvec_p[Bent] = weight; 
+		  */
+
+		  precond->edges[Bent].i = u; 
+		  precond->edges[Bent].j = v; 
+		  precond->edges[Bent].v = weight; 
+		  
+		  Bent++; 
+
+		  diag[u] += fabs(weight);
+		  diag[v]+=fabs(weight);
+		  
+		  assert(complete_subgraph[groups[u]].completed_to_basis==0);
+		  complete_subgraph[groups[u]].completed_to_basis = 1;
+		  complete_subgraph[groups[u]].a = u;
+		  complete_subgraph[groups[u]].b = v;
+		  complete_subgraph[groups[u]].c = weight;
+
+		  un_root = Union(u,v,x,y,edge_sign);
+		  closed_cycle[un_root] = closed_cycle[x] | closed_cycle[y];
+		}
+	    }
+	  else
+	    {
+	      if ((edge_sign != (label[u]^label[v])) && (closed_cycle[x]==0))
+		{
+		  /*
+		    ivec1_p[Bent] = u; 
+		    ivec2_p[Bent] = v; 
+		    dvec_p[Bent] = weight; 
+		  */
+		  precond->edges[Bent].i = u; 
+		  precond->edges[Bent].j = v; 
+		  precond->edges[Bent].v = weight; 
+		  
+		  Bent++; 
+		  
+		  diag[u] += fabs(weight);
+		  diag[v] += fabs(weight);
+		  
+		  assert(complete_subgraph[groups[u]].completed_to_basis==0);
+		  complete_subgraph[groups[u]].completed_to_basis = 1;
+		  complete_subgraph[groups[u]].a = u;
+		  complete_subgraph[groups[u]].b = v;
+		  complete_subgraph[groups[u]].c = weight;
+		  
+		  closed_cycle[x] = 1;
+		}
+	    }
+	  
+	}
+    }
+#ifdef USE_HEAPSORT
+  /*    free_heap(h);*/
+#endif
+  taucs_free(closed_cycle);
+  unionfind_free();
+  
+  /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+  /*** ALLOCATED: groups,complete_subgraph ***/
+  
+  precond->nent = Bent;
+  
+  wtime_component_bases = taucs_wtime() - wtime_component_bases;
+  taucs_printf("\t\tAMWB component bases = %.3f seconds\n",wtime_component_bases);
+
+
+
+  wtime_pair_bases = taucs_wtime();
+
+  step2_Bent = Bent;
+  
+  /* COMPLETE EACH PAIR OF SUBGRAPHS INTO A BASIS */
+  if (curr_group>1) {
+    pairs = (six **)taucs_calloc(curr_group,sizeof(six *));
+    closed_cycle = (byte *)taucs_calloc(n,sizeof(byte));
+    if(!pairs || !closed_cycle) {
+      free_graph(mtxA_tmp);
+      free_graph(precond);
+      free_linked_list(l);
+      taucs_free(diag);
+      taucs_free(already_added);
+      taucs_free(pi);
+      taucs_free(groups);
+      taucs_free(complete_subgraph);
+      taucs_free(pairs);
+      taucs_free(closed_cycle);
+    }
+
+    /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+    /*** ALLOCATED: groups,complete_subgraph,pairs,closed_cycle ***/
+
+    if (unionfind_init(n) == -1) {
+      free_graph(mtxA_tmp);
+      free_graph(precond);
+      free_linked_list(l);
+      taucs_free(diag);
+      taucs_free(already_added);
+      taucs_free(pi);
+      taucs_free(groups);
+      taucs_free(complete_subgraph);
+      taucs_free(pairs);
+      taucs_free(closed_cycle);
+      return NULL;
+    }
+
+    /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked,UF ***/
+    /*** ALLOCATED: groups,complete_subgraph,pairs,closed_cycle ***/
+
+    wtime = taucs_wtime();
+#ifdef USE_HEAPSORT
+    /*
+    size = heap_sort(basis_Bent,&h,precond);
+    if (size == -1) {
+      free_graph(mtxA_tmp);
+      free_graph(precond);
+      free_linked_list(l);
+      unionfind_free();
+      taucs_free(diag);
+      taucs_free(already_added);
+      taucs_free(pi);
+      taucs_free(groups);
+      taucs_free(complete_subgraph);
+      taucs_free(pairs);
+      taucs_free(closed_cycle);
+      return NULL;
+    }
+    */
+    size = pqueue_fill(&Bh,precond);
+
+#else
+    assert(basis_Bent == precond->nent);
+    size = precond->nent;
+    graph_sort(precond);
+#endif
+    wtime = taucs_wtime() - wtime;
+    taucs_printf("\t\tAMWB sort(%d) = %.3f seconds\n",basis_Bent,wtime);
+    wtime_sort += wtime;
+
+    /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked,UF ***/
+    /*** ALLOCATED: groups,complete_subgraph,pairs,closed_cycle,heap ***/
+
+    Do(i,size)
+      {
+#ifdef USE_HEAPSORT
+	u = precond->edges[Bh.edges[i]].i;
+	v = precond->edges[Bh.edges[i]].j;
+#else
+	u = precond->edges[i].i;
+	v = precond->edges[i].j;
+	if (u==v) continue;
+#endif
+
+	if (groups[u] == groups[v])
+	  {
+#ifdef USE_HEAPSORT
+	    weight = precond->edges[Bh.edges[i]].v;
+#else
+	    weight = precond->edges[i].v;
+#endif
+	    
+	    edge_sign = (weight>0);
+	    
+	    x = find_set(u);
+	    y = find_set(v);
+	    
+	    if (x!=y)
+	      {
+		if (!((closed_cycle[x])&&(closed_cycle[y])))
+		  {
+		    un_root = Union(u,v,x,y,edge_sign);
+		    closed_cycle[un_root] = closed_cycle[x] | closed_cycle[y];
+		  }
+	      }
+	    else
+	      {
+		if ((edge_sign != (label[u]^label[v])) && (closed_cycle[x]==0))
+		  closed_cycle[x] = 1;
+	      }
+	    
+	  }
+      }
+
+    wtime = taucs_wtime();
+#ifdef USE_HEAPSORT
+    /*
+    free_heap(h);
+    size = heap_sort(Anent,&h,mtxA_tmp);
+    if (size == -1) {
+      free_graph(mtxA_tmp);
+      free_graph(precond);
+      free_linked_list(l);
+      unionfind_free();
+      taucs_free(diag);
+      taucs_free(already_added);
+      taucs_free(pi);
+      taucs_free(groups);
+      taucs_free(complete_subgraph);
+      taucs_free(pairs);
+      taucs_free(closed_cycle);
+      return NULL;
+    }
+    */
+    /*size = pqueue_fill(&Ah,mtxA_tmp);*/
+    size = Ah.heap_size;
+#else
+    assert(Anent == mtxA_tmp->nent);
+    size = mtxA_tmp->nent;
+    graph_sort(mtxA_tmp);
+#endif
+    wtime = taucs_wtime() - wtime;
+    taucs_printf("\t\tAMWB sort(%d) = %.3f seconds\n",Anent,wtime);
+    wtime_sort += wtime;
+
+    Do(i,size)
+      {
+	int g1,g2;
+	six *p;
+	six *tmp;
+
+
+#ifdef USE_HEAPSORT
+	u = mtxA_tmp->edges[Ah.edges[i]].i;
+	v = mtxA_tmp->edges[Ah.edges[i]].j;
+#else
+	u = mtxA_tmp->edges[i].i;
+	v = mtxA_tmp->edges[i].j;
+	if (u==v) continue;
+#endif
+
+	if (Bent == precond_maxnent)
+	  {
+	    precond_maxnent += chunk;
+	    taucs_printf("adding space for %d entries in precond\n",chunk);fflush(stdout);
+	    if (graph_resize(precond,precond_maxnent) == -1) {
+	      int i_local;
+	      Do(i_local,curr_group)
+		free_linked_list_2(pairs[i_local]);
+	      /* precond has not been freed */
+	      free_graph(mtxA_tmp);
+	      free_graph(precond);
+	      free_linked_list(l);
+	      unionfind_free();
+#ifdef USE_HEAPSORT
+	      free_heap(h);
+#endif
+	      taucs_free(diag);
+	      taucs_free(already_added);
+	      taucs_free(pi);
+	      taucs_free(groups);
+	      taucs_free(complete_subgraph);
+	      taucs_free(pairs);
+	      taucs_free(closed_cycle);
+	      return NULL;
+	    }
+	    
+	    /*
+	    ivec1_p = precond->ivec1;
+	    ivec2_p = precond->ivec2;
+	    dvec_p = precond->dvec;
+	    */
+	  }
+
+	g1 = min(groups[u],groups[v]); g2 = max(groups[u],groups[v]);
+	
+	if (g1 != g2)
+	  {
+	    p = pairs[(g1+g2)%curr_group];
+	    while (p!=NULL)
+	      {
+		if ((p->group_1 == g1)&&(p->group_2 == g2))
+		  goto after2;
+		p = p->next;
+	      }
+	  after2:
+	    
+	    if (p == NULL)
+	      {
+		tmp = (six *)taucs_calloc(1,sizeof(six));
+		if (tmp == NULL) {
+		  int i_local;
+		  Do(i_local,curr_group)
+		    free_linked_list_2(pairs[i_local]);
+		  free_graph(mtxA_tmp);
+		  free_graph(precond);
+		  free_linked_list(l);
+		  unionfind_free();
+#ifdef USE_HEAPSORT
+		  free_heap(h);
+#endif
+		  taucs_free(diag);
+		  taucs_free(already_added);
+		  taucs_free(pi);
+		  taucs_free(groups);
+		  taucs_free(complete_subgraph);
+		  taucs_free(pairs);
+		  taucs_free(closed_cycle);
+		  return NULL;
+		}
+		tmp->group_1 = g1;
+		tmp->group_2 = g2;
+		tmp->no_edges = 0;
+		tmp->next = pairs[(g1+g2)%curr_group];
+		pairs[(g1+g2)%curr_group] = tmp;
+		p = tmp;
+	      }
+
+	    /* first check if the next edges to be added are inner to one of the subgraphs
+	       (not cross edges) */
+	    if (p->no_edges < 2)
+	      {
+#ifdef USE_HEAPSORT
+		weight = mtxA_tmp->edges[Ah.edges[i]].v;
+#else
+		weight = mtxA_tmp->edges[i].v;
+#endif
+		
+		edge_sign = (weight>0);
+		
+		x = find_set(u);
+		y = find_set(v);
+		closed_cycle_x = closed_cycle[x];
+		closed_cycle_y = closed_cycle[y];
+
+		if (complete_subgraph[g1].completed_to_basis)
+		  if (fabs(weight)<=fabs(complete_subgraph[g1].c))
+		    if ((p->no_edges==0)||
+			((groups[p->a[0]]!=g1)||
+			 (groups[p->b[0]]!=g1)))
+		    {
+		      p->a[p->no_edges] = complete_subgraph[g1].a;
+		      p->b[p->no_edges] = complete_subgraph[g1].b;
+		      p->c[p->no_edges] = complete_subgraph[g1].c;
+		      p->cross[p->no_edges] = 0;
+		      (p->no_edges)++;
+		    }
+
+		if (p->no_edges < 2)		
+		  if (complete_subgraph[g2].completed_to_basis)
+		    if (fabs(weight)<=fabs(complete_subgraph[g2].c))
+		      if ((p->no_edges==0)||
+			  ((groups[p->a[0]]!=g2)||
+			   (groups[p->b[0]]!=g2)))
+			{
+			  p->a[p->no_edges] = complete_subgraph[g2].a;
+			  p->b[p->no_edges] = complete_subgraph[g2].b;
+			  p->c[p->no_edges] = complete_subgraph[g2].c;
+			  p->cross[p->no_edges] = 0;
+			  (p->no_edges)++;
+			}
+		
+		/* if p->no_edges == 2 get out of if */
+
+		if (p->no_edges == 1)
+		  {
+		    if (groups[p->a[0]] == groups[p->b[0]])
+		      {
+			if (groups[p->a[0]] == groups[u])
+			  closed_cycle_x = 1;
+			else
+			  closed_cycle_y = 1;
+		      }
+		      
+		  }
+
+		
+		if (p->no_edges == 0) {
+		  /* sivan: added this brace below to avoid warning. */
+		  /* I have no idea why there are two identical ifs. */
+		  /* I hope I added the matching brace in the right place */
+		  if (p->no_edges == 0) { 
+		    if (x != y) {
+		      if (!((closed_cycle[x])&&(closed_cycle[y]))) {
+#ifdef USE_HEAPSORT
+			if (already_added[Ah.edges[i]]==0)
+#else
+			  if (already_added[i]==0)
+#endif
+			    {
+			      /*
+				ivec1_p[Bent] = u; 
+				ivec2_p[Bent] = v; 
+				dvec_p[Bent] = weight; 
+			      */
+			      precond->edges[Bent].i = u; 
+			      precond->edges[Bent].j = v; 
+			      precond->edges[Bent].v = weight; 
+			      
+			      Bent++; 
+			      
+			      diag[u] += fabs(weight);
+			      diag[v]+=fabs(weight);
+			    }
+			p->a[p->no_edges] = u;
+			p->b[p->no_edges] = v;
+			p->c[p->no_edges] = weight;
+			p->cross[p->no_edges] = 1;
+			(p->no_edges)++;
+		      }
+		    } else {
+		      if ((edge_sign != (label[u]^label[v])) && (closed_cycle[x]==0)) {
+#ifdef USE_HEAPSORT
+			if (already_added[Ah.edges[i]]==0)
+#else
+			  if (already_added[i]==0)
+#endif
+			    {
+			      /*
+				ivec1_p[Bent] = u; 
+				ivec2_p[Bent] = v; 
+				dvec_p[Bent] = weight; 
+			      */
+			      precond->edges[Bent].i = u; 
+			      precond->edges[Bent].j = v; 
+			      precond->edges[Bent].v = weight; 
+			      
+			      Bent++; 
+			      
+			      diag[u] += fabs(weight);
+			      diag[v] += fabs(weight);
+			    }
+			p->a[p->no_edges] = u;
+			p->b[p->no_edges] = v;
+			p->c[p->no_edges] = weight;
+			p->cross[p->no_edges] = 1;
+			(p->no_edges)++;
+		      }
+		    }
+		  }
+		}
+
+		      if (p->no_edges == 1) {
+			if (p->cross[0]==0) {
+			  if (groups[p->a[0]]==groups[u])
+			    closed_cycle_x = 1;
+			  else
+			    closed_cycle_y = 1;
+			}
+		    
+		    if ((!((closed_cycle_x)&&(closed_cycle_y)))&&(p->cross[0]==0))
+		      {
+#ifdef USE_HEAPSORT
+			if (already_added[Ah.edges[i]]==0)
+#else
+			if (already_added[i]==0)
+#endif
+			  {
+			    /*
+			    ivec1_p[Bent] = u; 
+			    ivec2_p[Bent] = v; 
+			    dvec_p[Bent] = weight; 
+			    */
+			    precond->edges[Bent].i = u;
+			    precond->edges[Bent].j = v;
+			    precond->edges[Bent].v = weight;
+
+			    Bent++; 
+
+			    diag[u] += fabs(weight);
+			    diag[v]+=fabs(weight);
+			  }
+			p->a[p->no_edges] = u;
+			p->b[p->no_edges] = v;
+			p->c[p->no_edges] = weight;
+			p->cross[p->no_edges] = 1;
+			(p->no_edges)++;
+		      }
+		    
+		    if ((!closed_cycle_x)&&(!closed_cycle_y)&&(p->cross[0]==1))
+		      {
+			int x1,y1,edge_sign1;
+			x1 = find_set(p->a[0]);
+			y1 = find_set(p->b[0]);
+			edge_sign1 = (p->c[0]>0);			    
+			
+			if ((edge_sign^edge_sign1^label[u]^label[v]^label[p->a[0]]^label[p->b[0]]) ==1)
+			  {
+#ifdef USE_HEAPSORT
+			    if (already_added[Ah.edges[i]]==0)
+#else
+			    if (already_added[i]==0)
+#endif
+			      {
+				/*
+				ivec1_p[Bent] = u; 
+				ivec2_p[Bent] = v; 
+				dvec_p[Bent] = weight; 
+				*/
+
+				precond->edges[Bent].i = u; 
+				precond->edges[Bent].j = v; 
+				precond->edges[Bent].v = weight; 
+
+				Bent++; 
+
+				diag[u] += fabs(weight);
+				diag[v]+=fabs(weight);
+			      }
+			    p->a[p->no_edges] = u;
+			    p->b[p->no_edges] = v;
+			    p->c[p->no_edges] = weight;
+			    p->cross[p->no_edges] = 1;
+			    (p->no_edges)++;
+			  }
+		      }
+		  }
+	      }
+	  }
+      }
+
+    Do(i,curr_group)
+      free_linked_list_2(pairs[i]);
+
+    taucs_free(pairs);
+#ifdef USE_HEAPSORT
+    /*    free_heap(h);*/
+    free_heap(Ah);
+    free_heap(Bh);
+#endif
+    taucs_free(closed_cycle);
+    unionfind_free();
+    
+    precond->nent = Bent;
+
+    /*** ALLOCATED: mtxA_tmp,diag,already_added,pi,precond,linked ***/
+    /*** ALLOCATED: groups,complete_subgraph ***/
+  }
+
+  taucs_free(complete_subgraph);
+  taucs_free(already_added);
+
+  wtime_pair_bases = taucs_wtime() - wtime_pair_bases;
+  taucs_printf("\t\tAMWB pair bases = %.3f seconds\n",wtime_pair_bases);
+
+
+  /*** ALLOCATED: mtxA_tmp,diag,pi,precond,linked ***/
+  /*** ALLOCATED: groups ***/
+
+  /* allocate more memory to the preconditioner if needed */
+
+  wtime = taucs_wtime();
+  if (precond_maxnent < Bent + n)
+    {
+      taucs_printf("adding space for %d entries in precond for diagonal entries\n",Bent+n-precond_maxnent);
+      precond_maxnent = Bent + n;
+
+
+      if (graph_resize(precond,precond_maxnent) == -1) {
+	/* precond has not been freed */
+	free_graph(mtxA_tmp);
+	free_graph(precond);
+	free_linked_list(l);
+	taucs_free(diag);
+	taucs_free(pi);
+	taucs_free(groups);
+	return NULL;
+      }
+
+      precond->nent = Bent;
+      /*
+      ivec1_p = precond->ivec1 ;
+      ivec2_p = precond->ivec2 ;
+      dvec_p = precond->dvec ;
+      */
+    }
+
+  Do(i,n)
+    {
+      /*
+      ivec1_p[Bent] = i;
+      ivec2_p[Bent] = i;
+      dvec_p[Bent] = diag[i];
+      */
+
+      precond->edges[Bent].i = i; 
+      precond->edges[Bent].j = i; 
+      precond->edges[Bent].v = diag[i]; 
+
+      Bent++;
+    }
+  precond->nent = Bent;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB precond resize = %.3f seconds\n",wtime);
+
+
+  taucs_printf("actual number of entries in preconditioner = %d\n",Bent);fflush(stdout);
+
+  taucs_free(diag);
+  taucs_free(groups);
+  taucs_free(pi);
+  free_linked_list(l);
+  free_graph(mtxA_tmp);
+
+  /*** ALLOCATED: precond ***/
+
+  wtime = taucs_wtime();
+  out = graph_to_ccs_matrix(precond);
+  if (!out) {
+    free_graph(precond);
+    return NULL;
+  }
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB graph-to-matrix = %.3f seconds\n",wtime);
+
+  free_graph(precond);
+
+  
+  wtime_total = taucs_wtime() - wtime_total;
+  taucs_printf("\t\tAMWB time = %.3f seconds (%.3f sort)\n",
+	       wtime_total,
+	       wtime_sort);
+  
+  return out;
+}
+
+/*********************************************************/
+/* MST-specific routines                                 */
+/*********************************************************/
+
+/* Binary heap over vertices, stored as two parallel arrays: slot k holds
+   vertex vertices[k] together with its priority key[k].  The routines
+   below keep the largest key in slot 0. */
+typedef struct msthea {
+  int     heap_size; /* number of slots currently considered part of the heap */
+  int*    vertices;  /* vertices[k]: id of the vertex stored in slot k */
+  double* key;       /* key[k]: priority of the vertex stored in slot k */
+} mstheap;
+
+/* Magnitude of the sentinel key: build_mstheap starts every key at -INF. */
+#define INF 100000000.0
+
+/* Slot arithmetic for a 0-based implicit binary heap.  Parent(0) evaluates
+   to -1, so callers must test for the root before using it. */
+#define Parent(i) ((((i)+1)/2) - 1)
+#define Left(i) ((((i)+1)*2) - 1)
+#define Right(i) ((((i)+1)*2 + 1) - 1)
+
+/* Swap the contents (vertex id and key) of heap slots a and b, then record
+   the new slot of each moved vertex in point_to_heap.  A is received by
+   value, but its arrays are the caller's, so the swap is visible outside. */
+static
+void mstheap_exchange(mstheap A,int a,int b,int *point_to_heap)
+{
+  int    vertex_a = A.vertices[a];
+  int    vertex_b = A.vertices[b];
+  double key_a    = A.key[a];
+  double key_b    = A.key[b];
+
+  A.vertices[a] = vertex_b;
+  A.vertices[b] = vertex_a;
+
+  A.key[a] = key_b;
+  A.key[b] = key_a;
+
+  /* slot a now holds what used to be in b, and vice versa */
+  point_to_heap[vertex_b] = a;
+  point_to_heap[vertex_a] = b;
+}
+
+/* Sift the entry in slot i downwards until neither child inside the heap
+   has a strictly larger key.  point_to_heap is kept in sync through
+   mstheap_exchange. */
+static
+void Mstheapify(mstheap A,int i,int *point_to_heap)
+{
+  int node = i;
+
+  for (;;)
+    {
+      int left  = Left(node);
+      int right = Right(node);
+      int top   = node;
+
+      /* pick the slot with the largest key among node and its children */
+      if ((left < A.heap_size) && (A.key[left] > A.key[top]))
+        top = left;
+      if ((right < A.heap_size) && (A.key[right] > A.key[top]))
+        top = right;
+
+      if (top == node)
+        return;
+
+      mstheap_exchange(A,node,top,point_to_heap);
+      node = top;
+    }
+}
+
+/* Set up h as a heap over the vertices 0..size-1 with vertex r at the top.
+ *
+ *   r             vertex that receives key 0 and is moved to slot 0
+ *   size          number of vertices; becomes h->heap_size
+ *   h             heap to initialize
+ *   point_to_heap output: point_to_heap[v] is the slot holding vertex v
+ *   alloc_flag    non-zero: allocate h->vertices and h->key here;
+ *                 zero: reuse the arrays already attached to h
+ *
+ * Every vertex other than r starts with key -INF.
+ *
+ * Returns 0 on success and -1 if an allocation fails.  On failure both
+ * arrays are released and the pointers reset to NULL, so nothing leaks
+ * when only one of the two allocations succeeded and a later
+ * free_mstheap on the same heap stays harmless.
+ */
+static
+int build_mstheap(int r,int size,mstheap *h,int *point_to_heap,char alloc_flag)
+{
+  int i;
+
+  h->heap_size = size;
+
+  if (alloc_flag)
+    {
+      h->vertices = (int *)taucs_malloc(size * sizeof(int));
+      h->key = (double *)taucs_malloc(size * sizeof(double));
+      
+      if (h->key == NULL || h->vertices == NULL)
+        {
+          /* one of the two may have succeeded: release it instead of
+             leaking it (this file already passes NULL to taucs_free) */
+          taucs_free(h->vertices);
+          taucs_free(h->key);
+          h->vertices = NULL;
+          h->key = NULL;
+          return -1;
+        }
+    }
+
+  Do(i,size)
+    {
+      h->vertices[i] = i;
+      h->key[i] = (- INF);
+      point_to_heap[i] = i;
+    }
+  
+  h->key[r] = 0;
+
+  /* bring the root vertex, the only one with a finite key, to slot 0 */
+  mstheap_exchange((*h),r,0,point_to_heap);
+  
+  return 0;
+}
+
+/* Remove the entry in slot 0 and return its vertex.  The last slot's entry
+   takes its place, the heap shrinks by one, and the new top is sifted
+   down.  The heap must be non-empty (asserted). */
+static
+int Mstheap_Extract_Max(mstheap *A,int *point_to_heap)
+{
+  int last;
+  int top_vertex;
+
+  assert (A->heap_size >= 1);
+
+  last       = A->heap_size - 1;
+  top_vertex = A->vertices[0];
+
+  /* move the last entry into the vacated top slot */
+  A->vertices[0] = A->vertices[last];
+  A->key[0]      = A->key[last];
+  point_to_heap[A->vertices[0]] = 0;
+
+  A->heap_size = last;
+
+  Mstheapify(*A,0,point_to_heap);
+
+  return top_vertex;
+}
+
+/* Give vertex vv the key val and move it towards the top of the heap for
+   as long as its parent's key is strictly smaller than val.  The entry is
+   never moved downwards.  point_to_heap is updated for every entry that
+   changes slot. */
+static
+void mstheap_increase_key(mstheap h,int vv,double val,int *point_to_heap)
+{
+  int slot  = point_to_heap[vv];
+  int moved = h.vertices[slot];   /* vertex stored in the slot being raised */
+  int up;
+
+  h.key[slot] = val;
+
+  while (slot > 0)
+    {
+      up = Parent(slot);
+      if (!(h.key[up] < val))
+        break;
+
+      /* pull the smaller parent down into the current slot */
+      h.key[slot]      = h.key[up];
+      h.vertices[slot] = h.vertices[up];
+      point_to_heap[h.vertices[slot]] = slot;
+
+      slot = up;
+    }
+
+  /* drop the raised entry into the slot where the climb stopped */
+  h.key[slot]      = val;
+  h.vertices[slot] = moved;
+  point_to_heap[moved] = slot;
+}
+
+/* Release the two arrays owned by a heap (the struct itself is passed by
+   value and is not freed). */
+static
+void free_mstheap(mstheap h)
+{
+  int    *heap_vertices = h.vertices;
+  double *heap_keys     = h.key;
+
+  taucs_free(heap_vertices);
+  taucs_free(heap_keys);
+}
+
+/* For every pair of distinct groups that is not already joined by a tree
+   edge (as described by the parent array pi), append to precond the edge of
+   mtxA between the two groups with the largest weight w = -v, and add that
+   weight to diag[] at both endpoints.
+
+   mtxA      : input graph (edges[k].i, .j, .v are read).
+   precond   : graph being built; edges are appended starting at index Bent
+               and the edge array is grown with graph_resize() when full.
+   Bent      : number of entries already stored in precond.
+   array     : not referenced in this function.
+   groups    : groups[v] is the group id of vertex v.
+   no_groups : number of groups; also the size of the hash table below.
+   pi        : parent of each vertex in the tree, -1 for a vertex without parent.
+   diag      : per-vertex diagonal values, updated in place.
+
+   Returns the new number of entries in precond, or -1 if the working
+   storage could not be allocated. */
+static
+int add_heavy_edges(graph* mtxA,
+		    graph* precond,
+		    int Bent,
+		    edge **array,
+		    int *groups,
+		    int no_groups,
+		    int *pi,
+		    double *diag)
+{
+  three **pairs;
+  int i,j,a,b,k;
+  double w;
+  /*
+  int *ivec1_p, *ivec2_p ;
+  double *dvec_p; 
+  int *ivec1, *ivec2 ;
+  double *dvec; 
+  */
+  int orig_Bent;
+  three *p,*tmp;
+  int n,nent;
+  int precond_maxnent,chunk;
+
+  three *pool;
+  int   next_in_pool;
+
+  n = mtxA->n;
+  nent = mtxA->nent;
+
+  precond_maxnent = precond->max_size;
+  /* NOTE(review): the inner min() caps at 5000, so with conventional min/max
+     macros this always evaluates to 10000 -- confirm that is intended. */
+  chunk = max(min((no_groups*(no_groups-1)/2)/10,5000),10000);
+  orig_Bent = Bent;
+  
+  /* pairs[] is a hash table of group pairs keyed by (a+b)%no_groups with
+     chaining; its nodes come from pool, which has room for one node per
+     vertex plus one per edge. */
+  pairs = (three **) taucs_calloc(no_groups,sizeof(three *));
+  pool  = (three*)   taucs_malloc((n+nent) * sizeof(three));
+  if (!pairs || !pool) {
+    taucs_free(pairs);
+    taucs_free(pool);
+    return -1;
+  }
+  next_in_pool = 0;
+
+  /*
+  ivec1 = mtxA->ivec1;
+  ivec2 = mtxA->ivec2;
+  dvec = mtxA->dvec;
+  */
+
+  /* Pass 1: every tree edge (k, pi[k]) that crosses two groups marks that
+     pair of groups as already connected. */
+  Do(k,n) {
+    i = k;
+    j = pi[k];
+    
+    if (j != (-1)) {
+      a = min(groups[i],groups[j]);
+      b = max(groups[i],groups[j]);
+      
+      if (a != b) {
+	p = pairs[(a+b)%no_groups];
+	while (p!=NULL) {
+	  if ((p->group_1 == a)&&(p->group_2 == b))
+	    goto after;
+	  p = p->next;
+	}
+      after:
+	if (p == NULL) {
+	  /*tmp = (three *)taucs_malloc(sizeof(three));*/
+	  /*if (tmp == NULL)  {taucs_printf("ERROR! OUT OF MEMORY\n");exit(234);}*/
+	  assert(next_in_pool < n+nent);
+	  tmp = pool + next_in_pool; 
+	  next_in_pool ++;
+	  tmp->group_1 = a;
+	  tmp->group_2 = b;
+	  tmp->already_connected = 1;
+	  tmp->next = pairs[(a+b)%no_groups];
+	  pairs[(a+b)%no_groups] = tmp;
+	}
+      }
+    }
+  }
+
+  /* Pass 2: for each edge of mtxA crossing two groups that are not already
+     connected, remember the heaviest such edge (largest w = -v) per pair. */
+  Do(k,nent) {
+    i = (mtxA->edges)[k].i;
+    j = (mtxA->edges)[k].j;
+
+    a = min(groups[i],groups[j]);
+    b = max(groups[i],groups[j]);
+    
+    if (a != b)	{
+      w = - (mtxA->edges)[k].v;
+      if (w) {
+	p = pairs[(a+b)%no_groups];
+	while (p!=NULL) {
+	  if ((p->group_1 == a)&&(p->group_2 == b)) {
+	    if (p->already_connected==0)
+	      if (w > p->c) {
+		p->a = i;
+		p->b = j;
+		p->c = w;
+	      }
+	    goto after1;
+	  }
+	  p = p->next;
+	}
+      after1:
+	if (p == NULL) {
+ 	  /*tmp = (three *)taucs_malloc(sizeof(three));*/
+	  /*if (tmp == NULL)  {taucs_printf("ERROR! OUT OF MEMORY\n");exit(234);}*/
+	  assert(next_in_pool < n+nent);
+	  tmp = pool + next_in_pool; 
+	  next_in_pool ++;
+	  tmp->group_1 = a;
+	  tmp->group_2 = b;
+	  tmp->a = i;
+	  tmp->b = j;
+	  tmp->c = w;
+	  tmp->already_connected = 0;
+	  tmp->next = pairs[(a+b)%no_groups];
+	  pairs[(a+b)%no_groups] = tmp;
+	}
+      }
+    }
+  }
+  
+  /*
+  ivec1_p = precond->ivec1;
+  ivec2_p = precond->ivec2;
+  dvec_p = precond->dvec;
+  */
+  
+  /* Pass 3: emit the selected edge of every pair that was not connected by
+     the tree, with endpoints ordered so that i <= j. */
+  Do(i,no_groups) {
+    p = pairs[i];
+    while(p!=NULL) {
+      if(p->already_connected == 0) {
+	if (p->a > p->b)
+	  swap(p->a,p->b);
+	
+	(precond->edges)[Bent].i = p->a;
+	(precond->edges)[Bent].j = p->b;
+	(precond->edges)[Bent].v = -(p->c);
+
+	Bent++;
+	
+	if (Bent == precond_maxnent) {
+	  precond_maxnent += chunk;
+	  taucs_printf("adding space for %d entries in precond\n",chunk);
+	  /* NOTE(review): the result of graph_resize() is not checked here;
+	     confirm how it reports an allocation failure. */
+	  graph_resize(precond,precond_maxnent);
+	  precond->nent = orig_Bent;
+		
+	  /*  
+	  ivec1_p = precond->ivec1;
+	  ivec2_p = precond->ivec2;
+	  dvec_p = precond->dvec;
+	  */
+	  
+	}
+	/* subtracting -(c) adds the edge weight c to both diagonal entries */
+	diag[p->a] -= (-p->c);
+	diag[p->b] -= (-p->c);
+      }
+
+      p = p->next;
+    }
+  }
+  
+  /*Do(i,no_groups) free_linked_list_2(pairs[i]);*/
+  taucs_free(pool);
+  taucs_free(pairs);
+
+  return(Bent);
+}
+
+static int Dijkstra(graph *mtxA,int r,int *pi,linked *l,int *d,int *maxdist,int *partition,double min,double y,int j);
+
+
+/* One round of the low-stretch partitioning (after Peleg's book, page 217).
+
+   Starting from the current partition of the vertices, repeatedly picks a
+   random still-unassigned root, computes distances from it with Dijkstra()
+   and assigns every reached, unassigned vertex within a chosen radius to a
+   new cluster.  When all n vertices are assigned, one edge per old cluster
+   that has a parent cluster is appended to `out`.
+
+   mtxA          : input graph.
+   partition     : current cluster id of each vertex (read only).
+   new_partition : receives the new cluster id of each vertex.
+   parts         : receives the number of new clusters.
+   out           : graph to which the selected edges are appended
+                   (out->edges / out->nent are updated).
+   nparts        : number of clusters in `partition`.
+
+   Returns 1 on success, 0 if an allocation (or Dijkstra) failed. */
+static int Av_Part_W(graph *mtxA,int *partition,int *new_partition,int *parts,graph *out,int nparts)
+{
+  /* Peleg, Noga et al.'s partition. From Peleg's book, page 217 */
+   
+  linked *l = NULL, *l_c = NULL;
+  int *pi=NULL,*d=NULL,i,k,*findrho=NULL,minrho,maxdist,classes,n,nent,root,j,curr_partition=0;
+  int row, col;
+  int *pi1 = 0; /* warning */
+  double x, y, min, max, not;
+  byte bool=1;
+  edge *p,*dummy, *pe ,*max_pe;
+  int count = 0;
+ 
+  n = mtxA->n;
+  nent = mtxA->nent;
+ 
+  x = exp(sqrt(log(n)*log(log(n))))/3;
+  y = x * 9*log(n) * (floor(3*log(n)/log(x))+1);
+
+  pi      = (int *)taucs_malloc(n*sizeof(int));
+  d       = (int *)taucs_malloc(n*sizeof(int));
+  l       = create_linked_list(mtxA,n,mtxA->nent,&min,&max);
+  if (!pi || !d || !l)
+    goto exit_Av_Part_W;
+  
+  /* number of weight classes: edges are bucketed by log_y(|v|/min) */
+  classes = (int)(log(max/min)/log(y))+1;
+
+  for(i=0;i<n;i++)
+    new_partition[i] = -1;
+
+  j = 1;
+
+  while (count < n)
+    {
+      root = rand() % n;
+      if (new_partition[root] == -1)
+	{
+	  for(i=0;i<n;i++)
+	    d[i] = -1;
+	  /* d[] stays -1 for vertices Dijkstra does not reach */
+	  if (Dijkstra(mtxA,root,pi,l,d,&maxdist,partition,min,y,j) == 0)
+	    goto exit_Av_Part_W;
+	  
+	  findrho     = (int *)taucs_calloc((maxdist+1)*classes,sizeof(int));
+	  if (!findrho)
+	    goto exit_Av_Part_W;
+	  
+	  for(i=0;i<n;i++)
+	    {
+	      if (d[i] != -1)
+		{
+		  p = (l->point)[i];
+		  while (p != NULL)
+		    {
+		      /* NOTE(review): the weight is truncated to int before
+			 abs(); a weight with |v| < 1 gives log(0) here --
+			 confirm whether fabs() was intended. */
+		      if ((d[mtxA->edges[p->entry_no].i]!=-1) && (d[mtxA->edges[p->entry_no].j]!=-1))
+			if ((d[mtxA->edges[p->entry_no].i] - d[mtxA->edges[p->entry_no].j] >= -1) &&
+			    (d[mtxA->edges[p->entry_no].i] - d[mtxA->edges[p->entry_no].j] <= 1))
+			  findrho[(max(d[mtxA->edges[p->entry_no].i],d[mtxA->edges[p->entry_no].j]))*classes+
+				 (int)(log(abs((int)(mtxA->edges[p->entry_no].v))/min)/log(y))]++;
+		      p = p->next;
+		    }
+		}
+	    }
+
+	  for(i=0;i<min(j,classes);i++)
+	    findrho[i] = 0; /* ignore edges connecting two vertices whose distance is 0 */
+	
+	  /* At this point, findrho[k,i], or findrho[k*classes+i], contains the number of edges in E_i,
+	     connecting two vertices whose distance is k, or a vertex of distance k with a vertex of distance k-1 */
+	  
+	  for(k=1;k<maxdist;k++)
+	    for(i=0;i<min(j,classes);i++)
+	      findrho[k*classes+i] += findrho[(k-1)*classes+i];
+	  
+	  /* At this point, findrho[k,i], or findrho[k*classes+i], contains the number of edges in E_i,
+	     connecting two vertices whose distance is j, or a vertex of distance j with a vertex of distance j-1
+	     for j=1,...,k */
+	  
+	  /* look for the smallest radius whose next layer adds at most a
+	     1/x fraction of edges in every active class */
+	  for(minrho=1;minrho<maxdist;minrho++)
+	    {
+	      bool = 1;
+	      for(k=0;k<min(j,classes);k++)
+		{
+		  if ((double)(findrho[(minrho+1)*classes+k]-findrho[minrho*classes+k]) > (findrho[minrho*classes+k])/x)
+		    bool = 0;
+		}
+	      if (bool)
+		goto afterr;
+	    }
+	
+	afterr:
+	  if (bool)
+	    {
+	      /* radius found: take the reached vertices within minrho */
+	      for(i=0;i<n;i++)
+		if ((d[i] <= minrho) && (d[i] != -1) )
+		  if (new_partition[i] == -1)
+		    {
+		      count ++;
+		      new_partition[i] = curr_partition;
+		    }
+	    }
+	  else
+	    {
+	      /* no radius qualified: take every reached, unassigned vertex */
+	      for(i=0;i<n;i++)
+		if ((new_partition[i] == -1) && (d[i] != -1))
+		  {
+		    count ++;
+		    new_partition[i] = curr_partition;
+		  }
+	    }
+	  
+	  /* drop from the adjacency lists every edge touching the new cluster */
+	  for(i=0;i<n;i++)
+	    {
+	      if (new_partition[i] == curr_partition)
+		l->point[i] = NULL;
+	      else
+		{
+		  p = l->point[i];
+		  l->point[i] = NULL;
+		  while (p != NULL)
+		    {
+		      if ((new_partition[mtxA->edges[p->entry_no].i] != curr_partition) && (new_partition[mtxA->edges[p->entry_no].j] != curr_partition))
+			{
+			  dummy = l->point[i];
+			  l->point[i] = p;
+			  p = p->next;
+			  (l->point[i])->next = dummy;
+			}
+		      else
+			p = p->next;
+		    }
+		}
+	    }
+	  
+	  curr_partition ++;
+	  j++;
+	  taucs_free(findrho);
+	  findrho = NULL; /* the exit path frees findrho too: avoid a double free */
+	}
+    }
+
+  *parts = curr_partition;
+
+  l_c = create_linked_list_cluster(mtxA,nparts,mtxA->nent,&not,&not,partition,new_partition);
+  pi1 = (int *)taucs_malloc(nparts*sizeof(int));
+
+  if (!l_c || !pi1)
+    goto exit_Av_Part_W;
+  
+  /* lift the vertex parent array pi to a parent array over old clusters */
+  for(i=0;i<n;i++)
+    if (pi[i] != -1)
+      {
+	if (partition[i] != partition[pi[i]])
+	  pi1[partition[i]] = partition[pi[i]];
+      }
+    else
+      pi1[partition[i]] = -1;
+
+  /* for each old cluster with a parent cluster, emit the edge between the
+     two with the largest -v */
+  Do(i,nparts) {
+    row = i;
+    col = pi1[i];
+    if (col != (-1)) {
+      pe = l_c->point[row];
+      max_pe = NULL;
+      while (pe != NULL) {
+	if (   (partition[(mtxA->edges)[pe->entry_no].j] == col) 
+	       || (partition[(mtxA->edges)[pe->entry_no].i] == col))
+	  {
+	    if (!max_pe)
+	      max_pe = pe;
+	    else
+	      if (-mtxA->edges[pe->entry_no].v > -mtxA->edges[max_pe->entry_no].v)
+		max_pe = pe;
+	  }
+	pe = pe->next;
+      }
+      
+      assert(max_pe);
+      out->edges[out->nent].i = (mtxA->edges)[max_pe->entry_no].i;
+      out->edges[out->nent].j = (mtxA->edges)[max_pe->entry_no].j;
+      out->edges[out->nent].v = (mtxA->edges)[max_pe->entry_no].v;
+      out->nent++;
+      
+    }
+  }
+
+
+  taucs_free(pi);taucs_free(d);free_linked_list(l);free_linked_list(l_c);taucs_free(pi1);
+  return 1;
+  
+ exit_Av_Part_W:
+  /* NOTE(review): the success path above releases the lists with
+     free_linked_list() only, while this path additionally calls taucs_free()
+     on them; confirm whether free_linked_list() already frees the list
+     header, in which case the extra taucs_free() calls must go. */
+  taucs_free(pi);taucs_free(d);
+  if (l) free_linked_list(l);
+  taucs_free(l);
+  taucs_free(findrho);
+  if (l_c) free_linked_list(l_c);
+  taucs_free(l_c);
+  taucs_free(pi1);
+  return 0;
+
+  
+}
+
+static taucs_ccs_matrix *amst_preconditioner_create(graph *mtxA, double* diag,int rnd,double subgraphs,int stretch_flag);
+static int Prim(graph *mtxA,int r,int *pi,int *d,linked *l);
+/*static int Prim_cluster(graph *mtxA,int r,int *pi,linked *l,int *partition,int *new_partition,int nparts,int *point_to_heap,char *in_Q,mstheap h);*/
+
+/* Compute hop distances from root r, where a step between two vertices of
+   the same cluster (partition[]) costs 0 and any other step costs 1.  Only
+   edges whose weight class, (int)(log(|v/min|)/log(y)), is below j are
+   followed.
+
+   pi      : receives the predecessor of each reached vertex (pi[r] = -1).
+   l       : adjacency lists of mtxA.
+   d       : receives the distance of each reached vertex; entries of
+             unreached vertices are left untouched.
+   maxdist : receives the largest distance assigned.
+
+   Returns 1 on success, 0 if working storage could not be allocated. */
+static int Dijkstra(graph *mtxA,int r,int *pi,linked *l,int *d,int *maxdist,int *partition,double min,double y,int j)
+{
+  /* Dijkstra is used in order to compute the distance of vertices from the root.
+     The distance of two vertices within the same partition is 0 */
+
+  int n,entry,u,v,i;
+  mstheap h;
+  int *point_to_heap = NULL;
+  char *in_Q = NULL;
+  int size_of_Q;
+  edge *p;
+  double weight;
+
+  *maxdist=0;
+  
+  n = mtxA->n;
+
+  /* make free_mstheap(h) safe on the early-exit path */
+  h.vertices = NULL;
+  h.key = NULL;
+
+  point_to_heap = (int*)  taucs_malloc(n*sizeof(int));
+  in_Q          = (char*) taucs_malloc(n*sizeof(char));
+  if (!point_to_heap || !in_Q)
+    goto exit_Dijkstra;
+
+  Do(i,n) in_Q[i] = 1;
+  size_of_Q = n;
+  
+  /* Keys are negated distances, so the max-heap yields the closest vertex */
+  
+  if (build_mstheap(r,n,&h,point_to_heap,1) == -1) 
+    goto exit_Dijkstra;
+
+  pi[r] = -1;
+  
+  while(size_of_Q > 0) {
+    /* a top key of -INF means the remaining vertices are unreachable */
+    if (h.key[0] == -INF)
+      goto after_Dijkstra;
+    assert(h.key[0] != -INF);
+    weight = -h.key[0];
+    u = Mstheap_Extract_Max(&h,point_to_heap);
+    d[u] = (int)weight;
+    if (weight>*maxdist)
+      *maxdist = (int)weight;
+
+    in_Q[u] = 0;
+    size_of_Q --;
+
+    p = (l->point)[u];
+    while (p != NULL) {
+      entry = p->entry_no;
+      /* NOTE(review): v/min is truncated to int before abs(); confirm
+	 whether fabs() was intended. */
+      if ((int)(log(abs((int)(mtxA->edges[entry].v/min)))/log(y)) < j)
+	{      
+	  /* v belongs to Adj[u] */
+
+	  if ((mtxA->edges)[entry].j != u)
+	    v = (mtxA->edges)[entry].j;
+	  else
+	    v = (mtxA->edges)[entry].i;
+	  
+	  if (in_Q[v] && (-h.key[point_to_heap[v]] > ((partition[v]==partition[u])?0:1) + weight))
+	    {
+	      pi[v] = u;
+	      mstheap_increase_key(h,v,-(((partition[v]==partition[u])?0:1) + weight),point_to_heap);
+	    }
+	}
+      p = p->next;
+    }
+  }
+
+ after_Dijkstra:
+  free_mstheap(h);
+  taucs_free(point_to_heap);
+  taucs_free(in_Q);
+  return 1;
+
+ exit_Dijkstra:
+  free_mstheap(h);
+  taucs_free(point_to_heap);
+  taucs_free(in_Q);
+  return 0;
+  
+}
+
+/* Partition an n-by-n grid, stored row-major in partition[0 .. n*n-1], into
+   square tiles of side 2^j.  Cell (row,col) receives the tile number
+   q*(row/side) + (col/side), where q is the number of tiles per grid side
+   (n/side rounded up).  *nparts is set to the id of the last cell plus one. */
+void stupid_part(int *partition,int n,int j,int *nparts)
+{
+  const int side = 1<<j;
+  int tiles_per_side;
+  int row,col;
+
+  tiles_per_side = n/side;
+  if (n%side != 0)
+    tiles_per_side++;
+
+  for (row = 0; row < n; row++)
+    {
+      int *cells = partition + row*n;
+      const int first_tile = tiles_per_side*(row/side);
+
+      for (col = 0; col < n; col++)
+	cells[col] = first_tile + (col/side);
+    }
+
+  *nparts = partition[n*n-1] + 1;
+}
+
+/* Build a spanning tree of mtxA by repeated clustering: starting from the
+   partition in which every vertex is its own cluster, Av_Part_W() is called
+   until a single cluster remains; each call appends edges to the result.
+
+   Returns a newly constructed graph with n-1 entries (the caller releases
+   it with free_graph()), or 0 if an allocation or Av_Part_W() failed.
+
+   The region between #if 0 and #endif is disabled code; choose_root and pi
+   are only used inside it.
+
+   NOTE(review): on the failure path free_graph(out) may be reached with
+   out == NULL -- confirm that free_graph() accepts NULL. */
+graph *low_stretch(graph *mtxA)
+{
+  int *partition = NULL,*new_partition = NULL,*choose_root = NULL;
+  int i,n,nparts,new_nparts;/* r omer*/
+  /*double dummy; omer*/
+  int *pi = NULL;
+  /*linked *l = NULL;*/
+  /*int k=0,j=0;*/ /* row,col, omer*/
+  /*edge *pe,*max_pe; omer*/
+  graph *out;
+  /*int *point_to_heap = NULL;*/
+  /*char *in_Q = NULL;*/
+  /*mstheap h; omer*/
+
+  n = mtxA->n;
+  
+  out = construct_graph(n-1);
+  partition = (int *)taucs_malloc(n*sizeof(int));
+  new_partition = (int *)taucs_malloc(n*sizeof(int));
+  choose_root = (int *)taucs_malloc(n*sizeof(int));
+  pi = (int *)taucs_malloc(n*sizeof(int));
+
+  if (!out || !partition || !new_partition || !choose_root || !pi)
+    goto exit_low_stretch;
+
+  out->n = n;
+  out->nent = 0;
+  
+  /* initially every vertex forms its own cluster */
+  for (i=0;i<n;i++)
+    partition[i] = i;
+
+  nparts = n;
+
+  while (nparts > 1)
+    {
+      if (Av_Part_W(mtxA,partition,new_partition,&new_nparts,out,nparts) == 0)
+	goto exit_low_stretch;
+
+#if 0
+      for(i=0;i<n;i++)
+	pi[i] = -1;
+
+      out->n = n;
+      out->nent = n-1;
+
+      j++;
+
+      /* stupid_part(new_partition,(int)(sqrt(n)),j,&new_nparts); */
+
+      for(i=0;i<new_nparts;i++)
+	choose_root[i] = 0;
+      
+      l = create_linked_list_cluster(mtxA,nparts,mtxA->nent,&dummy,&dummy,partition,new_partition);
+      /* This linked list contains all the edges which connect vertices whose endpoints are
+	 in different sections in partition, but in the same section in new_partition.
+	 This will help us build trees within each section in new_partition. */
+
+      if (l == NULL)
+	goto exit_low_stretch;
+
+
+      point_to_heap = (int *)taucs_malloc(n*sizeof(int));
+      in_Q = (char *)taucs_malloc(n*sizeof(char));
+      h.vertices = NULL;
+      h.key = NULL;
+      h.vertices = (int *)taucs_malloc(n * sizeof(int));
+      h.key = (double *)taucs_malloc(n * sizeof(double));
+
+      if (!point_to_heap || !in_Q || !h.vertices || !h.key)
+	{
+	  taucs_free(point_to_heap);taucs_free(in_Q);taucs_free(h.vertices);taucs_free(h.key);
+	  goto exit_low_stretch;
+	}
+
+      for(i=0;i<n;i++)
+	{
+	  if (choose_root[new_partition[i]] == 0)
+	    {
+	      /* new_partition[i] is a section for which a tree has not yet been found */
+	      choose_root[new_partition[i]] = 1;
+	      r = partition[i];
+	      if (Prim_cluster(mtxA,r,pi,l,partition,new_partition,nparts,point_to_heap,in_Q,h) == 0)
+		{
+		  free_linked_list(l);
+		  goto exit_low_stretch;
+		}
+	    }
+	}
+      
+      taucs_free(point_to_heap);
+      taucs_free(in_Q);
+      free_mstheap(h);
+
+      Do(i,nparts) {
+	row = i;
+	col = pi[i];
+	if (col != (-1)) {
+	  pe = l->point[row];
+	  max_pe = NULL;
+	  while (pe != NULL) {
+	    if (   (partition[(mtxA->edges)[pe->entry_no].j] == col) 
+		   || (partition[(mtxA->edges)[pe->entry_no].i] == col))
+	      {
+		if (!max_pe)
+		  max_pe = pe;
+		else
+		  if (-mtxA->edges[pe->entry_no].v > -mtxA->edges[max_pe->entry_no].v)
+		    max_pe = pe;
+	      }
+	    pe = pe->next;
+	  }
+	  
+	  assert(max_pe);
+	  out->edges[k].i = (mtxA->edges)[max_pe->entry_no].i;
+	  out->edges[k].j = (mtxA->edges)[max_pe->entry_no].j;
+	  out->edges[k].v = (mtxA->edges)[max_pe->entry_no].v;
+	  k++;
+	  
+	}
+      }
+
+      out->nent = k;
+      free_linked_list(l);
+#endif
+
+      /* the clustering just computed becomes the input of the next round */
+      for(i=0;i<n;i++)
+	partition[i] = new_partition[i];
+      
+      nparts = new_nparts;
+
+    }
+  
+  assert(out->nent==(n-1)); /* helps verify that out is a tree */
+
+  taucs_free(partition);taucs_free(new_partition);taucs_free(choose_root);taucs_free(pi);
+  return out;
+  
+ exit_low_stretch:
+  free_graph(out);taucs_free(partition);taucs_free(new_partition);taucs_free(choose_root);taucs_free(pi);
+  return 0;
+}
+
+/* Prim's algorithm (Introduction to Algorithms, page 509) on mtxA rooted at
+   r, using -v as the edge weight so that the max-heap grows a maximum-weight
+   spanning tree.
+
+   pi : receives the parent of every vertex whose key was raised (pi[r] = -1).
+   d  : receives the depth of those vertices in the tree (d[r] = 0).
+   l  : adjacency lists of mtxA.
+
+   Returns 1 on success.  On allocation failure returns 0 and, as before,
+   also frees the caller's pi array: the caller must not use or free pi
+   after a 0 return. */
+static
+int Prim(graph *mtxA,int r,int *pi,int *d,linked *l)
+{
+  int n,entry,u,v,i;
+  mstheap h;
+  int *point_to_heap;
+  char *in_Q;
+  int size_of_Q;
+  edge *p;
+
+  n = mtxA->n;
+
+  /* make free_mstheap(h) safe if we bail out before the heap is built */
+  h.vertices = NULL;
+  h.key = NULL;
+
+  point_to_heap = (int*)  taucs_malloc(n*sizeof(int));
+  in_Q          = (char*) taucs_malloc(n*sizeof(char));
+
+  if (!point_to_heap || !in_Q ||
+      (build_mstheap(r,n,&h,point_to_heap,1) == -1)) {
+    free_mstheap(h);
+    taucs_free(in_Q);
+    taucs_free(pi);
+    taucs_free(point_to_heap);
+    /* free linked_list; */
+    return 0;
+  }
+
+  Do(i,n) in_Q[i] = 1;
+  size_of_Q = n;
+
+  pi[r] = -1;
+  d[r] = 0;
+  
+  while(size_of_Q > 0) {
+    u = Mstheap_Extract_Max(&h,point_to_heap);
+    in_Q[u] = 0;
+    size_of_Q --;
+
+    p = (l->point)[u];
+    while (p != NULL) {
+      entry = p->entry_no;
+      /* v belongs to Adj[u] */
+
+      if ((mtxA->edges)[entry].j != u)
+	v = (mtxA->edges)[entry].j;
+      else
+	v = (mtxA->edges)[entry].i;
+      
+      /* a heavier connection to v was found: re-parent v under u */
+      if (in_Q[v] && ((-((mtxA->edges)[entry].v)) > h.key[point_to_heap[v]])) {
+	pi[v] = u;
+	d[v] = d[u] + 1;
+	mstheap_increase_key(h,v,-((mtxA->edges)[entry].v),point_to_heap);
+      }
+      p = p->next;
+    }
+  }
+
+  free_mstheap(h);
+  taucs_free(point_to_heap);
+  taucs_free(in_Q);
+  return 1;
+}
+
+#if 0
+/* Disabled (compiled out): variant of Prim() that works on clusters instead
+   of vertices.  Heap, point_to_heap and in_Q are supplied by the caller and
+   are sized by nparts; edge endpoints are mapped to cluster ids through
+   partition[].  Stops early once the top key is -INF. */
+static
+int Prim_cluster(graph *mtxA,int r,int *pi,linked *l,int *partition,int *new_partition,int nparts,int *point_to_heap,char *in_Q,mstheap h)
+{
+  /* Prim's Algorithm - Introduction to Algorithms page 509 */
+  
+  int n,entry,u,v,i;
+  /* mstheap h; */
+  /* int *point_to_heap = NULL; */
+  /* char *in_Q = NULL; */
+  int size_of_Q;
+  edge *p;
+
+  n = mtxA->n;
+
+  /* point_to_heap = (int*)  taucs_malloc(nparts*sizeof(int)); */
+  /* in_Q          = (char*) taucs_malloc(nparts*sizeof(char)); */
+  /* if (!point_to_heap || !in_Q) */
+    /* goto exit_Prim_cluster; */
+
+  Do(i,nparts) in_Q[i] = 1;
+  size_of_Q = nparts;
+  
+  /* Prim's Algorithm - Introduction to Algorithms page 509 */
+
+  if (build_mstheap(r,nparts,&h,point_to_heap,0) == -1)
+    goto exit_Prim_cluster;
+
+  pi[r] = -1;
+  
+  while(size_of_Q > 0) {
+    if (h.key[0] == -INF)
+      goto after_Prim_cluster;
+    u = Mstheap_Extract_Max(&h,point_to_heap);
+
+    in_Q[u] = 0;
+    size_of_Q --;
+
+    p = (l->point)[u];
+    while (p != NULL) {
+      entry = p->entry_no;
+      /* v belongs to Adj[u] */
+
+      if (partition[(mtxA->edges)[entry].j] != u)
+	v = partition[(mtxA->edges)[entry].j];
+      else
+	v = partition[(mtxA->edges)[entry].i];
+
+      if (in_Q[v] && ((-((mtxA->edges)[entry].v)) > h.key[point_to_heap[v]])) {
+	pi[v] = u;
+	mstheap_increase_key(h,v,-((mtxA->edges)[entry].v),point_to_heap);
+      }
+      p = p->next;
+    }
+  }
+
+ after_Prim_cluster:
+  /* free_mstheap(h); */
+  /* taucs_free(point_to_heap); */
+  /* taucs_free(in_Q); */
+  return 1;
+
+ exit_Prim_cluster:
+  /* free_mstheap(h); */
+  /* taucs_free(point_to_heap); */
+  /* taucs_free(in_Q); */
+  return 0;
+}
+#endif /* 0, we don't need this routine */
+
+/* Sum the edge values along the tree path between vertices i and j.
+
+   The tree is given by the parent array pi and the depth array d; the edge
+   joining a vertex to its parent is looked up in that vertex's adjacency
+   list l->point[].  While walking, congestion[v] is increased by (edge
+   value)/w for every vertex v whose parent edge is used, and *dilation is
+   raised to (path sum)/w if that is larger.  Returns the path sum. */
+static double dist(int i, int j, double w, graph *mtxA,int *pi,int *d,linked *l,double *dilation,double *congestion)
+{
+  double path = 0;
+  double ev;
+  edge *e;
+
+  /* make i the deeper of the two endpoints */
+  if (d[i] < d[j])
+    {
+      int deeper = j;
+      j = i;
+      i = deeper;
+    }
+
+  /* lift i until both endpoints are at the same depth */
+  while (d[i] > d[j])
+    {
+      for (e = l->point[i];
+	   (mtxA->edges[e->entry_no].i != pi[i]) && (mtxA->edges[e->entry_no].j != pi[i]);
+	   e = e->next)
+	;
+      ev = mtxA->edges[e->entry_no].v;
+      path += ev;
+      congestion[i] += ev/w;
+      i = pi[i];
+    }
+
+  /* lift both endpoints in lock step until they meet */
+  while (i != j)
+    {
+      for (e = l->point[i];
+	   (mtxA->edges[e->entry_no].i != pi[i]) && (mtxA->edges[e->entry_no].j != pi[i]);
+	   e = e->next)
+	;
+      ev = mtxA->edges[e->entry_no].v;
+      path += ev;
+      congestion[i] += ev/w;
+      i = pi[i];
+
+      for (e = l->point[j];
+	   (mtxA->edges[e->entry_no].i != pi[j]) && (mtxA->edges[e->entry_no].j != pi[j]);
+	   e = e->next)
+	;
+      ev = mtxA->edges[e->entry_no].v;
+      path += ev;
+      congestion[j] += ev/w;
+      j = pi[j];
+    }
+
+  *dilation = max(*dilation,path/w);
+  return(path);
+}
+
+/* Diagnostic: measure how well the tree (pi = parents, d = depths) spans
+   mtxA.  For every off-diagonal entry the tree-path sum between its
+   endpoints is divided by the entry's own value; the average of these
+   ratios is returned.  The product of the largest per-vertex congestion and
+   the largest dilation seen is printed to stdout.
+
+   Allocation failures are only caught by assert().  If mtxA has no
+   off-diagonal entries the result is 0.0/0. */
+static double find_stretch(graph *mtxA,int *pi,int *d)
+{
+  int i,E=0;
+  double stretch=0,dummy;
+  linked *l;
+  /* dist() updates this with max(*dilation, ...), so it must start defined */
+  double dilation = 0;
+  double *congestion,cong=0;
+  
+  congestion = (double *)taucs_calloc(mtxA->n,sizeof(double));
+
+  l = create_linked_list(mtxA,mtxA->n,mtxA->nent,&dummy,&dummy);
+  assert(l && congestion);
+
+  for (i=0;i<mtxA->nent;i++)
+    {
+      if (mtxA->edges[i].i != mtxA->edges[i].j)
+	{
+	  E++;
+	  stretch += dist(mtxA->edges[i].i,mtxA->edges[i].j,mtxA->edges[i].v,mtxA,pi,d,l,&dilation,congestion)/mtxA->edges[i].v;
+	}
+    }
+  
+  Do(i,mtxA->n)
+    cong = max(cong,congestion[i]);
+
+  printf("Cong-Dil = %f\n",cong*dilation);
+
+  free_linked_list(l);
+  taucs_free(congestion);
+  return(stretch/E);
+}
+
+/* Build the augmented-spanning-tree preconditioner for the graph mtxA.
+
+   Steps: (1) build a spanning tree -- via low_stretch() when stretch_flag is
+   set, otherwise Prim() directly on mtxA; (2) split the tree into groups
+   with divide_to_groups(); (3) put the tree edges into a new graph;
+   (4) let add_heavy_edges() add one edge between groups that the tree does
+   not connect; (5) append the adjusted diagonal and convert to CCS.
+
+   mtxA         : input graph.  Ownership passes to this function: it is
+                  freed on every path, success or failure.
+   diag         : per-vertex diagonal values, modified and then freed here
+                  on every path.
+   rnd          : the tree root is rnd % n.
+   subgraphs    : passed through to divide_to_groups().
+   stretch_flag : non-zero selects the low-stretch tree.
+
+   Returns the preconditioner matrix, or NULL on failure. */
+static
+taucs_ccs_matrix*
+amst_preconditioner_create(graph *mtxA, double* diag,
+			   int rnd,
+			   double subgraphs,
+			   int stretch_flag)
+{
+  taucs_ccs_matrix* out;
+  graph *precond = NULL;
+  int i;
+  int *pi = NULL,*d = NULL;
+  edge **array;
+  edge *p;
+  int r;
+  int Bent,row,col;
+  double weight = 0;
+  int *first_child = NULL,*next_child = NULL;
+  int *groups = NULL,*sub_tree_sizes = NULL;
+  int curr_group;
+  int n;
+  int precond_maxnent,chunk;
+  linked* lp = NULL;
+  double dummy;
+  double wtime;
+
+  n = mtxA->n;
+
+  wtime = taucs_wtime();
+
+  pi            = (int*)  taucs_malloc(n*sizeof(int));
+  d            = (int*)  taucs_malloc(n*sizeof(int));
+
+  lp = create_linked_list(mtxA,n,mtxA->nent,&dummy,&dummy); /* THIS MAY RUN OUT OF MEMORY ! */
+
+  if (!pi || !d || !lp)
+    goto exit_amst;
+
+#if 0
+  taucs_free(diag);
+  diag = analyze_graph(mtxA); /* should change! */
+#endif
+
+  /* array is an array of linked lists, which hold the
+     off-diagonal entries of mtxA */
+
+  array = lp->point;
+
+  Do(i,n) pi[i] = -1;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST prepare for mst = %.3f seconds\n",wtime);
+
+  wtime = taucs_wtime();
+  
+  r = rnd % n;
+
+  if (stretch_flag)
+    {
+      graph *low_stretch_tree;
+      linked *lp_low;
+      int prim_ok;
+
+      if ((low_stretch_tree = low_stretch(mtxA)) == NULL)
+	goto exit_amst;
+
+      lp_low = create_linked_list(low_stretch_tree,n,low_stretch_tree->nent,&dummy,&dummy); /* THIS MAY RUN OUT OF MEMORY ! */
+      if (lp_low == NULL)
+	{
+	  free_graph(low_stretch_tree);
+	  goto exit_amst;
+	}
+      Do(i,n)
+	pi[i] = -2;
+      prim_ok = Prim(low_stretch_tree,r,pi,d,lp_low);
+      
+      free_graph(low_stretch_tree);
+      free_linked_list(lp_low);
+
+      if (!prim_ok)
+	{
+	  pi = NULL; /* Prim() has already freed pi when it fails */
+	  goto exit_amst;
+	}
+    }
+
+  else
+    {
+      if (!Prim(mtxA,r,pi,d,lp))
+	{
+	  pi = NULL; /* Prim() has already freed pi when it fails */
+	  goto exit_amst;
+	}
+    }
+
+  /* pi now contains the parent array of the tree, d the distance from the root */
+  printf("Stretch = %f\n",find_stretch(mtxA,pi,d));
+  
+
+  groups         = (int *) taucs_malloc(n*sizeof(int));
+  first_child    = (int *) taucs_malloc(n*sizeof(int));
+  next_child     = (int *) taucs_malloc(n*sizeof(int));
+  sub_tree_sizes = (int *) taucs_malloc(n*sizeof(int));
+
+  if (!groups || !first_child || !next_child || !sub_tree_sizes)
+    goto exit_amst;
+
+  if (create_children_arrays(pi,n,&first_child,&next_child) == -1)
+    goto exit_amst;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST mst = %.3f seconds\n",wtime);
+
+  wtime = taucs_wtime();
+
+  Do(i,n) groups[i] = -1;
+  
+  curr_group = 0;
+  
+  compute_sub_tree_sizes(r,first_child,next_child,sub_tree_sizes);
+
+  /* now for every vertex v in the tree, sub_tree_sizes[v] is the size
+     of the subtree whose root is v */
+  
+  divide_to_groups(r,first_child,next_child,pi,&curr_group,groups,
+		   sub_tree_sizes,r,subgraphs,n);
+  assign_group(r,curr_group,first_child,next_child,groups);
+  curr_group++;
+
+  /* curr_group is an int, so the conversion must be %d */
+  taucs_printf("actual number of subgraphs = %d\n",curr_group);
+
+  
+  /* reset after freeing so the shared failure path cannot free them twice */
+  taucs_free(first_child);
+  first_child = NULL;
+  taucs_free(next_child);
+  next_child = NULL;
+  taucs_free(sub_tree_sizes);
+  sub_tree_sizes = NULL;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST partition = %.3f seconds\n",wtime);
+
+  /* now the tree is divided into linked groups */
+
+  wtime = taucs_wtime();
+
+  chunk = max(min((curr_group*(curr_group-1)/2)/10,5000),100);
+  precond_maxnent = (n-1) + n + chunk;
+
+  taucs_printf("allocating space for %d entries in precond\n",precond_maxnent);
+  
+  precond = construct_graph(precond_maxnent);
+  if (!precond)
+    goto exit_amst;
+  precond->n=mtxA->n;
+
+  Bent = 0;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST allocating mst precond = %.3f seconds\n",wtime);
+
+  wtime = taucs_wtime();
+
+  /* adds the tree edges to the preconditioner */
+  Do(i,n) {
+    row = i;
+    col = pi[i];
+    if (col != (-1)) {
+      p = array[row];
+      
+      /* find the value of the edge (row,col); if the list holds no such
+	 edge, weight keeps its previous value */
+      while (p != NULL) {
+	if (   ((mtxA->edges)[p->entry_no].j == col) 
+	    || ((mtxA->edges)[p->entry_no].i == col)) {
+	  weight = (mtxA->edges)[p->entry_no].v;
+	  break;
+	}
+	p = p->next;
+      }
+      
+      (precond->edges)[Bent].i = row;
+      (precond->edges)[Bent].j = col;
+      (precond->edges)[Bent].v = weight;
+
+      Bent++;
+
+      diag[row] -= weight;
+      diag[col] -= weight;
+    }
+  }
+  
+  precond->nent = Bent;
+
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST adding tree edges = %.3f seconds\n",wtime);
+
+  /* 
+     add the heavy edges between every two subgraphs, if such an edge
+     exists, and if the subgraphs were not already connected through the
+     tree 
+  */
+
+  wtime = taucs_wtime();
+
+  if (curr_group>1) /* more than 1 group */
+    Bent = add_heavy_edges(mtxA,precond,Bent,array,groups,curr_group,pi,diag);
+
+  if (Bent == -1) /* memory allocation failure in add_heavy_edges */
+    goto exit_amst;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST finding heavy edges = %.3f seconds\n",wtime);
+  
+  /* allocate more memory to the preconditioner if needed */
+
+  wtime = taucs_wtime();
+
+  if (precond_maxnent < Bent + n) {
+    taucs_printf("adding space for %d entries in precond for diagonal entries\n",Bent+n-precond_maxnent);
+    precond_maxnent = Bent + n;
+
+    /* NOTE(review): the result of graph_resize() is not checked; confirm
+       how it reports an allocation failure. */
+    graph_resize(precond,precond_maxnent);
+    precond->nent = Bent;
+  }
+  
+  /* append the (adjusted) diagonal */
+  Do(i,n) {
+
+    (precond->edges)[Bent].i = i;
+    (precond->edges)[Bent].j = i;
+    (precond->edges)[Bent].v = diag[i];
+
+    Bent++;
+  }
+
+  precond->nent = Bent;
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST resize and add heavy edges = %.3f seconds\n",wtime);
+
+  wtime = taucs_wtime();
+
+  taucs_free(pi);
+  taucs_free(d);
+  taucs_free(diag);
+  taucs_free(groups);
+  free_linked_list(lp);
+  taucs_printf("actual number of entries in preconditioner = %d\n",Bent);
+  free_graph(mtxA);
+
+  /* NOTE(review): the conversion may fail; with assertions enabled that
+     aborts here, otherwise the NULL is returned to the caller. */
+
+  out = graph_to_ccs_matrix(precond); 
+
+  assert(out);
+
+  free_graph(precond);
+
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMST free memory and convert to ccs = %.3f seconds\n",wtime);
+
+#define TAUCS_VAIDYA_RELAX_NONONO
+#ifdef TAUCS_VAIDYA_RELAX
+  {
+    taucs_ccs_matrix* A = ccs_mtxA;
+    taucs_ccs_matrix* M = out;
+    int j;
+    
+    for (j=0; j<A->n; j++) {
+      double dA, dM, modification;
+      int i,ip;
+      for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
+	i = (A->rowind)[ ip ];
+	if (i==j) {
+	  dA = (A->values.d/*taucs_values*/)[ ip ];
+	  break;
+	}
+      }
+
+      for (ip=(M->colptr)[j]; ip<(M->colptr)[j+1]; ip++) {
+	i = (M->rowind)[ ip ];
+	if (i==j) {
+	  dM = (M->values.d/*taucs_values*/)[ ip ];
+	  break;
+	}
+      }
+      
+      assert(dA >= dM);
+      
+      modification = dA - dM;
+      if (j < 30) printf(">>> %.4e %.4e\n",dA,dM);
+      
+      (M->values.d/*taucs_values*/)[ ip ] += 0.01 * modification;
+    }
+  }
+#endif
+
+  return out;
+
+ exit_amst:
+  /* single failure path: release everything acquired so far, including the
+     mtxA and diag arguments this function owns */
+  taucs_free(groups);
+  taucs_free(first_child);
+  taucs_free(next_child);
+  taucs_free(sub_tree_sizes);
+  taucs_free(pi);
+  taucs_free(d);
+  taucs_free(diag);
+  if (lp) free_linked_list(lp);
+  if (precond) free_graph(precond);
+  free_graph(mtxA);
+  return NULL;
+}
+
+
+/* Public entry point: build a preconditioner for the double-precision
+   matrix A.
+
+   A is converted to a graph plus a separate diagonal.  If the conversion
+   diagnostics show positive off-diagonals, amwb_preconditioner_create() is
+   used, otherwise amst_preconditioner_create(); both take ownership of the
+   graph and the diagonal.
+
+   rnd, subgraphs and stretch_flag are passed through (stretch_flag only to
+   the AMST variant).
+
+   Returns NULL if A is not double precision or an allocation or conversion
+   fails.  Returns A itself when the diagnostics report that A is not
+   symmetric-lower, not diagonally dominant, or has negative diagonal
+   entries.  Otherwise returns what the selected builder returns. */
+taucs_ccs_matrix*
+taucs_amwb_preconditioner_create(taucs_ccs_matrix *A, 
+				 int rnd,
+				 double subgraphs,
+				 int stretch_flag)
+{
+  double  wtime;
+  double* diag;
+  graph*  G_A;
+  int     diagnostics;
+  int     n;
+
+  if (!(A->flags & TAUCS_DOUBLE)) {
+    taucs_printf("taucs_amwb_preconditioner_create: matrix must be double-precision real\n");
+    return NULL;
+  }
+
+  n = A->n;
+
+  diag = (double*) taucs_malloc(n*sizeof(double));
+  if (diag == NULL) return NULL;
+
+  wtime = taucs_wtime();
+  G_A = ccs_matrix_to_graph_plus(A,&diagnostics,diag,1 /* force diag dominance */);
+  if (!G_A) {
+    taucs_free(diag);
+    return NULL;
+  }
+  wtime = taucs_wtime() - wtime;
+  taucs_printf("\t\tAMWB matrix-to-graph + analysis = %.3f seconds\n",wtime);
+
+  if (diagnostics & TAUCS_SYM_NOT_SYMLOWER) {
+    taucs_printf("taucs_amwb_preconditioner_create: matrix must be symmetrix & lower\n");
+    /* G_A is known to be non-NULL here (checked above), so it must be
+       released like in the other rejection branches */
+    taucs_free(diag);
+    free_graph(G_A);
+    return A;
+  }
+  if (diagnostics & TAUCS_SYM_NOT_DIAGDOMINANT) {
+    taucs_printf("taucs_amwb_preconditioner_create: matrix not diagonally dominant\n");
+    taucs_free(diag);
+    free_graph(G_A);
+    return A;
+  }
+  if (diagnostics & TAUCS_SYM_NEG_DIAGONALS) {
+    taucs_printf("taucs_amwb_preconditioner_create: negative diagonal elements\n");
+    taucs_free(diag);
+    free_graph(G_A);
+    return A;
+  }
+
+  /* from here on the selected builder owns G_A and diag */
+  if (diagnostics & TAUCS_SYM_POS_OFFDIAGONALS)
+    return amwb_preconditioner_create(G_A, diag, rnd, subgraphs);
+  else
+    return amst_preconditioner_create(G_A, diag, rnd, subgraphs,stretch_flag);
+}
+
+#endif /* TAUCS_CORE_DOUBLE */
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
diff --git a/contrib/taucs/src/taucs_vec_base.c b/contrib/taucs/src/taucs_vec_base.c
new file mode 100644
index 0000000000000000000000000000000000000000..5f921f0349a431c3d63b9a34313471b2020c25f1
--- /dev/null
+++ b/contrib/taucs/src/taucs_vec_base.c
@@ -0,0 +1,212 @@
+/*********************************************************/
+/* TAUCS                                                 */
+/* Author: Sivan Toledo                                  */
+/*********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "taucs.h"
+
+#ifndef TAUCS_CORE
+#error "This is a TAUCS core file: you must define a primitive data type"
+#endif
+
+/*
+ * RNDM: pseudo-random double computed as random() divided by RAND_MAX.
+ *
+ * The macro expands to a single parenthesized expression with no trailing
+ * semicolon, so it can be used inside larger expressions as well as in a
+ * plain statement such as "x = RNDM;".
+ *
+ * NOTE(review): random() is a POSIX routine whose range is 0..2^31-1, while
+ * RAND_MAX is the bound documented for rand(); the quotient stays within
+ * [0,1] only on platforms where the two limits coincide -- confirm on the
+ * targets that matter before relying on the range.
+ */
+#define RNDM ((double)random()/(double)RAND_MAX)
+
+/*
+ * max(x,y): larger of the two arguments, defined only when no max macro
+ * is already visible. Each argument appears twice in the expansion, so
+ * the selected operand is evaluated twice; avoid arguments with side effects.
+ */
+#ifndef max /*omer*/
+#define max(x,y) ( ((x) > (y)) ? (x) : (y) )
+#endif
+
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+
+#ifdef TAUCS_CORE_GENERAL
+
+/*
+ * Type-generic 2-norm of a vector.
+ *
+ * Forwards to the BLAS nrm2 routine that matches the precision bit set in
+ * flags, passing n, the data pointer and an increment of 1.
+ *
+ *   n     - element count handed to the BLAS routine
+ *   flags - bitmask tested against TAUCS_DOUBLE, TAUCS_SINGLE,
+ *           TAUCS_DCOMPLEX and TAUCS_SCOMPLEX, in that order
+ *   x     - vector data, passed through without conversion
+ *
+ * The first compiled-in precision whose bit is set determines the result,
+ * which is cast to taucs_double. If no compiled-in precision matches, the
+ * value of taucs_get_nan() is returned.
+ */
+taucs_double
+taucs_vec_norm2(int n, int flags, void* x)
+{
+  int incx = 1;
+
+#ifdef TAUCS_CONFIG_DREAL
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    return (taucs_double) taucs_blas_name(dnrm2)(&n, x, &incx);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if ((flags & TAUCS_SINGLE) != 0) {
+    return (taucs_double) taucs_blas_name(snrm2)(&n, x, &incx);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    return (taucs_double) taucs_blas_name(dznrm2)(&n, x, &incx);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    return (taucs_double) taucs_blas_name(scnrm2)(&n, x, &incx);
+  }
+#endif
+
+  /* no compiled-in precision matched the flags */
+  return taucs_get_nan();
+}
+
+/*
+ * Type-generic wrapper around the typed axpby routines
+ * (taucs_dvec_axpby, taucs_svec_axpby, taucs_zvec_axpby, taucs_cvec_axpby).
+ *
+ *   n      - element count
+ *   flags  - bitmask selecting the element type of x, y and axpby
+ *   a, b   - real scalars; narrowed to taucs_single for the single
+ *            precision variants
+ *   x, y   - input vectors, reinterpreted according to flags
+ *   axpby  - output vector, reinterpreted according to flags
+ *
+ * The tests are independent of each other: every compiled-in precision
+ * whose bit is set in flags gets its typed routine called, in the order
+ * double, single, double complex, single complex. Nothing happens when
+ * no compiled-in precision matches.
+ */
+void
+taucs_vec_axpby(int n, int flags,
+                taucs_double a, void* x,
+                taucs_double b, void* y,
+                void* axpby)
+{
+#ifdef TAUCS_CONFIG_DREAL
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    taucs_double* xd   = (taucs_double*) x;
+    taucs_double* yd   = (taucs_double*) y;
+    taucs_double* outd = (taucs_double*) axpby;
+
+    taucs_dvec_axpby(n, (taucs_double) a, xd, (taucs_double) b, yd, outd);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if ((flags & TAUCS_SINGLE) != 0) {
+    taucs_single* xs   = (taucs_single*) x;
+    taucs_single* ys   = (taucs_single*) y;
+    taucs_single* outs = (taucs_single*) axpby;
+
+    taucs_svec_axpby(n, (taucs_single) a, xs, (taucs_single) b, ys, outs);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    taucs_dcomplex* xz   = (taucs_dcomplex*) x;
+    taucs_dcomplex* yz   = (taucs_dcomplex*) y;
+    taucs_dcomplex* outz = (taucs_dcomplex*) axpby;
+
+    taucs_zvec_axpby(n, (taucs_double) a, xz, (taucs_double) b, yz, outz);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    taucs_scomplex* xc   = (taucs_scomplex*) x;
+    taucs_scomplex* yc   = (taucs_scomplex*) y;
+    taucs_scomplex* outc = (taucs_scomplex*) axpby;
+
+    taucs_cvec_axpby(n, (taucs_single) a, xc, (taucs_single) b, yc, outc);
+  }
+#endif
+}
+
+/*
+ * Type-generic vector allocation.
+ *
+ * Hands n to the typed creation routine (taucs_dvec_create,
+ * taucs_svec_create, taucs_zvec_create or taucs_cvec_create) chosen by
+ * the first compiled-in precision bit found in flags, tested in the order
+ * double, single, double complex, single complex, and returns whatever
+ * that routine returns.
+ *
+ * Returns NULL when no compiled-in precision matches flags.
+ */
+void* taucs_vec_create(int n, int flags)
+{
+#ifdef TAUCS_CONFIG_DREAL
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    return taucs_dvec_create(n);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if ((flags & TAUCS_SINGLE) != 0) {
+    return taucs_svec_create(n);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    return taucs_zvec_create(n);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    return taucs_cvec_create(n);
+  }
+#endif
+
+  /* unsupported or missing precision flag */
+  return NULL;
+}
+
+/*
+ * Type-generic wrapper around the typed permute routines.
+ *
+ *   n     - element count
+ *   flags - bitmask selecting the element type of v and pv
+ *   v     - source vector, reinterpreted according to flags
+ *   pv    - destination vector, reinterpreted according to flags
+ *   p     - index array forwarded unchanged
+ *
+ * The typed routine defined in this file stores pv[i] = v[p[i]].
+ * The precision tests are independent: each compiled-in precision whose
+ * bit is set in flags triggers its typed call. Nothing happens when no
+ * compiled-in precision matches.
+ */
+void taucs_vec_permute(int n, int flags, void* v, void* pv, int p[])
+{
+#ifdef TAUCS_CONFIG_DREAL
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    taucs_double* src = (taucs_double*) v;
+    taucs_double* dst = (taucs_double*) pv;
+    taucs_dvec_permute(n, src, dst, p);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if ((flags & TAUCS_SINGLE) != 0) {
+    taucs_single* src = (taucs_single*) v;
+    taucs_single* dst = (taucs_single*) pv;
+    taucs_svec_permute(n, src, dst, p);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    taucs_dcomplex* src = (taucs_dcomplex*) v;
+    taucs_dcomplex* dst = (taucs_dcomplex*) pv;
+    taucs_zvec_permute(n, src, dst, p);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    taucs_scomplex* src = (taucs_scomplex*) v;
+    taucs_scomplex* dst = (taucs_scomplex*) pv;
+    taucs_cvec_permute(n, src, dst, p);
+  }
+#endif
+}
+
+/*
+ * Type-generic wrapper around the typed inverse-permute routines.
+ *
+ *   n     - element count
+ *   flags - bitmask selecting the element type of v and pv
+ *   v     - forwarded as the first vector argument of the typed routine
+ *   pv    - forwarded as the second vector argument of the typed routine
+ *   p     - index array forwarded unchanged
+ *
+ * Arguments are passed positionally as (n, v, pv, p). The typed routine
+ * defined in this file reads its first vector and writes its second at
+ * the indexed position, so in terms of the names used here the effect is
+ * pv[p[i]] = v[i].
+ *
+ * The precision tests are independent: each compiled-in precision whose
+ * bit is set in flags triggers its typed call. Nothing happens when no
+ * compiled-in precision matches.
+ */
+void taucs_vec_ipermute(int n, int flags, void* v, void* pv, int p[])
+{
+#ifdef TAUCS_CONFIG_DREAL
+  if ((flags & TAUCS_DOUBLE) != 0) {
+    taucs_double* first  = (taucs_double*) v;
+    taucs_double* second = (taucs_double*) pv;
+    taucs_dvec_ipermute(n, first, second, p);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SREAL
+  if ((flags & TAUCS_SINGLE) != 0) {
+    taucs_single* first  = (taucs_single*) v;
+    taucs_single* second = (taucs_single*) pv;
+    taucs_svec_ipermute(n, first, second, p);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_DCOMPLEX
+  if ((flags & TAUCS_DCOMPLEX) != 0) {
+    taucs_dcomplex* first  = (taucs_dcomplex*) v;
+    taucs_dcomplex* second = (taucs_dcomplex*) pv;
+    taucs_zvec_ipermute(n, first, second, p);
+  }
+#endif
+
+#ifdef TAUCS_CONFIG_SCOMPLEX
+  if ((flags & TAUCS_SCOMPLEX) != 0) {
+    taucs_scomplex* first  = (taucs_scomplex*) v;
+    taucs_scomplex* second = (taucs_scomplex*) pv;
+    taucs_cvec_ipermute(n, first, second, p);
+  }
+#endif
+}
+
+#else
+/*
+ * Typed vector allocation: requests room for n elements of
+ * taucs_datatype from taucs_malloc and returns the resulting pointer
+ * (whatever taucs_malloc yields, including a failure value).
+ * The contents of the returned storage are not initialized here.
+ */
+void*
+taucs_dtl(vec_create)(int n)
+{
+  taucs_datatype* storage;
+
+  storage = (taucs_datatype*) taucs_malloc(n*sizeof(taucs_datatype));
+  return storage;
+}
+
+/*
+ * Typed axpby: for every index k in 0..n-1 stores a*x[k] + b*y[k] into
+ * axpby[k].
+ *
+ *   n     - element count
+ *   a, b  - real scalars
+ *   x, y  - input vectors
+ *   axpby - output vector
+ *
+ * In complex builds (TAUCS_CORE_COMPLEX) the real and imaginary parts are
+ * scaled and summed separately and recombined with taucs_complex_create.
+ * Elements are processed in increasing index order.
+ */
+void
+taucs_dtl(vec_axpby)(int n,
+                     taucs_real_datatype a, taucs_datatype* x,
+                     taucs_real_datatype b, taucs_datatype* y,
+                     taucs_datatype* axpby)
+{
+  int k = 0;
+
+  while (k < n) {
+#ifdef TAUCS_CORE_COMPLEX
+    axpby[k] = taucs_complex_create(a * taucs_re(x[k]) + b * taucs_re(y[k]),
+                                    a * taucs_im(x[k]) + b * taucs_im(y[k]));
+#else
+    axpby[k] = a * x[k] + b * y[k];
+#endif
+    k++;
+  }
+}
+
+/*
+ * Typed gather by index array: pv[k] = v[p[k]] for k = 0..n-1,
+ * processed in increasing k. The entries of p are used as indices into v
+ * without any range checking.
+ */
+void
+taucs_dtl(vec_permute)(int n, taucs_datatype v[], taucs_datatype pv[], int p[])
+{
+  int k = 0;
+
+  while (k < n) {
+    pv[k] = v[p[k]];
+    k++;
+  }
+}
+
+/*
+ * Typed scatter by index array: v[invp[k]] = pv[k] for k = 0..n-1,
+ * processed in increasing k. The entries of invp are used as indices into
+ * v without any range checking.
+ */
+void
+taucs_dtl(vec_ipermute)(int n, taucs_datatype pv[], taucs_datatype v[], int invp[])
+{
+  int k = 0;
+
+  while (k < n) {
+    v[invp[k]] = pv[k];
+    k++;
+  }
+}
+
+#endif
+/*********************************************************/
+/*                                                       */
+/*********************************************************/
+