//*****************************************************************************
// FILE:        par_eig.cpp
//
//    Copyright (C)  2012 Kristian Damkjer.
//
// DESCRIPTION:
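//>   MEX gateway that computes the eigenvalues (and, when a second output is
//    requested, the eigenvectors) of every matrix stored in a MATLAB cell
//    array. The per-cell work is spread across threads when built with OpenMP.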
//<
//
// LIMITATIONS:
//>   Does not work for cell-arrays of complex matrices.
//<
//
// SOFTWARE HISTORY:
//> 2013-JUL-03  K. Damkjer
//               Initial Coding.
//<
//*****************************************************************************

#ifdef _OPENMP
#include <omp.h>
#endif

#include <vector>
#include <sstream>

//***
// Fix "wide char" definition for older versions of MATLAB. This must be placed
// after other includes and before the mex.h include.
//***
#if (defined(MATLAB_MAJOR) && defined(MATLAB_MINOR))
   #if MATLAB_MAJOR <= 7 && MATLAB_MINOR <= 10 && defined(_CHAR16T)
      #define CHAR16_T
   #endif
#endif

#include <Eigen/Dense>

#include "mex.h"

//*****************************************************************************
// FUNCTION: mexFunction
//>   The MATLAB Executable Gateway Function.
//
//    @todo Describe this MEX function
//
//    @param nlhs the number of left-hand side parameters.
//    @param plhs the array of left-hand side parameters.
//    @param nrhs the number of right-hand side parameters.
//    @param prhs the array of right-hand side parameters.
//<
//*****************************************************************************
void mexFunction(
        int nlhs, mxArray* plhs[],
        int nrhs, const mxArray* prhs[])
{
   if (nrhs != 1 || !mxIsCell(prhs[0]))
   {
      mexErrMsgIdAndTxt("Damkjer:fastcov:varargin",
                        "Missing or invalid input argument.");
   }
    
   if (nlhs > 2)
   {
      mexErrMsgIdAndTxt("Damkjer:fastcov:varargout",
                        "Too many output arguments.");
   }
   
   mwSize cells = mxGetNumberOfElements (prhs[0]);

   plhs[0] = mxCreateCellMatrix(cells, 1);

   // Better way?
   if (nlhs > 1)
   {
      plhs[1] = mxCreateCellMatrix(cells, 1);
   }
   
   std::vector<double*> data(cells,0);
   std::vector<mwSize> Ms(cells,0);
   std::vector<mwSize> Ns(cells,0);

   std::vector<mxArray*> vecs(cells,0);
   std::vector<double*> vecs_data(cells,0);

   std::vector<mxArray*> vals(cells,0);
   std::vector<double*> vals_data(cells,0);
   
   // Note for future: Ms - points, Ns - dimensions
   for (int cell = 0; cell < cells; ++cell)
   {
       data[cell]=mxGetPr(mxGetCell(prhs[0], cell));
       Ms[cell]=mxGetM(mxGetCell(prhs[0], cell));
       Ns[cell]=mxGetN(mxGetCell(prhs[0], cell));
   }

   // Always return values as vector
   for (int cell = 0; cell < cells; ++cell)
   {
      // We will be setting each value, so don't bother to initialize to zero.
      vals[cell] = mxCreateDoubleMatrix(0, 0, mxREAL);
      mxSetM(vals[cell], Ms[cell]);
      mxSetN(vals[cell], 1);
      mxSetData(vals[cell], mxMalloc(sizeof(double)*Ms[cell]));
      vals_data[cell] = mxGetPr(vals[cell]);         
   }

#ifdef _OPENMP
   omp_set_dynamic(1);
   omp_set_num_threads(omp_get_num_procs());
#endif

   #pragma omp parallel for schedule(guided)
   for (int cellp = 0; cellp < cells; ++cellp)
   {
      Eigen::VectorXd eivals = 
              Eigen::Map<Eigen::MatrixXd>(data[cellp], Ms[cellp], Ns[cellp]).
              selfadjointView<Eigen::Lower>().eigenvalues();
      
      for (mwSize m = Ms[cellp]; m --> 0;)
      {
         vals_data[cellp][m] = eivals(m);
      }
   }
      
   if (nlhs < 2)
   {
      // Only return values as vector
      for (int cell = 0; cell < cells; ++cell)
      {
         mxSetCell(plhs[0], cell, vals[cell]);
      }
   }
   else
   {
      // Return both values and vectors as matices
      for (int cell = 0; cell < cells; ++cell)
      {
         // We will be setting each value, so don't bother to initialize to zero.
         vecs[cell] = mxCreateDoubleMatrix(0, 0, mxREAL);
         mxSetM(vecs[cell], Ms[cell]);
         mxSetN(vecs[cell], Ns[cell]);
         mxSetData(vecs[cell], mxMalloc(sizeof(double)*Ms[cell]*Ns[cell]));
         vecs_data[cell] = mxGetPr(vecs[cell]);
      }

#ifdef _OPENMP
   omp_set_dynamic(1);
   omp_set_num_threads(omp_get_num_procs());
#endif

      #pragma omp parallel for schedule(guided)
      for (int cellp = 0; cellp < cells; ++cellp)
      {
         Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd>
         eig(Eigen::Map<Eigen::MatrixXd>(data[cellp], Ms[cellp], Ns[cellp]));
         
         Eigen::MatrixXd eivecs=eig.eigenvectors();
         
         for (mwSize m = Ms[cellp]; m --> 0;)
         {
            for (mwSize n = Ns[cellp]; n --> 0;)
            {
               vecs_data[cellp][n + Ns[cellp] * m] = eivecs(n,m);
            }
         }
      }
      
      for (int cell = 0; cell < cells; ++cell)
      {
         mxSetCell(plhs[0], cell, vecs[cell]);
         mxSetCell(plhs[1], cell, vals[cell]);
      }
   }
}
