//=========================================================================
// FILE:        fastcoeffvar.cpp
//
//    Copyright (C)  2012 Kristian Damkjer.
//
// DESCRIPTION: This MEX source file provides a fast coefficient-of-variation
//              computation for cell-arrays of real valued matrices.
//
// LIMITATIONS: Does not work for cell-arrays of complex matrices.
//
// SOFTWARE HISTORY:
//> 2012-SEP-11  K. Damkjer
//               Initial Coding.
//<
//=========================================================================

#ifdef _OPENMP
#include <omp.h>
#endif

#include <cmath>
#include <vector>

#if (defined(MATLAB_MAJOR) && defined(MATLAB_MINOR))
   #if MATLAB_MAJOR <= 7 && MATLAB_MINOR <= 10 && defined(_CHAR16T)
      #define CHAR16_T
   #endif
#endif

#include "mex.h"

void mexFunction(
        int nlhs, mxArray* plhs[],
        int nrhs, const mxArray* prhs[])
{
   if (nrhs != 1 || !mxIsCell(prhs[0]))
   {
      mexErrMsgIdAndTxt("Damkjer:fastcoeffvar:varargin",
                        "Missing or invalid input argument.");
   }
    
   if (nlhs > 1)
   {
      mexErrMsgIdAndTxt("Damkjer:fastcoeffvar:varargout",
                        "Too many output arguments.");
   }
   
   mwSize cells = mxGetNumberOfElements (prhs[0]);

   plhs[0] = mxCreateCellMatrix(cells, 1);

   std::vector<const double*> vals(cells,0);
   std::vector<mwSize> Ms(cells,0);
   std::vector<mwSize> Ns(cells,0);

   std::vector<mxArray*> covs(cells,0);
   std::vector<double*> cov_vals(cells,0);

   for (int cell = 0; cell < cells; ++cell)
   {
       vals[cell]=mxGetPr(mxGetCell(prhs[0], cell));
       Ms[cell]=mxGetM(mxGetCell(prhs[0], cell));
       Ns[cell]=mxGetN(mxGetCell(prhs[0], cell));
       
       covs[cell] = mxCreateDoubleMatrix(0, 0, mxREAL);
       mxSetM(covs[cell], Ns[cell]);
       mxSetN(covs[cell], Ns[cell]);
       mxSetData(covs[cell], mxMalloc(sizeof(double)));
       cov_vals[cell] = mxGetPr(covs[cell]);
   }

#ifdef _OPENMP
   omp_set_dynamic(1);
   omp_set_num_threads(omp_get_num_procs());
#endif

   #pragma omp parallel for
   for (int cellp = 0; cellp < cells; ++cellp)
   {
      std::vector<double> dist(Ms[cellp], 0.);
      double mean=0;
      
      // calculate the average distance to this point
      double w1 = 1./Ms[cellp];
      
      for (mwSize m = Ms[cellp]; m --> 0;)
      {
         for (mwSize n = Ns[cellp]; n --> 0;)
         {
            double diff = vals[cellp][m + Ms[cellp] * n]-vals[cellp][Ms[cellp] * n];
            dist[n]+=diff*diff;
         }
         mean+=sqrt(dist[m])*w1;
      }
      
      double w2 = 1./(Ms[cellp]-1);
      
      for (mwSize n = Ns[cellp]; n --> 0;)
      {
            cov_vals[cellp] = 0;

            for (mwSize mc = Ms[cellp]; mc --> 0;)
            {
               cov_vals[cellp] +=
                       w2
                       * (vals[cellp][mc + Ms[cellp] * n]-mean[n]
                       * (vals[cellp][mc + Ms[cellp] * n]-vals[cellp][Ms[cellp] * n2]);
            }
         }
      }
   }

   for (int cell = 0; cell < cells; ++cell)
   {
      mxSetCell(plhs[0], cell, covs[cell]);
   }
}
