//=========================================================================
// FILE:        fastcov.cpp
//
//    Copyright (C)  2012 Kristian Damkjer.
//
// DESCRIPTION: This MEX source file provides a fast implementation of a
//              centering method (per-column mean removal) for cell arrays
//              of real-valued matrices.
//
// LIMITATIONS: Does not work for cell-arrays of complex matrices.
//
// SOFTWARE HISTORY:
//> 2012-SEP-11  K. Damkjer
//               Initial Coding.
//<
//=========================================================================

#ifdef _OPENMP
#include <omp.h>
#endif

#include <vector>

#if (defined(MATLAB_MAJOR) && defined(MATLAB_MINOR))
   #if MATLAB_MAJOR <= 7 && MATLAB_MINOR <= 10 && defined(_CHAR16T)
      #define CHAR16_T
   #endif
#endif

#include "mex.h"

void mexFunction(
        int nlhs, mxArray* plhs[],
        int nrhs, const mxArray* prhs[])
{
   if (nrhs != 1 || !mxIsCell(prhs[0]))
   {
      mexErrMsgIdAndTxt("Damkjer:fastcenter:varargin",
                        "Missing or invalid input argument.");
   }
    
   if (nlhs > 1)
   {
      mexErrMsgIdAndTxt("Damkjer:fastcenter:varargout",
                        "Too many output arguments.");
   }
   
   mwSize cells = mxGetNumberOfElements (prhs[0]);

   plhs[0] = mxCreateCellMatrix(cells, 1);

   std::vector<const double*> vals(cells,0);
   std::vector<mwSize> Ms(cells,0);
   std::vector<mwSize> Ns(cells,0);

   std::vector<mxArray*> adjs(cells,0);
   std::vector<double*> adj_vals(cells,0);

   for (int cell = 0; cell < cells; ++cell)
   {
       vals[cell]=mxGetPr(mxGetCell(prhs[0], cell));
       Ms[cell]=mxGetM(mxGetCell(prhs[0], cell));
       Ns[cell]=mxGetN(mxGetCell(prhs[0], cell));
       
       adjs[cell] = mxCreateDoubleMatrix(0, 0, mxREAL);
       mxSetM(adjs[cell], Ms[cell]);
       mxSetN(adjs[cell], Ns[cell]);
       mxSetData(adjs[cell], mxMalloc(sizeof(double)*Ms[cell]*Ns[cell]));
       adj_vals[cell] = mxGetPr(adjs[cell]);
   }

#ifdef _OPENMP
   omp_set_dynamic(1);
   omp_set_num_threads(omp_get_num_procs());
#endif

   #pragma omp parallel for
   for (int cellp = 0; cellp < cells; ++cellp)
   {
      double w1 = 1./Ms[cellp];
      double mean;
      
      for (mwSize n = Ns[cellp]; n --> 0;)
      {
         mean = 0;

         for (mwSize m = Ms[cellp]; m --> 0;)
         {
            mean += vals[cellp][m + Ms[cellp] * n] * w1;
         }

         for (mwSize m = Ms[cellp]; m --> 0;)
         {
            adj_vals[cellp][m + Ms[cellp] * n] =
                    vals[cellp][m + Ms[cellp] * n] - mean;
         }
      }
   }

   for (int cell = 0; cell < cells; ++cell)
   {
      mxSetCell(plhs[0], cell, adjs[cell]);
   }
}
