//****************************************************************************
// FILE:        VpTree.h
//
//    Copyright (C)  2012 Kristian Damkjer.
//
// DESCRIPTION:
//>   The interface definition for vantage-point trees.
//<
//
// LIMITATIONS:
//>   This class template file follows the template inclusion pattern. This
//    header file should be the only file included by clients wishing to
//    instantiate a VpTree specialization.
//<
//
// SOFTWARE HISTORY:
//
//> 2012-SEP-11  K. Damkjer
//               Initial Coding.
//  2013-JUL-23  K. Damkjer
//               Set typedefs to make code more readable and to allow for
//               varying container and metric types. This is useful when the
//               default double-precision is overkill. It is now easy to set
//               types to float.
//<
//****************************************************************************
#ifndef Damkjer_VpTree_HEADER
#define Damkjer_VpTree_HEADER

#if _OPENMP
#include <omp.h>  // USES OpenMP pragmas
#endif

#include <cstddef> // USES std::size_t for index type.
#include <vector>  // USES std::vector interface for default point type.
#include <deque>   // USES std::deque interface for search results.
#include <set>     // USES std::set interface for random sampling.
#include <queue>   // USES std::priority_queue interface for search results.
#include <limits>  // USES std::numeric_limits to establish default reach.

// USES Euclidean metric as default search space.
#include "Util/SpatialIndexing/Metrics/EuclideanDistance.h" 

namespace Damkjer
{

//*****************************************************************************
// CLASS: VpTree
//>   A vantage point tree data structure and related search algorithms.
//
//    This class is an implementation of the vantage point tree data structure
//    described by Peter Yianilos in "Data Structures and Algorithms for
//    Nearest Neighbor Search in General Metric Spaces". @cite Yianilos:1993
//
//    This structure includes the enhancement to collapse subtrees near leaves
//    into a single node. This change reduces overall storage and some
//    recursion overhead, but may make queries less efficient if the leaf size
//    is chosen to be too large.
//
//    @note
//    The tree is currently coded as a VPsb tree in an attempt to realize the
//    best balance in construct and search performance.
//    @note
//    Code was developed and analyzed to determine the benefit of enhancing
//    the vantage point selection process. Extra effort was spent on finding a
//    "good" pivot for large trees with more effort devoted as the tree
//    coverage grows (determined by examining sqrt(size) elements for pivot).
//    The thought was that selection of a better pivot carries more weight in
//    queries as tree coverage increases. However, in practice, the vantage
//    point selection seemed to have marginal, if any, effect on query time.
//    @note
//    The tree includes the enhancement to track tight bounds on the branch
//    sub-spaces. For a slight storage overhead, we track the tight lower and
//    upper bounds for each branch which allows for earlier pruning of
//    branches at the cost of slightly increased storage and construct time.
//    @note
//    The tree also includes the enhancement to track distances to ancestral
//    pivots. This change potentially incurs a significant storage overhead,
//    but allows for further explicit pruning or inclusion optimizations for
//    range queries.
//    @note
//    A final proposed enhancement would be to increase fan-out of the tree by
//    establishing multiple partition boundaries, or by modifying the
//    structure into an MVP tree. Of course, if that path is selected, we're
//    really talking about a different data structure.
//
//    @tparam MetricT The metric search space.
//<
//*****************************************************************************
template<typename MetricT = EuclideanDistance<std::vector<double> > >
class VpTree
{
public:
   typedef typename MetricT::value_type PointT;
      //> The point record type, derived from the metric value type.
      //<

   typedef typename PointT::difference_type DiffT;
      //> The difference type, derived from the point record difference type.
      //<

   typedef typename MetricT::return_type DistT;
      //> The distance type, derived from the metric return type.
      //<

   typedef std::size_t IndexT;
      //> The index type, consistent with STL size type.
      //<

   typedef std::pair<std::deque<IndexT>,
                     std::deque<DistT> > SearchResultsT;
      //> The search results type, a pair of deques of indices and distances.
      //<

   explicit VpTree(const MetricT& metric=MetricT(),
                   const IndexT& leafCapacity=8);
      //> Instantiate an empty tree. A leaf capacity below one is raised to
      //  one.
      //<

   template<typename DatabaseT>
   explicit VpTree(const DatabaseT&,
                   const MetricT& metric=MetricT(),
                   const IndexT& leafCapacity=8);
      //> Instantiate a tree from a database of items. The database is read
      //  through size() and operator[]. A leaf capacity below one is raised
      //  to one.
      //<

   virtual ~VpTree();
      //> Destruct the tree and deallocate resources.
      //<

   SearchResultsT knn(const PointT&, const IndexT&,
                      const DistT& limit=std::numeric_limits<DistT>::max())
                      const;
      //> Perform a k nearest neighbor search on the tree returning the
      //  indices of and distances to the k nearest neighbors. The limit
      //  defaults to the largest representable distance.
      //<

   SearchResultsT rnn(const PointT&, const DistT&) const;
      //> Perform a fixed radius nearest neighbor search on the tree
      //  returning the indices of and distances to the neighbors in the
      //  fixed radius.
      //<

private:

   // Make the node types inner classes.
   class Node;
      //> An abstract node interface for a vantage point tree.
      //<

   class Branch;
      //> A vantage-point tree internal (non-terminal) node.
      //<

   class Leaf;
      //> A vantage-point tree terminal node.
      //<

   // Perform item management with inner classes as well.
   class Item;
      //> A simple container to capture a database element along with its index
      //  and ancestral pivot history.
      //<

   class ResultsCandidate;
      //> A light-weight representation of a search result candidate item.
      //<

   typedef std::priority_queue<ResultsCandidate> ResultsSetT;
      //> Search results are built by traversing the vantage point tree and
      //  maintaining candidate results on a priority queue sorted by distance
      //  to the query.
      //<

   VpTree(const VpTree&);
      //> Explicitly disable the compiler-generated copy constructor. No
      //  definition provided.
      //<

   VpTree& operator=(const VpTree&);
      //> Explicitly disable the compiler-generated copy assignment operator.
      //  The tree holds theRoot as a raw pointer that the destructor deletes,
      //  so a member-wise assignment would leave two trees deleting the same
      //  nodes and would drop the nodes previously held by the assignee. No
      //  definition provided.
      //<

   Node* makeTree(const IndexT&, const IndexT&);
      //> Build the tree over the items in the provided index range and
      //  return its root node. The database constructor stores the result
      //  for the full item range in theRoot.
      //<

   std::set<IndexT> randomSample(const IndexT&, const IndexT&) const;
      //> Select a random sample in the range between the provided indices.
      //<

   IndexT selectVp(const IndexT&, const IndexT&) const;
      //> Select a vantage point in the range between the provided indices.
      //<

   Node* theRoot;
      //> The root is the starting point for all queries against the database
      //  represented by the VpTree. Null for an empty tree; deleted by the
      //  destructor.
      //<

   std::deque<Item> theItems;
      //> The items are the set of metric-space elements managed by the vantage
      //  point tree paired with their original indices in the unordered
      //  database.
      //<

   IndexT theLeafCapacity;
      //> Improve search efficiency by stopping recursion before completely
      //  forming a binary search tree. The leaf capacity sets the threshold
      //  for the number of elements that may be collapsed into a single node.
      //  This threshold should be set by empirically testing the point at
      //  which a linear search becomes more efficient than a binary search.
      //<

   MetricT theMetric;
      //> The distance metric used to construct and search this tree.
      //<

   friend class VpTree<MetricT>::Node;
      //> There is a defect in the C++03 standard that forces friendship of
      //  nested classes to be explicitly declared. This relationship will be
      //  made implicit in C++11.
      //<

   friend class VpTree<MetricT>::Branch;
      //> There is a defect in the C++03 standard that forces friendship of
      //  nested classes to be explicitly declared. This relationship will be
      //  made implicit in C++11.
      //<

   friend class VpTree<MetricT>::Leaf;
      //> There is a defect in the C++03 standard that forces friendship of
      //  nested classes to be explicitly declared. This relationship will be
      //  made implicit in C++11.
      //<
};

//*****************************************************************************
// VpTree::VpTree(const MetricT&, const IndexT&)
//>   Instantiate an empty tree.
//
//    @tparam MetricT      The metric search space.
//    @param  metric       The metric space to use for this tree's spatial
//                         index.
//    @param  leafCapacity The number of nodes to collapse into a single leaf.
//<
//*****************************************************************************
template<typename MetricT>
inline
VpTree<MetricT>::VpTree(const MetricT& metric,
                        const IndexT& leafCapacity)
   : theRoot(0),
     theItems(),
     theLeafCapacity((leafCapacity >= 1) ? leafCapacity : 1),
     theMetric(metric)
{
   // An empty tree has no nodes to build: the root stays null and the item
   // store stays empty. The leaf capacity is kept at a minimum of one.
}

//*****************************************************************************
// VpTree::VpTree(const DatabaseT&, const MetricT&, const IndexT&)
//>   Instantiate a tree from a database of items.
//
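//    @tparam MetricT      The metric search space.
//    @tparam DatabaseT    The container type of the elements to be indexed;
//                         it is read through size() and operator[].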
//    @param  elems        The container of elements to be indexed.
//    @param  metric       The metric space to use for this tree's spatial
//                         index.
//    @param  leafCapacity The number of nodes to collapse into a single leaf.
//<
//*****************************************************************************
template<typename MetricT>
template<typename DatabaseT>
inline
VpTree<MetricT>::VpTree(const DatabaseT& elems,
                        const MetricT& metric,
                        const IndexT& leafCapacity)
   : theRoot(0)
   , theItems(elems.size())
   , theLeafCapacity((leafCapacity<1)?1:leafCapacity)
   , theMetric(metric)
{
   #if _OPENMP
   // When the current team consists of a single thread, enable dynamic
   // thread adjustment and request one thread per available processor for
   // subsequent parallel regions.
   if (omp_get_num_threads() == 1)
   {
      omp_set_dynamic(1);
      omp_set_num_threads(omp_get_num_procs());
   }
   #endif

   // Count with a signed, pointer-width type: older OpenMP implementations
   // require a signed loop variable, while a plain int bound would overflow
   // for databases holding more than INT_MAX elements and skip the copy.
   const std::ptrdiff_t itemCount =
      static_cast<std::ptrdiff_t>(theItems.size());

   // Each iteration writes a distinct, pre-allocated item, so iterations are
   // independent of one another.
   #pragma omp parallel for
   for (std::ptrdiff_t i = 0; i < itemCount; ++i)
   {
      // Index with the tree's own index type so positions are not truncated.
      const IndexT idx = static_cast<IndexT>(i);

      // Remember the element's original position alongside a copy of it.
      theItems[idx].theIndex=idx;
      theItems[idx].theElement=elems[idx];
   }

   // Build the tree over the full item range and keep its root.
   theRoot = makeTree(0, theItems.size());
}
                
//*****************************************************************************
// VpTree::~VpTree()
//>   Destruct the tree and deallocate resources.
//
//    @tparam MetricT The metric search space.
//<
//*****************************************************************************
template<typename MetricT>
inline
VpTree<MetricT>::~VpTree()
{
   // Release the node held at the root, if any, and clear the pointer so it
   // no longer refers to the released node.
   if (theRoot != 0)
   {
      delete theRoot;
      theRoot = 0;
   }
}

}

//*****************************************************************************
// External References
//*****************************************************************************
#include "VpTree_Node.h"
#include "VpTree_Branch.h"
#include "VpTree_Leaf.h"
#include "VpTree_Item.h"
#include "VpTree_ResultsCandidate.h"

//*****************************************************************************
// Template Inclusion
//*****************************************************************************
#include "VpTree.hpp" // HASA VpTree Template Implementation.

#endif
