//*****************************************************************************
// FILE:        VpTree.h
//
//    Copyright (C)  2012 Kristian Damkjer.
//
// DESCRIPTION: VpTree
//
//    This class is an implementation of the vantage point tree data structure
//    described by Peter Yianilos in "Data Structures and Algorithms for
//    Nearest Neighbor Search in General Metric Spaces".
//
//    This structure includes the enhancement to collapse subtrees near leaves
//    into a single node. This change reduces overall storage and some
//    recursion overhead, but may make queries less efficient if the leaf size
//    is chosen to be too large.
//
// LIMITATIONS:
//
//    The tree is currently coded as a VPsb tree to attempt to realize the
//    best balance in construction and search performance.
//
//    Code was developed and analyzed to determine the benefit of enhancing
//    the vantage point selection process. Extra effort was spent on finding a
//    "good" pivot for large trees with more effort devoted as the tree
//    coverage grows (determined by examining sqrt(size) elements for pivot).
//    The thought was that selection of a better pivot carries more weight in
//    queries as tree coverage increases. However, in practice, the vantage
//    point selection seemed to have marginal, if any, effect on query time.
//
//    The tree includes the enhancement to track tight bounds on the branch
//    sub-spaces. For a slight storage overhead, we track the tight lower and
//    upper bounds for each branch which allows for earlier pruning of
//    branches at the cost of slightly increased storage and construct time.
//
//    The tree also includes the enhancement to track distances to ancestral
//    pivots. This change potentially incurs a significant storage overhead,
//    but allows for further explicit pruning or inclusion optimizations for
//    range queries.
//
//    A final proposed enhancement would be to increase fan-out of the tree by
//    establishing multiple partition boundaries, or by modifying the
//    structure into an MVP tree. Of course, if that path is selected, we're
//    really talking about a different data structure.
//
// SOFTWARE HISTORY:
//
//> 2012-SEP-11  K. Damkjer
//               Initial Coding.
//  2013-JUL-23  K. Damkjer
//               Set typedefs to make code more readable and to allow for
//               varying container and metric types. This is useful when the
//               default double-precision is overkill. It is now easy to set
//               types to float.
//<
//*****************************************************************************

#ifndef VpTree_HEADER
#define VpTree_HEADER

#include <cstddef>
#include <deque>
#include <limits>
#include <queue>
#include <set>
#include <utility>
#include <vector>

#include "damkjerConfig.h"
#include "Util/SpatialIndexing/Metrics/Metrics.h"

namespace Damkjer
{

//*****************************************************************************
// CLASS: VpTree
//*****************************************************************************

EXPORT template<typename MetricT = EuclideanDistance<std::vector<double> > >
class VpTree
{
public:
   typedef typename MetricT::value_type  PointT;
      //> The element (point) type, as published by the metric.
      //<

   typedef typename MetricT::return_type DistT;
      //> The distance type, as published by the metric.
      //<

   typedef std::size_t                   IndexT;
      //> The type used to index elements of the database.
      //<

   typedef std::pair<std::deque<IndexT>,
                     std::deque<DistT> > SearchResultsT;
      //> Query results: a sequence of element indices (first) paired with
      //  a sequence of distances (second).
      //  NOTE(review): the two sequences are presumably parallel, i.e.
      //  second[i] is the distance to element first[i]; confirm against
      //  VpTree.hpp.
      //<

   VpTree(const MetricT& metric=MetricT(),
          const IndexT& leafCapacity=8);
      //> The default constructor creates a null tree.
      //
      //  metric       - the distance metric to associate with the tree.
      //  leafCapacity - the leaf capacity threshold (see theLeafCapacity).
      //
      //  NOTE(review): this constructor is not explicit, so a MetricT
      //  converts implicitly to a VpTree. Left unchanged to avoid breaking
      //  existing callers.
      //<

   template<typename DatabaseT>
   VpTree(const DatabaseT&,
          const MetricT& metric=MetricT(),
          const IndexT& leafCapacity=8);
      //> Create a VpTree from a (probably unsorted) database of items.
      //
      //  The first argument is the database of items to index; metric and
      //  leafCapacity are as for the default constructor.
      //
      //  NOTE(review): this constructor is not explicit either, so any
      //  type is a candidate for implicit conversion to a VpTree. Left
      //  unchanged to avoid breaking existing callers.
      //<

   virtual ~VpTree();
      //> Free resources allocated by this tree.
      //<

   SearchResultsT knn(const PointT&, const IndexT&,
                   const DistT& limit=std::numeric_limits<DistT>::max()) const;
      //> Perform a k nearest neighbor search on the tree returning the
      //  indices of and distances to the k nearest neighbors.
      //
      //  The arguments are the query point, the number of neighbors
      //  requested (k), and a distance limit that defaults to the largest
      //  finite value representable by DistT.
      //  NOTE(review): limit presumably caps the distance at which a
      //  neighbor may be reported; confirm against VpTree.hpp.
      //<

   SearchResultsT rnn(const PointT&, const DistT&) const;
      //> Perform a fixed radius nearest neighbor search on the tree
      //  returning the indices of and distances to the neighbors in the
      //  fixed radius.
      //
      //  The arguments are the query point and the search radius.
      //<

private:
   VpTree(const VpTree&);
      //> Disable copy construction for VpTree.
      //<

   VpTree& operator=(const VpTree&);
      //> Disable copy assignment for VpTree.
      //
      //  The tree holds its root through a raw pointer (theRoot), so the
      //  compiler-generated memberwise assignment would leave two trees
      //  referring to the same nodes. This is declared private, like the
      //  copy constructor, so client code cannot assign trees; no
      //  definition is expected to be provided.
      //<

   class Item;
      //> The VpTree::Item class provides a simple container to capture a
      //  database element along with its index and ancestral pivot
      //  history.
      //<

   class ResultsCandidate;
      //> The VpTree::ResultsCandidate class provides a private
      //  representation of search result candidates to be employed in an
      //  intermediate priority queue of candidate results.
      //<

   typedef std::priority_queue<ResultsCandidate> ResultsSetT;
      //> The intermediate priority queue of candidate results.
      //<

   class Node;
      //> The VpTree::Node class provides the private definition of
      //  general VpTree nodes.
      //<

   class Internal;
      //> The VpTree::Internal class provides the private definition of
      //  internal VpTree nodes.
      //<

   class Leaf;
      //> The VpTree::Leaf class provides the private definition of
      //  VpTree leaf nodes.
      //<

   // All of the node-types need access to "theItems".
   friend class VpTree<MetricT>::Node;
   friend class VpTree<MetricT>::Internal;
   friend class VpTree<MetricT>::Leaf;

   Node* makeTree(const IndexT&, const IndexT&);
      //> Build a (sub)tree and return a pointer to its root node.
      //
      //  NOTE(review): the two indices presumably delimit the range of
      //  theItems covered by the (sub)tree; whether the upper index is
      //  inclusive is not visible here. Confirm against VpTree.hpp.
      //<

   IndexT selectVp(const IndexT&, const IndexT&) const;
      //> Select a vantage point in the range between the provided indices.
      //<

   std::set<IndexT> randomSample(const IndexT&, const IndexT&) const;
      //> Select a random sample in the range between the provided indices.
      //<

   Node* theRoot;
      //> The VpTree root is the starting point for all queries against the
      //  database represented by the VpTree.
      //<

   std::deque<Item> theItems;
      //> The items are the set of metric-space elements managed by the
      //  VpTree paired with their original indices in the unordered
      //  database.
      //<

   IndexT theLeafCapacity;
      //> Improve search efficiency by stopping recursion before completely
      //  forming a binary search tree. The leaf capacity sets the threshold
      //  for the number of elements that may be collapsed into a single node.
      //  This threshold should be set by empirically testing the point at
      //  which a linear search becomes more efficient than a binary search.
      //<

   MetricT theMetric;
      //> The distance metric used to construct and search this tree.
      //<
};

}

#if MAKE_DEPENDING || TEMPLATE_INCLUSION
#include "VpTree.hpp"
#endif

#endif
