RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
HierarchicalClusterPicker.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2006 Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _HIERARCHCLUSTERPICKER_H
12#define _HIERARCHCLUSTERPICKER_H
13
14#include <RDGeneral/types.h>
15#include "DistPicker.h"
16
17namespace RDPickers {
18
19/*! \brief Diversity picker based on hierarchical clustering
20 *
21 * This class inherits from DistPicker since it uses the distance matrix
22 * for diversity picking. The clustering itself is done using the Murtagh
23 * code in $RDBASE/Code/ML/Cluster/Murtagh/
24 */
26 public:
27 /*! \brief The type of hierarchical clustering algorithm to use
28 */
29 typedef enum {
30 WARD = 1,  //!< presumably Ward's minimum-variance method (TODO confirm)
31 SLINK = 2,  //!< presumably single linkage (TODO confirm)
32 CLINK = 3,  //!< presumably complete linkage (TODO confirm)
33 UPGMA = 4,  //!< presumably unweighted group average (TODO confirm)
34 MCQUITTY = 5,  //!< presumably McQuitty's method (TODO confirm)
35 GOWER = 6,  //!< presumably Gower's median method (TODO confirm)
36 CENTROID = 7  //!< presumably the centroid method (TODO confirm)
37 } ClusterMethod;
38
39 /*! \brief Constructor - takes a ClusterMethod as an argument
40 *
41 * Sets the hierarchical clustering method
42 */
44 : d_method(clusterMethod) {}
45
46 /*! \brief This is the function that does the picking
47 *
48 * Here is how the algorithm works \n
49 * FIX: Supply reference
50 *
51 * - The entire pool is clustered using the distance matrix and one of the
52 * hierarchical clustering methods (specified via the constructor). \n
53 * - Starting with the individual items in the pool, clusters are merged based
54 * on the output from the clustering method. \n
55 * - The merging is stopped when the number of clusters is the same as
56 * the number of picks.
57 * - For each item in a cluster, the sum of the squares of the distances to
58 * the rest of the items (in the cluster) is computed.
59 * The item with the smallest of these values is picked as the
60 * representative of the cluster.
61 * Basically, this tries to pick the item closest to the centroid of the
62 * cluster.
63 *
64 *
65 *
66 * \param distMat - distance matrix - an array of doubles. It is assumed
67 * that only the lower-triangle elements of the matrix are supplied,
68 * in a 1D array\n
69 * NOTE: this matrix WILL BE ALTERED during the picking\n
70 * NOTE(review): distMat is declared const, which seems to conflict with the note above - TODO confirm against the implementation\n
71 * \param poolSize - the size of the pool to pick the items from. It is
72 * assumed that the distance matrix above contains the right number
73 * of elements; i.e. poolSize*(poolSize-1)/2 (the entries of a strict
74 * lower triangle - TODO confirm against the implementation)\n
75 * \param pickSize - the number of items to pick from the pool (<= poolSize)
76 * \return the picked items; presumably their indices in the pool (TODO confirm)
77 */
78 RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize,
79 unsigned int pickSize) const override;
80
81 /*! \brief This is the function that does the clustering of the items - used
82 * by the picker
83 *
84 * ARGUMENTS:
85 *
86 * \param distMat - distance matrix - an array of doubles. It is assumed that
87 * only the lower-triangle elements of the matrix are supplied,
88 * in a 1D array\n
89 * NOTE: this matrix WILL BE ALTERED during the picking\n
90 * NOTE(review): distMat is declared const, which seems to conflict with the note above - TODO confirm against the implementation\n
91 * \param poolSize - the size of the pool to pick the items from. It is
92 * assumed that the distance matrix above contains the right number
93 * of elements; i.e. poolSize*(poolSize-1)/2 (the entries of a strict
94 * lower triangle - TODO confirm against the implementation)\n
95 * \param pickSize - the number of clusters to divide the pool into
96 * (<= poolSize)
97 * \return the clusters; presumably one INT_VECT of pool indices per cluster (TODO confirm)
98 */
99 RDKit::VECT_INT_VECT cluster(const double *distMat, unsigned int poolSize,
100 unsigned int pickSize) const;
101
102 private:
103 ClusterMethod d_method;  //!< clustering method, initialized from the constructor argument
104};
105}; // namespace RDPickers
106
107#endif
Abstract base class to perform item picking (typically molecules) using a distance matrix.
Definition DistPicker.h:46
Diversity picker based on hierarchical clustering.
HierarchicalClusterPicker(ClusterMethod clusterMethod)
Constructor - takes a ClusterMethod as an argument.
RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize) const override
This is the function that does the picking.
RDKit::VECT_INT_VECT cluster(const double *distMat, unsigned int poolSize, unsigned int pickSize) const
This is the function that does the clustering of the items - used by the picker.
ClusterMethod
The type of hierarchical clustering algorithm to use.
#define RDKIT_SIMDIVPICKERS_EXPORT
Definition export.h:497
std::vector< int > INT_VECT
Definition types.h:291
std::vector< INT_VECT > VECT_INT_VECT
Definition types.h:305