RDKit
Open-source cheminformatics and machine learning.
Vector.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef __RD_VECTOR_H__
11 #define __RD_VECTOR_H__
12 
13 #include <RDGeneral/Invariant.h>
14 #include <RDGeneral/utils.h>
15 #include <math.h>
16 #include <iostream>
17 #include <iomanip>
18 #include <cstdlib>
19 #include <cstring>
20 #include <time.h>
21 #include <boost/random.hpp>
22 #include <boost/smart_ptr.hpp>
23 
24 namespace RDNumeric {
25 
26 //! A class to represent vectors of numbers.
27 template <class TYPE>
28 class Vector {
29  public:
30  typedef boost::shared_array<TYPE> DATA_SPTR;
31 
32  //! Initialize with only a size.
33  explicit Vector(unsigned int N) {
34  d_size = N;
35  TYPE *data = new TYPE[N];
36  memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
37  d_data.reset(data);
38  }
39 
40  //! Initialize with a size and default value.
41  Vector(unsigned int N, TYPE val) { //: Vector(N) {
42  d_size = N;
43  TYPE *data = new TYPE[N];
44 
45  unsigned int i;
46  for (i = 0; i < N; i++) {
47  data[i] = val;
48  }
49  d_data.reset(data);
50  }
51 
52  //! Initialize from a smart pointer.
53  /*!
54  <b>NOTE:</b> the data is not copied in this case
55  */
56  Vector(unsigned int N, DATA_SPTR data) { // TYPE *data) {
57  d_size = N;
58  d_data = data;
59  }
60 
61  //! copy constructor
62  /*! We make a copy of the other vector's data.
63  */
64  Vector(const Vector &other) {
65  d_size = other.size();
66  const TYPE *otherData = other.getData();
67  TYPE *data = new TYPE[d_size];
68 
69  memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
70  d_size * sizeof(TYPE));
71  d_data.reset(data);
72  }
73 
74  ~Vector() {}
75 
76  //! return the size (dimension) of the vector
77  unsigned int size() const { return d_size; }
78 
79  //! returns the value at a particular index
80  inline TYPE getVal(unsigned int i) const {
81  PRECONDITION(i < d_size, "bad index");
82  return d_data[i];
83  }
84 
85  //! sets the index at a particular value
86  inline void setVal(unsigned int i, TYPE val) {
87  PRECONDITION(i < d_size, "bad index");
88  d_data[i] = val;
89  }
90 
91  inline TYPE operator[](unsigned int i) const {
92  PRECONDITION(i < d_size, "bad index");
93  return d_data[i];
94  }
95 
96  inline TYPE &operator[](unsigned int i) {
97  PRECONDITION(i < d_size, "bad index");
98  return d_data[i];
99  }
100 
101  //! returns a pointer to our data array
102  inline TYPE *getData() { return d_data.get(); }
103 
104  //! returns a const pointer to our data array
105  inline const TYPE *getData() const {
106  // return dp_data;
107  return d_data.get();
108  }
109 
110  //! Copy operator.
111  /*! We make a copy of the other Vector's data.
112  */
113 
115  PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
116  const TYPE *otherData = other.getData();
117  memcpy(static_cast<void *>(d_data.get()),
118  static_cast<const void *>(otherData), d_size * sizeof(TYPE));
119  return *this;
120  }
121 
122  //! elementwise addition, vectors must be the same size.
124  PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
125  const TYPE *otherData = other.getData();
126  TYPE *data = d_data.get();
127  unsigned int i;
128  for (i = 0; i < d_size; i++) {
129  data[i] += otherData[i];
130  }
131  return *this;
132  }
133 
134  //! elementwise subtraction, vectors must be the same size.
136  PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
137  const TYPE *otherData = other.getData();
138  TYPE *data = d_data.get();
139  unsigned int i;
140  for (i = 0; i < d_size; i++) {
141  data[i] -= otherData[i];
142  }
143  return *this;
144  }
145 
146  //! multiplication by a scalar
147  Vector<TYPE> &operator*=(TYPE scale) {
148  unsigned int i;
149  for (i = 0; i < d_size; i++) {
150  d_data[i] *= scale;
151  }
152  return *this;
153  }
154 
155  //! division by a scalar
156  Vector<TYPE> &operator/=(TYPE scale) {
157  unsigned int i;
158  for (i = 0; i < d_size; i++) {
159  d_data[i] /= scale;
160  }
161  return *this;
162  }
163 
164  //! L2 norm squared
165  inline TYPE normL2Sq() const {
166  TYPE res = (TYPE)0.0;
167  unsigned int i;
168  TYPE *data = d_data.get();
169  for (i = 0; i < d_size; i++) {
170  res += data[i] * data[i];
171  }
172  return res;
173  }
174 
175  //! L2 norm
176  inline TYPE normL2() const { return sqrt(this->normL2Sq()); }
177 
178  //! L1 norm
179  inline TYPE normL1() const {
180  TYPE res = (TYPE)0.0;
181  unsigned int i;
182  TYPE *data = d_data.get();
183  for (i = 0; i < d_size; i++) {
184  res += fabs(data[i]);
185  }
186  return res;
187  }
188 
189  //! L-infinity norm
190  inline TYPE normLinfinity() const {
191  TYPE res = (TYPE)(-1.0);
192  unsigned int i;
193  TYPE *data = d_data.get();
194  for (i = 0; i < d_size; i++) {
195  if (fabs(data[i]) > res) {
196  res = fabs(data[i]);
197  }
198  }
199  return res;
200  }
201 
202  //! \brief Gets the ID of the entry that has the largest absolute value
203  //! i.e. the entry being used for the L-infinity norm
204  inline unsigned int largestAbsValId() const {
205  TYPE res = (TYPE)(-1.0);
206  unsigned int i, id = d_size;
207  TYPE *data = d_data.get();
208  for (i = 0; i < d_size; i++) {
209  if (fabs(data[i]) > res) {
210  res = fabs(data[i]);
211  id = i;
212  }
213  }
214  return id;
215  }
216 
217  //! \brief Gets the ID of the entry that has the largest value
218  inline unsigned int largestValId() const {
219  TYPE res = (TYPE)(-1.e8);
220  unsigned int i, id = d_size;
221  TYPE *data = d_data.get();
222  for (i = 0; i < d_size; i++) {
223  if (data[i] > res) {
224  res = data[i];
225  id = i;
226  }
227  }
228  return id;
229  }
230 
231  //! \brief Gets the ID of the entry that has the smallest value
232  inline unsigned int smallestValId() const {
233  TYPE res = (TYPE)(1.e8);
234  unsigned int i, id = d_size;
235  TYPE *data = d_data.get();
236  for (i = 0; i < d_size; i++) {
237  if (data[i] < res) {
238  res = data[i];
239  id = i;
240  }
241  }
242  return id;
243  }
244 
245  //! returns the dot product between two Vectors
246  inline TYPE dotProduct(const Vector<TYPE> other) const {
247  PRECONDITION(d_size == other.size(),
248  "Size mismatch in vector doct product");
249  const TYPE *oData = other.getData();
250  unsigned int i;
251  TYPE res = (TYPE)(0.0);
252  TYPE *data = d_data.get();
253  for (i = 0; i < d_size; i++) {
254  res += (data[i] * oData[i]);
255  }
256  return res;
257  }
258 
259  //! Normalize the vector using the L2 norm
260  inline void normalize() {
261  TYPE val = this->normL2();
262  (*this) /= val;
263  }
264 
265  //! Set to a random unit vector
266  inline void setToRandom(unsigned int seed = 0) {
267  // we want to get our own RNG here instead of using the global
268  // one. This is related to Issue285.
269  RDKit::rng_type generator(42u);
270  RDKit::uniform_double dist(0, 1.0);
271  RDKit::double_source_type randSource(generator, dist);
272  if (seed > 0) {
273  generator.seed(seed);
274  } else {
275  // we can't initialize using only clock(), because it's possible
276  // that we'll get here fast enough that clock() will return 0
277  // and generator.seed(0) is an error:
278  generator.seed(clock() + 1);
279  }
280 
281  unsigned int i;
282  TYPE *data = d_data.get();
283  for (i = 0; i < d_size; i++) {
284  data[i] = randSource();
285  }
286  this->normalize();
287  }
288 
289  private:
290  unsigned int d_size; //! < our length
291  DATA_SPTR d_data;
292  Vector<TYPE> &operator=(const Vector<TYPE> &other);
293 };
294 
296 
297 //! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
298 template <typename T>
299 double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
300  double numer = v1.dotProduct(v2);
301  if (numer == 0.0) return 0.0;
302  double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
303  if (denom == 0.0) return 0.0;
304  return numer / denom;
305 }
306 } // end of namespace RDNumeric
307 
308 //! ostream operator for Vectors
309 template <typename TYPE>
310 std::ostream &operator<<(std::ostream &target,
311  const RDNumeric::Vector<TYPE> &vec) {
312  unsigned int siz = vec.size();
313  target << "Size: " << siz << " [";
314  unsigned int i;
315  for (i = 0; i < siz; i++) {
316  target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
317  }
318  target << "]\n";
319  return target;
320 }
321 
322 #endif
void normalize()
Normalize the vector using the L2 norm.
Definition: Vector.h:260
unsigned int largestValId() const
Gets the ID of the entry that has the largest value.
Definition: Vector.h:218
TYPE normL2() const
L2 norm.
Definition: Vector.h:176
boost::shared_array< TYPE > DATA_SPTR
Definition: Vector.h:30
boost::minstd_rand rng_type
Definition: utils.h:35
std::ostream & operator<<(std::ostream &target, const RDNumeric::Vector< TYPE > &vec)
ostream operator for Vectors
Definition: Vector.h:310
double TanimotoSimilarity(const Vector< T > &v1, const Vector< T > &v2)
returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
Definition: Vector.h:299
Vector< TYPE > & operator-=(const Vector< TYPE > &other)
elementwise subtraction, vectors must be the same size.
Definition: Vector.h:135
TYPE & operator[](unsigned int i)
Definition: Vector.h:96
Vector< double > DoubleVector
Definition: Vector.h:295
unsigned int smallestValId() const
Gets the ID of the entry that has the smallest value.
Definition: Vector.h:232
Vector< TYPE > & assign(const Vector< TYPE > &other)
Copy operator.
Definition: Vector.h:114
unsigned int size() const
return the size (dimension) of the vector
Definition: Vector.h:77
TYPE normLinfinity() const
L-infinity norm.
Definition: Vector.h:190
void setVal(unsigned int i, TYPE val)
sets the index at a particular value
Definition: Vector.h:86
Vector< TYPE > & operator/=(TYPE scale)
division by a scalar
Definition: Vector.h:156
TYPE normL2Sq() const
L2 norm squared.
Definition: Vector.h:165
Vector(const Vector &other)
copy constructor
Definition: Vector.h:64
TYPE normL1() const
L1 norm.
Definition: Vector.h:179
boost::uniform_real uniform_double
Definition: utils.h:37
const TYPE * getData() const
returns a const pointer to our data array
Definition: Vector.h:105
TYPE operator[](unsigned int i) const
Definition: Vector.h:91
Vector< TYPE > & operator+=(const Vector< TYPE > &other)
elementwise addition, vectors must be the same size.
Definition: Vector.h:123
unsigned int largestAbsValId() const
Gets the ID of the entry that has the largest absolute value, i.e. the entry being used for the L-infinity norm.
Definition: Vector.h:204
TYPE dotProduct(const Vector< TYPE > other) const
returns the dot product between two Vectors
Definition: Vector.h:246
void setToRandom(unsigned int seed=0)
Set to a random unit vector.
Definition: Vector.h:266
Vector(unsigned int N, DATA_SPTR data)
Initialize from a smart pointer.
Definition: Vector.h:56
Vector< TYPE > & operator*=(TYPE scale)
multiplication by a scalar
Definition: Vector.h:147
Vector(unsigned int N, TYPE val)
Initialize with a size and default value.
Definition: Vector.h:41
TYPE getVal(unsigned int i) const
returns the value at a particular index
Definition: Vector.h:80
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
boost::variate_generator< rng_type &, uniform_double > double_source_type
Definition: utils.h:39
TYPE * getData()
returns a pointer to our data array
Definition: Vector.h:102
A class to represent vectors of numbers.
Definition: Vector.h:28
Vector(unsigned int N)
Initialize with only a size.
Definition: Vector.h:33