RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Vector.h
Go to the documentation of this file.
1//
2// Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef __RD_VECTOR_H__
12#define __RD_VECTOR_H__
13
#include <RDGeneral/Invariant.h>
#include <RDGeneral/utils.h>

#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <stdexcept>

#include <boost/random.hpp>
#include <boost/smart_ptr.hpp>
24
//! Lengths below this threshold are treated as zero (see Vector::normalize()).
static constexpr double zero_tolerance = 1.e-16;
26
27namespace RDNumeric {
28
29//! A class to represent vectors of numbers.
30template <class TYPE>
31class Vector {
32 public:
33 typedef boost::shared_array<TYPE> DATA_SPTR;
34
35 //! Initialize with only a size.
36 explicit Vector(unsigned int N) {
37 d_size = N;
38 TYPE *data = new TYPE[N];
39 memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
40 d_data.reset(data);
41 }
42
43 //! Initialize with a size and default value.
44 Vector(unsigned int N, TYPE val) { //: Vector(N) {
45 d_size = N;
46 TYPE *data = new TYPE[N];
47
48 unsigned int i;
49 for (i = 0; i < N; i++) {
50 data[i] = val;
51 }
52 d_data.reset(data);
53 }
54
55 //! Initialize from a smart pointer.
56 /*!
57 <b>NOTE:</b> the data is not copied in this case
58 */
59 Vector(unsigned int N, DATA_SPTR data) { // TYPE *data) {
60 d_size = N;
61 d_data = data;
62 }
63
64 //! copy constructor
65 /*! We make a copy of the other vector's data.
66 */
67 Vector(const Vector &other) {
68 d_size = other.size();
69 const TYPE *otherData = other.getData();
70 TYPE *data = new TYPE[d_size];
71
72 memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
73 d_size * sizeof(TYPE));
74 d_data.reset(data);
75 }
76
77 ~Vector() = default;
78
79 //! return the size (dimension) of the vector
80 unsigned int size() const { return d_size; }
81
82 //! returns the value at a particular index
83 inline TYPE getVal(unsigned int i) const {
84 PRECONDITION(i < d_size, "bad index");
85 return d_data[i];
86 }
87
88 //! sets the index at a particular value
89 inline void setVal(unsigned int i, TYPE val) {
90 PRECONDITION(i < d_size, "bad index");
91 d_data[i] = val;
92 }
93
94 inline TYPE operator[](unsigned int i) const {
95 PRECONDITION(i < d_size, "bad index");
96 return d_data[i];
97 }
98
99 inline TYPE &operator[](unsigned int i) {
100 PRECONDITION(i < d_size, "bad index");
101 return d_data[i];
102 }
103
104 //! returns a pointer to our data array
105 inline TYPE *getData() { return d_data.get(); }
106
107 //! returns a const pointer to our data array
108 inline const TYPE *getData() const {
109 // return dp_data;
110 return d_data.get();
111 }
112
113 //! Copy operator.
114 /*! We make a copy of the other Vector's data.
115 */
116
118 PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
119 const TYPE *otherData = other.getData();
120 memcpy(static_cast<void *>(d_data.get()),
121 static_cast<const void *>(otherData), d_size * sizeof(TYPE));
122 return *this;
123 }
124
125 //! elementwise addition, vectors must be the same size.
127 PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
128 const TYPE *otherData = other.getData();
129 TYPE *data = d_data.get();
130 unsigned int i;
131 for (i = 0; i < d_size; i++) {
132 data[i] += otherData[i];
133 }
134 return *this;
135 }
136
137 //! elementwise subtraction, vectors must be the same size.
139 PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
140 const TYPE *otherData = other.getData();
141 TYPE *data = d_data.get();
142 unsigned int i;
143 for (i = 0; i < d_size; i++) {
144 data[i] -= otherData[i];
145 }
146 return *this;
147 }
148
149 //! multiplication by a scalar
151 unsigned int i;
152 for (i = 0; i < d_size; i++) {
153 d_data[i] *= scale;
154 }
155 return *this;
156 }
157
158 //! division by a scalar
160 unsigned int i;
161 for (i = 0; i < d_size; i++) {
162 d_data[i] /= scale;
163 }
164 return *this;
165 }
166
167 //! L2 norm squared
168 inline TYPE normL2Sq() const {
169 TYPE res = (TYPE)0.0;
170 unsigned int i;
171 TYPE *data = d_data.get();
172 for (i = 0; i < d_size; i++) {
173 res += data[i] * data[i];
174 }
175 return res;
176 }
177
178 //! L2 norm
179 inline TYPE normL2() const { return sqrt(this->normL2Sq()); }
180
181 //! L1 norm
182 inline TYPE normL1() const {
183 TYPE res = (TYPE)0.0;
184 unsigned int i;
185 TYPE *data = d_data.get();
186 for (i = 0; i < d_size; i++) {
187 res += fabs(data[i]);
188 }
189 return res;
190 }
191
192 //! L-infinity norm
193 inline TYPE normLinfinity() const {
194 TYPE res = (TYPE)(-1.0);
195 unsigned int i;
196 TYPE *data = d_data.get();
197 for (i = 0; i < d_size; i++) {
198 if (fabs(data[i]) > res) {
199 res = fabs(data[i]);
200 }
201 }
202 return res;
203 }
204
205 //! \brief Gets the ID of the entry that has the largest absolute value
206 //! i.e. the entry being used for the L-infinity norm
207 inline unsigned int largestAbsValId() const {
208 TYPE res = (TYPE)(-1.0);
209 unsigned int i, id = d_size;
210 TYPE *data = d_data.get();
211 for (i = 0; i < d_size; i++) {
212 if (fabs(data[i]) > res) {
213 res = fabs(data[i]);
214 id = i;
215 }
216 }
217 return id;
218 }
219
220 //! \brief Gets the ID of the entry that has the largest value
221 inline unsigned int largestValId() const {
222 TYPE res = (TYPE)(-1.e8);
223 unsigned int i, id = d_size;
224 TYPE *data = d_data.get();
225 for (i = 0; i < d_size; i++) {
226 if (data[i] > res) {
227 res = data[i];
228 id = i;
229 }
230 }
231 return id;
232 }
233
234 //! \brief Gets the ID of the entry that has the smallest value
235 inline unsigned int smallestValId() const {
236 TYPE res = (TYPE)(1.e8);
237 unsigned int i, id = d_size;
238 TYPE *data = d_data.get();
239 for (i = 0; i < d_size; i++) {
240 if (data[i] < res) {
241 res = data[i];
242 id = i;
243 }
244 }
245 return id;
246 }
247
248 //! returns the dot product between two Vectors
249 inline TYPE dotProduct(const Vector<TYPE> other) const {
250 PRECONDITION(d_size == other.size(),
251 "Size mismatch in vector doct product");
252 const TYPE *oData = other.getData();
253 unsigned int i;
254 TYPE res = (TYPE)(0.0);
255 TYPE *data = d_data.get();
256 for (i = 0; i < d_size; i++) {
257 res += (data[i] * oData[i]);
258 }
259 return res;
260 }
261
262 //! Normalize the vector using the L2 norm
263 inline void normalize() {
264 TYPE val = this->normL2();
265 if (val < zero_tolerance) {
266 throw std::runtime_error("Cannot normalize a zero length vector");
267 }
268 (*this) /= val;
269 }
270
271 //! Set to a random unit vector
272 inline void setToRandom(unsigned int seed = 0) {
273 // we want to get our own RNG here instead of using the global
274 // one. This is related to Issue285.
275 RDKit::rng_type generator(42u);
276 RDKit::uniform_double dist(0, 1.0);
277 RDKit::double_source_type randSource(generator, dist);
278 if (seed > 0) {
279 generator.seed(seed);
280 } else {
281 // we can't initialize using only clock(), because it's possible
282 // that we'll get here fast enough that clock() will return 0
283 // and generator.seed(0) is an error:
284 generator.seed(clock() + 1);
285 }
286
287 unsigned int i;
288 TYPE *data = d_data.get();
289 for (i = 0; i < d_size; i++) {
290 data[i] = randSource();
291 }
292 this->normalize();
293 }
294
295 private:
296 unsigned int d_size; //!< our length
297 DATA_SPTR d_data;
298 Vector<TYPE> &operator=(const Vector<TYPE> &other);
299};
300
302
303//! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
304template <typename T>
305double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
306 double numer = v1.dotProduct(v2);
307 if (numer == 0.0) {
308 return 0.0;
309 }
310 double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
311 if (denom == 0.0) {
312 return 0.0;
313 }
314 return numer / denom;
315}
316} // end of namespace RDNumeric
317
318//! ostream operator for Vectors
319template <typename TYPE>
320std::ostream &operator<<(std::ostream &target,
321 const RDNumeric::Vector<TYPE> &vec) {
322 unsigned int siz = vec.size();
323 target << "Size: " << siz << " [";
324 unsigned int i;
325 for (i = 0; i < siz; i++) {
326 target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
327 }
328 target << "]\n";
329 return target;
330}
331
332#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
static constexpr double zero_tolerance
Definition Vector.h:25
std::ostream & operator<<(std::ostream &target, const RDNumeric::Vector< TYPE > &vec)
ostream operator for Vectors
Definition Vector.h:320
A class to represent vectors of numbers.
Definition Vector.h:31
TYPE normL2() const
L2 norm.
Definition Vector.h:179
Vector< TYPE > & operator*=(TYPE scale)
multiplication by a scalar
Definition Vector.h:150
Vector(unsigned int N)
Initialize with only a size.
Definition Vector.h:36
Vector(const Vector &other)
copy constructor
Definition Vector.h:67
Vector< TYPE > & assign(const Vector< TYPE > &other)
Copy operator.
Definition Vector.h:117
TYPE & operator[](unsigned int i)
Definition Vector.h:99
void normalize()
Normalize the vector using the L2 norm.
Definition Vector.h:263
void setVal(unsigned int i, TYPE val)
sets the value at a particular index
Definition Vector.h:89
Vector(unsigned int N, TYPE val)
Initialize with a size and default value.
Definition Vector.h:44
Vector< TYPE > & operator-=(const Vector< TYPE > &other)
elementwise subtraction, vectors must be the same size.
Definition Vector.h:138
Vector< TYPE > & operator+=(const Vector< TYPE > &other)
elementwise addition, vectors must be the same size.
Definition Vector.h:126
void setToRandom(unsigned int seed=0)
Set to a random unit vector.
Definition Vector.h:272
unsigned int size() const
return the size (dimension) of the vector
Definition Vector.h:80
TYPE normL1() const
L1 norm.
Definition Vector.h:182
Vector< TYPE > & operator/=(TYPE scale)
division by a scalar
Definition Vector.h:159
TYPE normLinfinity() const
L-infinity norm.
Definition Vector.h:193
boost::shared_array< TYPE > DATA_SPTR
Definition Vector.h:33
TYPE normL2Sq() const
L2 norm squared.
Definition Vector.h:168
TYPE dotProduct(const Vector< TYPE > other) const
returns the dot product between two Vectors
Definition Vector.h:249
unsigned int largestValId() const
Gets the ID of the entry that has the largest value.
Definition Vector.h:221
unsigned int largestAbsValId() const
Gets the ID of the entry that has the largest absolute value i.e. the entry being used for the L-infinity norm
Definition Vector.h:207
TYPE * getData()
returns a pointer to our data array
Definition Vector.h:105
unsigned int smallestValId() const
Gets the ID of the entry that has the smallest value.
Definition Vector.h:235
TYPE getVal(unsigned int i) const
returns the value at a particular index
Definition Vector.h:83
const TYPE * getData() const
returns a const pointer to our data array
Definition Vector.h:108
TYPE operator[](unsigned int i) const
Definition Vector.h:94
Vector(unsigned int N, DATA_SPTR data)
Initialize from a smart pointer.
Definition Vector.h:59
boost::minstd_rand rng_type
Definition utils.h:36
boost::variate_generator< rng_type &, uniform_double > double_source_type
Definition utils.h:40
boost::uniform_real uniform_double
Definition utils.h:38
double TanimotoSimilarity(const Vector< T > &v1, const Vector< T > &v2)
returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
Definition Vector.h:305
Vector< double > DoubleVector
Definition Vector.h:301