RDKit
Open-source cheminformatics and machine learning.
BFGSOpt.h
//
// Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#include <cmath>
#include <RDGeneral/Invariant.h>
#include <GraphMol/Trajectory/Snapshot.h>  // for RDKit::Snapshot/SnapshotVect
#include <cstring>
#include <vector>
#include <algorithm>
#include <memory>                  // for std::unique_ptr
#include <boost/shared_array.hpp>  // for boost::shared_array

namespace BFGSOpt {
RDKIT_OPTIMIZER_EXPORT extern int REALLY_A_HEADER_ONLY_LIBRARY;
RDKIT_OPTIMIZER_EXPORT extern int HEAD_ONLY_LIBRARY;
const double FUNCTOL =
    1e-4;  //!< Default tolerance for function convergence in the minimizer
const double MOVETOL =
    1e-7;  //!< Default tolerance for x changes in the minimizer
const int MAXITS = 200;   //!< Default maximum number of iterations
const double EPS = 3e-8;  //!< Default gradient tolerance in the minimizer
const double TOLX =
    4. * EPS;  //!< Default direction vector tolerance in the minimizer
const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

//! Do a Quasi-Newton minimization along a line.
/*!
  See Numerical Recipes in C, Section 9.7 for a description of the algorithm.

  \param dim     the dimensionality of the space.
  \param oldPt   the current position, as an array.
  \param oldVal  the current function value.
  \param grad    the value of the function gradient at oldPt
  \param dir     the minimization direction
  \param newPt   used to return the final position
  \param newVal  used to return the final function value
  \param func    the function to minimize
  \param maxStep the maximum allowable step size
  \param resCode used to return the results of the search.

  Possible values for resCode on return are:
  - 0: success
  - 1: the step size got too small; this probably indicates success.
  - -1: the direction is bad (orthogonal to the gradient)
*/
template <typename EnergyFunctor>
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
                  double *dir, double *newPt, double &newVal,
                  EnergyFunctor func, double maxStep, int &resCode) {
  PRECONDITION(oldPt, "bad input array");
  PRECONDITION(grad, "bad input array");
  PRECONDITION(dir, "bad input array");
  PRECONDITION(newPt, "bad input array");

  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

  resCode = -1;

  // get the length of the direction vector:
  sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    sum += dir[i] * dir[i];
  }
  sum = sqrt(sum);

  // rescale if we're trying to move too far:
  if (sum > maxStep) {
    for (unsigned int i = 0; i < dim; i++) {
      dir[i] *= maxStep / sum;
    }
  }

  // make sure our direction has at least some component along
  // -grad
  slope = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    slope += dir[i] * grad[i];
  }
  if (slope >= 0.0) {
    return;
  }

  test = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
    if (temp > test) {
      test = temp;
    }
  }

  lambdaMin = MOVETOL / test;
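  // always try the full Newton step (lambda = 1) first: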
  lambda = 1.0;
  unsigned int it = 0;
  while (it < MAX_ITER_LINEAR_SEARCH) {
    // std::cerr << "\t" << it<<" : "<<lambda << " " << lambdaMin << std::endl;
    if (lambda < lambdaMin) {
      // the position change is too small
      resCode = 1;
      break;
    }
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i] + lambda * dir[i];
    }
    newVal = func(newPt);

    if (newVal - oldVal <= FUNCTOL * lambda * slope) {
      // we're converged on the function:
      resCode = 0;
      return;
    }
    // if we made it this far, we need to backtrack:
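    // Model the function along the search direction: on the first
    // iteration, fit a quadratic through f(0), f'(0), and f(lambda); on
    // later iterations, fit a cubic that also uses the previous trial
    // point (lambda2, val2). Then step to the model's minimizer, keeping
    // the new lambda at least 0.1 of the old one (and, for the cubic
    // fit, at most half).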
    if (it == 0) {
      // it's the first step:
      tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
    } else {
      double rhs1 = newVal - oldVal - lambda * slope;
      double rhs2 = val2 - oldVal - lambda2 * slope;
      double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      double b = (-lambda2 * rhs1 / (lambda * lambda) +
                  lambda * rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      if (a == 0.0) {
        tmpLambda = -slope / (2.0 * b);
      } else {
        double disc = b * b - 3 * a * slope;
        if (disc < 0.0) {
          tmpLambda = 0.5 * lambda;
        } else if (b <= 0.0) {
          tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
        } else {
          tmpLambda = -slope / (b + sqrt(disc));
        }
      }
      if (tmpLambda > 0.5 * lambda) {
        tmpLambda = 0.5 * lambda;
      }
    }
    lambda2 = lambda;
    val2 = newVal;
    lambda = std::max(tmpLambda, 0.1 * lambda);
    ++it;
  }
  // nothing was done
  // std::cerr<<" RETURN AT END: "<<it<<" "<<resCode<<std::endl;
  for (unsigned int i = 0; i < dim; i++) {
    newPt[i] = oldPt[i];
  }
}
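
// A minimal usage sketch of linearSearch (illustrative only; `f` and the
// starting values are hypothetical, not part of RDKit). For
// f(x, y) = x^2 + y^2 at (1, 1), searching along -grad:
//
//   auto f = [](double *p) { return p[0] * p[0] + p[1] * p[1]; };
//   double oldPt[2] = {1.0, 1.0};
//   double grad[2] = {2.0, 2.0};   // gradient of f at oldPt
//   double dir[2] = {-2.0, -2.0};  // search direction, here -grad
//   double newPt[2];
//   double newVal;
//   int resCode;
//   linearSearch(2u, oldPt, 2.0, grad, dir, newPt, newVal, f, MAXSTEP,
//                resCode);
//   // resCode == 0: newPt and newVal hold the accepted step along dir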

//! Do a BFGS minimization of a function.
/*!
  See Numerical Recipes in C, Section 10.7 for a description of the algorithm.

  \param dim      the dimensionality of the space.
  \param pos      the starting position, as an array.
  \param gradTol  tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal  used to return the final function value
  \param func     the function to minimize
  \param gradFunc calculates the gradient of func
  \param funcTol  tolerance for changes in the function value for convergence.
  \param maxIts   maximum number of iterations allowed
  \param snapshotFreq a snapshot of the minimization trajectory will be stored
         every snapshotFreq steps; defaults to 0 (no snapshots stored)
  \param snapshotVect pointer to a std::vector<Snapshot> that will receive the
         coordinates and energies every snapshotFreq steps; defaults to NULL
         (no snapshots stored)

  \return a flag indicating success (or type of failure). Possible values are:
  - 0: success
  - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, unsigned int snapshotFreq,
             RDKit::SnapshotVect *snapshotVect, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  RDUNUSED_PARAM(funcTol);
  PRECONDITION(pos, "bad input array");
  PRECONDITION(gradTol > 0, "bad tolerance");

  std::vector<double> grad(dim);
  std::vector<double> dGrad(dim);
  std::vector<double> hessDGrad(dim);
  std::vector<double> xi(dim);
  std::vector<double> invHessian(dim * dim, 0);
  std::unique_ptr<double[]> newPos(new double[dim]);
  snapshotFreq = std::min(snapshotFreq, maxIts);

  // evaluate the function and gradient in our current position:
  double fp = func(pos);
  gradFunc(pos, grad.data());

  double sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    unsigned int itab = i * dim;
    // initialize the inverse hessian to be identity:
    invHessian[itab + i] = 1.0;
    // the first line dir is -grad:
    xi[i] = -grad[i];
    sum += pos[i] * pos[i];
  }
  // pick a max step size:
  double maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

  for (unsigned int iter = 1; iter <= maxIts; ++iter) {
    numIters = iter;
    int status = -1;

    // do the line search:
    linearSearch(dim, pos, fp, grad.data(), xi.data(), newPos.get(), funcVal,
                 func, maxStep, status);
    CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

    // save the function value for the next search:
    fp = funcVal;

    // set the direction of this line and save the gradient:
    double test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      xi[i] = newPos[i] - pos[i];
      pos[i] = newPos[i];
      double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
      if (temp > test) {
        test = temp;
      }
      dGrad[i] = grad[i];
    }
    // std::cerr << " iter: " << iter << " " << fp << " " << test << " "
    //           << TOLX << std::endl;
    if (test < TOLX) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos.release()), fp);
        snapshotVect->push_back(s);
      }
      return 0;
    }

    // update the gradient:
    double gradScale = gradFunc(pos, grad.data());

    // is the gradient converged?
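    // (a relative test: the largest |grad[i]| * max(|pos[i]|, 1.0), scaled
    // by max(funcVal * gradScale, 1.0), must drop below gradTol)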
    test = 0.0;
    double term = std::max(funcVal * gradScale, 1.0);
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
      test = std::max(test, temp);
      // also record how much the gradient changed on this step:
      dGrad[i] = grad[i] - dGrad[i];
    }
    test /= term;
    // std::cerr << " " << gradScale << " " << test << " " << gradTol
    //           << std::endl;
    if (test < gradTol) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos.release()), fp);
        snapshotVect->push_back(s);
      }
      return 0;
    }

    // compute hessian*dGrad:
    double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
    for (unsigned int i = 0; i < dim; i++) {
#if 0
      unsigned int itab = i * dim;
      hessDGrad[i] = 0.0;
      for (unsigned int j = 0; j < dim; j++) {
        hessDGrad[i] += invHessian[itab + j] * dGrad[j];
      }
#else
      double *ivh = &(invHessian[i * dim]);
      double &hdgradi = hessDGrad[i];
      double *dgj = dGrad.data();
      hdgradi = 0.0;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
        hdgradi += *ivh * *dgj;
      }
#endif
      fac += dGrad[i] * xi[i];
      fae += dGrad[i] * hessDGrad[i];
      sumDGrad += dGrad[i] * dGrad[i];
      sumXi += xi[i] * xi[i];
    }
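
    // Apply the standard BFGS update to the inverse Hessian estimate, but
    // only when fac = dGrad.xi is sufficiently positive relative to the
    // norms involved; otherwise skip the update rather than divide by a
    // quantity dominated by roundoff.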
    if (fac > sqrt(EPS * sumDGrad * sumXi)) {
      fac = 1.0 / fac;
      double fad = 1.0 / fae;
      for (unsigned int i = 0; i < dim; i++) {
        dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
      }

      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
#if 0
        for (unsigned int j = i; j < dim; j++) {
          invHessian[itab + j] += fac * xi[i] * xi[j] -
                                  fad * hessDGrad[i] * hessDGrad[j] +
                                  fae * dGrad[i] * dGrad[j];
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#else
        double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
               dgi = fae * dGrad[i];
        double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
        for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
          invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#endif
      }
    }
    // generate the next direction to move:
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      xi[i] = 0.0;
#if 0
      for (unsigned int j = 0; j < dim; j++) {
        xi[i] -= invHessian[itab + j] * grad[j];
      }
#else
      double &pxi = xi[i];
      double *ivh = &(invHessian[itab]);
      double *gj = grad.data();
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
        pxi -= *ivh * *gj;
      }
#endif
    }
    if (snapshotVect && snapshotFreq && !(iter % snapshotFreq)) {
      RDKit::Snapshot s(boost::shared_array<double>(newPos.release()), fp);
      snapshotVect->push_back(s);
      newPos.reset(new double[dim]);
    }
  }
  return 1;
}
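
// A minimal usage sketch (illustrative only; the Paraboloid* functors are
// hypothetical, not part of RDKit). Minimizes f(x, y) = x^2 + y^2 starting
// from (3, 4) and records a snapshot after every iteration:
//
//   struct ParaboloidEnergy {
//     double operator()(double *p) const { return p[0] * p[0] + p[1] * p[1]; }
//   };
//   struct ParaboloidGradient {
//     // fills g with the gradient; the return value is the scale factor
//     // used in the gradient convergence test
//     double operator()(double *p, double *g) const {
//       g[0] = 2.0 * p[0];
//       g[1] = 2.0 * p[1];
//       return 1.0;
//     }
//   };
//   double pos[2] = {3.0, 4.0};
//   unsigned int numIters;
//   double funcVal;
//   RDKit::SnapshotVect snapshots;
//   int res = minimize(2u, pos, 1e-4, numIters, funcVal, ParaboloidEnergy(),
//                      ParaboloidGradient(), 1, &snapshots);
//   // res == 0 on success; pos now holds the minimum and funcVal is ~0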

//! Do a BFGS minimization of a function.
/*!
  \param dim      the dimensionality of the space.
  \param pos      the starting position, as an array.
  \param gradTol  tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal  used to return the final function value
  \param func     the function to minimize
  \param gradFunc calculates the gradient of func
  \param funcTol  tolerance for changes in the function value for convergence.
  \param maxIts   maximum number of iterations allowed

  \return a flag indicating success (or type of failure). Possible values are:
  - 0: success
  - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  return minimize(dim, pos, gradTol, numIters, funcVal, func, gradFunc, 0,
                  nullptr, funcTol, maxIts);
}
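
// The same minimization without snapshot collection, via this convenience
// overload (functors as in the sketch above):
//
//   double pos[2] = {3.0, 4.0};
//   unsigned int numIters;
//   double funcVal;
//   int res = minimize(2u, pos, 1e-4, numIters, funcVal, ParaboloidEnergy(),
//                      ParaboloidGradient());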

}  // namespace BFGSOpt