RDKit
Open-source cheminformatics and machine learning.
BFGSOpt.h
//
//  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#include <cmath>
#include <RDGeneral/Invariant.h>
#include <ForceField/Snapshot.h>
#include <cstring>
#include <vector>
#include <algorithm>

namespace BFGSOpt {
RDKIT_OPTIMIZER_EXPORT extern int REALLY_A_HEADER_ONLY_LIBRARY;
RDKIT_OPTIMIZER_EXPORT extern int HEAD_ONLY_LIBRARY;
const double FUNCTOL =
    1e-4;  //!< Default tolerance for function convergence in the minimizer
const double MOVETOL =
    1e-7;                 //!< Default tolerance for x changes in the minimizer
const int MAXITS = 200;   //!< Default maximum number of iterations
const double EPS = 3e-8;  //!< Default gradient tolerance in the minimizer
const double TOLX =
    4. * EPS;  //!< Default direction vector tolerance in the minimizer
const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

//! Do a Quasi-Newton minimization along a line.
/*!
  See Numerical Recipes in C, Section 9.7 for a description of the algorithm.

  \param dim     the dimensionality of the space.
  \param oldPt   the current position, as an array.
  \param oldVal  the current function value.
  \param grad    the value of the function gradient at oldPt
  \param dir     the minimization direction
  \param newPt   used to return the final position
  \param newVal  used to return the final function value
  \param func    the function to minimize
  \param maxStep the maximum allowable step size
  \param resCode used to return the results of the search.

  Possible values for resCode on return are:
   - 0: success
   - 1: the step size got too small. This probably indicates success.
   - -1: the direction is bad (orthogonal to the gradient)
*/
template <typename EnergyFunctor>
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
                  double *dir, double *newPt, double &newVal,
                  EnergyFunctor func, double maxStep, int &resCode) {
  PRECONDITION(oldPt, "bad input array");
  PRECONDITION(grad, "bad input array");
  PRECONDITION(dir, "bad input array");
  PRECONDITION(newPt, "bad input array");

  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

  resCode = -1;

  // get the length of the direction vector:
  sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    sum += dir[i] * dir[i];
  }
  sum = sqrt(sum);

  // rescale if we're trying to move too far:
  if (sum > maxStep) {
    for (unsigned int i = 0; i < dim; i++) {
      dir[i] *= maxStep / sum;
    }
  }

  // make sure our direction has at least some component along
  // -grad
  slope = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    slope += dir[i] * grad[i];
  }
  if (slope >= 0.0) {
    return;
  }

  // find the largest relative component of the direction vector:
  test = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
    if (temp > test) {
      test = temp;
    }
  }

  lambdaMin = MOVETOL / test;
  lambda = 1.0;
  unsigned int it = 0;
  while (it < MAX_ITER_LINEAR_SEARCH) {
    if (lambda < lambdaMin) {
      // the position change is too small
      resCode = 1;
      break;
    }
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i] + lambda * dir[i];
    }
    newVal = func(newPt);

    if (newVal - oldVal <= FUNCTOL * lambda * slope) {
      // we're converged on the function:
      resCode = 0;
      return;
    }
    // if we made it this far, we need to backtrack:
    if (it == 0) {
      // first step: minimize a quadratic model of f(lambda)
      tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
    } else {
      // later steps: fit a cubic model to the two most recent values
      double rhs1 = newVal - oldVal - lambda * slope;
      double rhs2 = val2 - oldVal - lambda2 * slope;
      double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      double b = (-lambda2 * rhs1 / (lambda * lambda) +
                  lambda * rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      if (a == 0.0) {
        tmpLambda = -slope / (2.0 * b);
      } else {
        double disc = b * b - 3 * a * slope;
        if (disc < 0.0) {
          tmpLambda = 0.5 * lambda;
        } else if (b <= 0.0) {
          tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
        } else {
          tmpLambda = -slope / (b + sqrt(disc));
        }
      }
      if (tmpLambda > 0.5 * lambda) {
        tmpLambda = 0.5 * lambda;
      }
    }
    lambda2 = lambda;
    val2 = newVal;
    // never shrink lambda by more than a factor of 10:
    lambda = std::max(tmpLambda, 0.1 * lambda);
    ++it;
  }
  // we did not converge: reset the position
  for (unsigned int i = 0; i < dim; i++) {
    newPt[i] = oldPt[i];
  }
}
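
#ifdef BFGSOPT_EXAMPLE_USAGE
// A minimal sketch (not part of the RDKit API) showing the calling
// convention for linearSearch. The functor QuadraticFunc, the starting
// point, and the hand-computed gradient below are illustrative assumptions.
namespace {
struct QuadraticFunc {
  // f(x) = x0^2 + x1^2, with its minimum at the origin
  double operator()(double *pt) const { return pt[0] * pt[0] + pt[1] * pt[1]; }
};

inline int linearSearchExample() {
  double oldPt[2] = {1.0, 1.0};
  QuadraticFunc func;
  double oldVal = func(oldPt);
  double grad[2] = {2.0, 2.0};   // analytic gradient of f at oldPt
  double dir[2] = {-2.0, -2.0};  // steepest-descent direction: -grad
  double newPt[2];
  double newVal;
  int resCode;
  linearSearch(2, oldPt, oldVal, grad, dir, newPt, newVal, func, MAXSTEP,
               resCode);
  // resCode is 0 on success; newPt and newVal hold the accepted step
  return resCode;
}
}  // namespace
#endif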

#define CLEANUP()        \
  {                      \
    delete[] grad;       \
    delete[] dGrad;      \
    delete[] hessDGrad;  \
    delete[] newPos;     \
    delete[] xi;         \
    delete[] invHessian; \
  }

//! Do a BFGS minimization of a function.
/*!
  See Numerical Recipes in C, Section 10.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal used to return the final function value
  \param func the function to minimize
  \param gradFunc calculates the gradient of func
  \param funcTol tolerance for changes in the function value for convergence.
  \param maxIts maximum number of iterations allowed
  \param snapshotFreq a snapshot of the minimization trajectory
                      will be stored every snapshotFreq steps;
                      defaults to 0 (no snapshots stored)
  \param snapshotVect pointer to a std::vector<Snapshot> object that will
                      receive the coordinates and energies every snapshotFreq
                      steps; defaults to NULL (no snapshots stored)

  \return a flag indicating success (or the type of failure). Possible values
  are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, unsigned int snapshotFreq,
             RDKit::SnapshotVect *snapshotVect, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  RDUNUSED_PARAM(funcTol);
  PRECONDITION(pos, "bad input array");
  PRECONDITION(gradTol > 0, "bad tolerance");

  double sum, maxStep, fp;

  double *grad, *dGrad, *hessDGrad;
  double *newPos, *xi;
  double *invHessian;

  grad = new double[dim];
  dGrad = new double[dim];
  hessDGrad = new double[dim];
  newPos = new double[dim];
  xi = new double[dim];
  invHessian = new double[dim * dim];
  snapshotFreq = std::min(snapshotFreq, maxIts);

  // evaluate the function and gradient in our current position:
  fp = func(pos);
  gradFunc(pos, grad);

  sum = 0.0;
  memset(invHessian, 0, dim * dim * sizeof(double));
  for (unsigned int i = 0; i < dim; i++) {
    unsigned int itab = i * dim;
    // initialize the inverse hessian to be identity:
    invHessian[itab + i] = 1.0;
    // the first line dir is -grad:
    xi[i] = -grad[i];
    sum += pos[i] * pos[i];
  }
  // pick a max step size:
  maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

  for (unsigned int iter = 1; iter <= maxIts; iter++) {
    numIters = iter;
    int status;

    // do the line search:
    linearSearch(dim, pos, fp, grad, xi, newPos, funcVal, func, maxStep,
                 status);
    CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

    // save the function value for the next search:
    fp = funcVal;

    // set the direction of this line and save the gradient:
    double test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      xi[i] = newPos[i] - pos[i];
      pos[i] = newPos[i];
      double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
      if (temp > test) {
        test = temp;
      }
      dGrad[i] = grad[i];
    }
    // has the position converged?
    if (test < TOLX) {
      if (snapshotVect && snapshotFreq) {
        // ownership of newPos passes to the Snapshot:
        RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
        snapshotVect->push_back(s);
        newPos = nullptr;
      }
      CLEANUP();
      return 0;
    }

    // update the gradient:
    double gradScale = gradFunc(pos, grad);

    // is the gradient converged?
    test = 0.0;
    double term = std::max(funcVal * gradScale, 1.0);
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
      test = std::max(test, temp);
      // figure out how much the gradient changed:
      dGrad[i] = grad[i] - dGrad[i];
    }
    test /= term;
    if (test < gradTol) {
      if (snapshotVect && snapshotFreq) {
        // ownership of newPos passes to the Snapshot:
        RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
        snapshotVect->push_back(s);
        newPos = nullptr;
      }
      CLEANUP();
      return 0;
    }

    // compute hessian*dGrad:
    double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
    for (unsigned int i = 0; i < dim; i++) {
#if 0
      unsigned int itab = i * dim;
      hessDGrad[i] = 0.0;
      for (unsigned int j = 0; j < dim; j++) {
        hessDGrad[i] += invHessian[itab + j] * dGrad[j];
      }
#else
      // pointer-based version of the loop above:
      double *ivh = &(invHessian[i * dim]);
      double &hdgradi = hessDGrad[i];
      double *dgj = dGrad;
      hdgradi = 0.0;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
        hdgradi += *ivh * *dgj;
      }
#endif
      fac += dGrad[i] * xi[i];
      fae += dGrad[i] * hessDGrad[i];
      sumDGrad += dGrad[i] * dGrad[i];
      sumXi += xi[i] * xi[i];
    }
    // BFGS update of the inverse hessian; skip it if fac is not
    // sufficiently positive:
    if (fac > sqrt(EPS * sumDGrad * sumXi)) {
      fac = 1.0 / fac;
      double fad = 1.0 / fae;
      for (unsigned int i = 0; i < dim; i++) {
        dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
      }

      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
#if 0
        for (unsigned int j = i; j < dim; j++) {
          invHessian[itab + j] += fac * xi[i] * xi[j] -
                                  fad * hessDGrad[i] * hessDGrad[j] +
                                  fae * dGrad[i] * dGrad[j];
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#else
        // pointer-based version of the loop above:
        double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
               dgi = fae * dGrad[i];
        double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
        for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
          invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#endif
      }
    }
    // generate the next direction to move: xi = -invHessian * grad
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      xi[i] = 0.0;
#if 0
      for (unsigned int j = 0; j < dim; j++) {
        xi[i] -= invHessian[itab + j] * grad[j];
      }
#else
      // pointer-based version of the loop above:
      double &pxi = xi[i], *ivh = &(invHessian[itab]), *gj = grad;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
        pxi -= *ivh * *gj;
      }
#endif
    }
    if (snapshotVect && snapshotFreq && !(iter % snapshotFreq)) {
      // ownership of newPos passes to the Snapshot, so allocate a fresh array:
      RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
      snapshotVect->push_back(s);
      newPos = new double[dim];
    }
  }
  CLEANUP();
  return 1;
}
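
#ifdef BFGSOPT_EXAMPLE_USAGE
// A minimal sketch (not part of the RDKit API) of a full minimize() call
// that collects snapshots. ExampleEnergy, ExampleGradient, and the
// parameter values below are illustrative assumptions.
namespace {
struct ExampleEnergy {
  // f(x) = (x0 - 1)^2 + (x1 + 2)^2, with its minimum at (1, -2)
  double operator()(double *pt) const {
    double dx = pt[0] - 1.0, dy = pt[1] + 2.0;
    return dx * dx + dy * dy;
  }
};
struct ExampleGradient {
  // writes the analytic gradient into grad; the return value is the
  // scale factor used in the gradient convergence test
  double operator()(double *pt, double *grad) const {
    grad[0] = 2.0 * (pt[0] - 1.0);
    grad[1] = 2.0 * (pt[1] + 2.0);
    return 1.0;
  }
};

inline int minimizeWithSnapshotsExample(RDKit::SnapshotVect &snapshots) {
  double pos[2] = {10.0, 10.0};  // starting position; updated in place
  unsigned int numIters = 0;
  double funcVal = 0.0;
  // store a snapshot of the coordinates and energy every 10 iterations;
  // returns 0 on success, 1 if maxIts was exceeded
  return minimize(2, pos, 1e-4, numIters, funcVal, ExampleEnergy(),
                  ExampleGradient(), 10, &snapshots);
}
}  // namespace
#endif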

//! Do a BFGS minimization of a function.
/*!
  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal used to return the final function value
  \param func the function to minimize
  \param gradFunc calculates the gradient of func
  \param funcTol tolerance for changes in the function value for convergence.
  \param maxIts maximum number of iterations allowed

  \return a flag indicating success (or the type of failure). Possible values
  are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  return minimize(dim, pos, gradTol, numIters, funcVal, func, gradFunc, 0,
                  nullptr, funcTol, maxIts);
}
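
#ifdef BFGSOPT_EXAMPLE_USAGE
// A minimal sketch (not part of the RDKit API) of the convenience overload,
// which runs without snapshots. The lambdas below are illustrative
// assumptions; any callables with these signatures work as the functors.
inline int minimizeNoSnapshotsExample() {
  auto energy = [](double *pt) { return pt[0] * pt[0]; };
  auto gradient = [](double *pt, double *grad) {
    grad[0] = 2.0 * pt[0];
    return 1.0;  // scale factor for the gradient convergence test
  };
  double pos[1] = {3.0};
  unsigned int numIters = 0;
  double funcVal = 0.0;
  return minimize(1, pos, 1e-4, numIters, funcVal, energy, gradient);
}
#endif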

}  // namespace BFGSOpt